## This notebook computes the Degree Heating Weeks (DHW) index over a 9-week (63-day) moving window

In [None]:
from datetime import datetime
from netCDF4 import Dataset as ds
import numpy as np
import dateutil as du 
import parser
import pytz
import dateutil.parser 
import pandas as pd
from pandas import DataFrame
import csv
import glob, os
from os.path import expanduser
from pathlib import Path
import xarray as xr 

In [None]:
### Load the bleaching-event database (.CSV) and keep only the columns used
### downstream: record id, country, corrected coordinates, severity and date parts.
df = pd.read_csv('CBdbV2.csv', encoding='latin-1', low_memory=False)
# .copy() gives an independent frame, so the column assignments below do not
# write into a view of the raw table (avoids SettingWithCopyWarning)
df = df[['RECORD_ID','COUNTRY','LAT_corrected','LON_corrected','SEVERITY_CODE','DATE','MONTH','YEAR']].copy()
### Build a 7-character, zero-padded string id from the original row index
df['ITEM_ID'] = df.index.astype(str).str.zfill(7)
### The 'DATE' column holds the day of the month, so rename it to 'DAY'
df = df.rename(columns={'DATE': 'DAY'})
### Keep only rows whose DAY, MONTH and YEAR can all be read as numbers.
### (Checking DAY alone would let a missing/non-numeric MONTH or YEAR crash the
### conversions below instead of being dropped like the other incomplete rows.)
date_parts = df[['DAY', 'MONTH', 'YEAR']].apply(pd.to_numeric, errors='coerce')
df = df[date_parts.notnull().all(axis=1)].copy()
### Store DAY and MONTH as zero-padded two-character strings (e.g. 3.0 -> '03')
df['DAY'] = df['DAY'].apply(lambda v: '{0:0>2}'.format(int(float(v))))
df['MONTH'] = df['MONTH'].apply(lambda v: '{0:0>2}'.format(int(float(v))))
### Assemble 'YYYY-MM-DD' strings and parse them into datetimes.
### YEAR is cast to int for the string only (the YEAR column itself is left
### untouched) so that a float year such as 1998.0 does not break the '%Y' parse.
date_strings = ['%d-%s-%s' % (int(float(y)), m, d)
                for y, m, d in zip(df['YEAR'], df['MONTH'], df['DAY'])]
df['full_date'] = pd.to_datetime(date_strings, format='%Y-%m-%d')
### Day of the year (1-366) of each report
df['JD'] = df['full_date'].dt.dayofyear
### Re-order the columns starting with the id, then rename LAT and LON
df = df.reindex(columns=['ITEM_ID','COUNTRY','LAT_corrected','LON_corrected','SEVERITY_CODE','DAY','MONTH','YEAR','full_date','JD'])
df = df.rename(columns={'LAT_corrected': 'lat', 'LON_corrected': 'lon'})
### Drop every row that still has a NaN in any of the remaining columns
df = df.dropna(axis=0, how='any')

In [None]:
# Functions 
def file_info(ncfilename):
    '''
    Open a netCDF file with xarray and return the resulting dataset.

    Parameters
    ----------
    ncfilename : path of the netCDF file; open_dataset works fine with
        either a full path or just "filename.nc".

    Returns
    -------
    The object returned by xr.open_dataset(ncfilename). It is handed back
    without being closed, so the caller owns it.
    '''
    # xarray opens the file on its own. The netCDF4 handle that used to be
    # opened around this call was never read from, so it only cost a second
    # open/close of the same file on every call.
    return xr.open_dataset(ncfilename)
def array_sel(array, lat=None, lon=None, time=None, window=63):
    '''
    Return the rolling sum of the 'hotspot' variable at one location and date.

    The grid cell nearest to (lat, lon) is selected, 'hotspot' is summed over
    a rolling window of `window` time steps, and the result is reduced to the
    requested date.

    Parameters
    ----------
    array : dataset as returned by file_info(); must provide 'lat', 'lon' and
        'time' coordinates and a 'hotspot' variable.
    lat, lon : coordinates of the point of interest. When left as None the
        module-level variables `lat` / `lon` are used, which is how this
        function was originally driven.
    time : date of interest; must provide strftime(). When left as None the
        module-level variable `time` is used.
    window : number of time steps to sum. The default of 63 corresponds to
        9 weeks, assuming one time step per day (TODO confirm against the
        HotSpot file).

    Returns
    -------
    The selection of the rolling sum for the day `time` (selected through its
    'YYYY-MM-DD' string).

    Notes
    -----
    Every value inside the window is summed; nothing here filters out
    negative values. Presumably 'hotspot' already holds only non-negative
    anomalies -- confirm against the input product.
    '''
    # Backward compatibility: fall back to the module-level names that the
    # calling loop sets when no explicit argument is given.
    fallback = globals()
    try:
        if lat is None:
            lat = fallback['lat']
        if lon is None:
            lon = fallback['lon']
        if time is None:
            time = fallback['time']
    except KeyError as missing:
        # Same error type as a plain lookup of an undefined global would raise
        raise NameError("name {!r} is not defined".format(missing.args[0])) from None

    dsloc = array.sel(lon=lon, lat=lat, method='nearest')
    val_arr = dsloc['hotspot'].rolling(time=window).sum()
    # slice to a single given date
    return val_arr.sel(time=time.strftime("%Y-%m-%d"))

In [None]:
# Extract the selected values for every report (row) and collect them in a list
home = Path(expanduser("~"))
path = home / Path("your path and document.nc") # here we use the "HotSpot's file"
# The same file is used for every row, so open it once here rather than once
# per report (which left one open dataset behind for each row).
array = file_info(path)
x = []
for row in df.itertuples():
    # array_sel() picks up the location and date from these module-level names
    lat = row.lat
    lon = row.lon
    time = row.full_date
    cf = array_sel(array)
    x.append(cf)

In [None]:
# Take the first element of each per-report selection and keep its value,
# then attach the values to the reports table and export everything as CSV
DHW_9 = []
for selection in x:
    DHW_9.append(selection[0].values)
df['DHW_9'] = DHW_9
df.to_csv('DHW_9_per_report.csv')