In [1]:
from datetime import datetime
from netCDF4 import Dataset as ds
import numpy as np
import dateutil as du 
import parser
import pytz
import dateutil.parser 
import pandas as pd
from pandas import DataFrame
import csv
import glob, os
from os.path import expanduser
from pathlib import Path
import xarray as xr 

In [14]:
### Load the bleaching-event database (.CSV) and keep only the columns used below.
### Rows whose MONTH/YEAR cannot be turned into a valid calendar month, or that
### contain any missing value in the retained columns, are dropped.
source_columns = ['RECORD_ID','COUNTRY','LAT_corrected','LON_corrected','SEVERITY_CODE','DATE','MONTH','YEAR']
# .copy() gives an independent frame so the column assignments below never act on a view
df = pd.read_csv('CBdbV2.csv', encoding='latin-1', low_memory=False)[source_columns].copy()

#### Convert the original row index into a column:
#### a 7-character string, left-padded with '0'
df['ITEM_ID'] = df.index.astype(str).str.zfill(7)

# Parse MONTH and YEAR once; values that are not numeric become NaN instead of raising
month_num = pd.to_numeric(df['MONTH'], errors='coerce')
year_num = pd.to_numeric(df['YEAR'], errors='coerce')
has_month_and_year = month_num.notnull() & year_num.notnull()

# Filter first, then copy, so later assignments do not trigger SettingWithCopyWarning
df = df.loc[has_month_and_year].copy()
month_int = month_num[has_month_and_year].astype(int)   # truncates like int(float(x))
year_int = year_num[has_month_and_year].astype(int)

df['MONTH'] = month_int.map('{0:0>2}'.format)   # two-character, zero-padded month string
df['YEAR'] = year_int                           # integer year, e.g. 1985 rather than 1985.0

# Build the monthly period straight from the year/month numbers. The day of the
# month is irrelevant because the result is reduced to a monthly period ('M').
# errors='coerce' turns impossible dates (e.g. month 0 or 13) into NaT, which the
# final dropna removes instead of aborting the whole cell.
df['rank_date'] = pd.to_datetime(
    pd.DataFrame({'year': year_int, 'month': month_int, 'day': 1}),
    errors='coerce',
).dt.to_period('M')

### Re-order the columns starting with the ID, rename LAT/LON, and drop incomplete rows
df = (
    df.reindex(columns=['ITEM_ID','COUNTRY','LAT_corrected','LON_corrected','SEVERITY_CODE','MONTH','YEAR','rank_date'])
      .rename(columns={'LAT_corrected': 'lat', 'LON_corrected': 'lon'})
      .dropna(axis=0, how='any')   # drop a row if any retained column is missing
)

In [15]:
# Keep only the events recorded for the year 1985
df = df[df['YEAR'].eq(1985)]

Unnamed: 0,ITEM_ID,COUNTRY,lat,lon,SEVERITY_CODE,MONTH,YEAR,rank_date
105,105,Panama,7.816667,-81.766667,-1,10,1985,1985-10


In [16]:
# Helpers: open a NetCDF (.nc) file as an xarray Dataset, and extract the 'analysed_sst' series at the grid point nearest to the current lat/lon
def file_info(ncfilename):
    """Open a NetCDF file and return it as an xarray Dataset.

    Parameters
    ----------
    ncfilename : str or path-like
        Full path or bare file name of the .nc file; open_dataset works
        with either.

    Returns
    -------
    xarray.Dataset
        The opened dataset. It is not closed here, so the caller owns it.
    """
    # The file is opened once, through xarray only. A second netCDF4 handle
    # was previously opened around this call but never used.
    return xr.open_dataset(ncfilename)
def array_sel(array):
    """Return the 'analysed_sst' variable at the grid point nearest to lat/lon.

    NOTE: `lat` and `lon` are not parameters; they are read from the
    module-level variables of the same name, which must be set before calling.

    Parameters
    ----------
    array : object exposing ``.sel(lon=..., lat=..., method=...)``
        Presumably an xarray Dataset with 'lat' and 'lon' coordinates and an
        'analysed_sst' variable -- TODO confirm against the input files.

    Returns
    -------
    The 'analysed_sst' entry of the nearest-neighbour selection.
    """
    nearest_point = array.sel(method='nearest', lat=lat, lon=lon)
    return nearest_point['analysed_sst']

In [17]:
# For every event row: take the SST series at the nearest grid point, rank it
# along 'time', and store the entry with the highest rank in the list `x`.
home = Path(expanduser("~"))
x = []
_datasets_by_path = {}   # each yearly file is opened once and reused for later rows
for row in df.itertuples():
    # array_sel() reads these two module-level names, so they must be set per row
    lat = row.lat
    lon = row.lon
    time = row.rank_date
    # No leading "/" in the file name: joining an absolute path would discard
    # `home` and look for the file at the filesystem root instead.
    path = home / ("sst_monmean" + str(row.YEAR) + ".nc")
    if path not in _datasets_by_path:
        _datasets_by_path[path] = file_info(path)
    array = _datasets_by_path[path]
    cf = array_sel(array)
    ranking = cf.rank('time')   # ranking from coldest to warmest
    # Built-in max() iterates over the first dimension and returns the element
    # that compares greatest -- assumes `ranking` is 1-D over 'time'; TODO confirm
    rank_max = max(ranking)
    # NOTE(review): rank_month is not used below in this notebook section, and
    # this lookup only succeeds if the file's time axis matches the formatted
    # date string -- confirm against the data before relying on it.
    rank_month = ranking.sel(time=time.strftime("%Y-%m-%d"))
    x.append(rank_max)

In [None]:
# Collect the 'time' coordinate carried by each stored rank entry
coor = [
    rank_entry.coords['time']
    for rank_entry in x
]

In [None]:
# Build a list holding the underlying value (the actual time) of each coordinate
coor_list = [time_coord.values for time_coord in coor]

In [None]:
# One-column table of the collected times, formatted as 'YYYY-MM-DD' strings
df_coor = pd.DataFrame(coor_list, columns=['Date_MMM'])
df_coor['Date_MMM'] = pd.to_datetime(df_coor['Date_MMM']).dt.strftime('%Y-%m-%d')
df_coor

In [None]:
# Write the date table to disk without the DataFrame index column
df_coor.to_csv(path_or_buf='df_MONdateofMMM.csv', index=False)