In [None]:
import tropycal.tracks as tracks
import datetime as dt
import numpy as np
import pandas as pd

In [10]:
import sys
sys.path.append('../')
from utils import data_processing as proc

In [13]:
# Load the HURDAT2 track dataset. `basin='both'` requests both basins, not
# only the North Atlantic one.
hurdat = tracks.TrackDataset(basin='both', source='hurdat', include_btk=False)

# Example: fetch the operational forecasts for a single storm.
storm = hurdat.get_storm(('michael', 2018))
forecast = storm.get_operational_forecasts()
# Reuse the forecasts fetched above instead of calling
# get_operational_forecasts() a second time just to list the keys.
print('all available models', forecast.keys())

# Models chosen for use (other candidates considered: 'NVGM', 'NAM').
# Only the IDs that are actually available for this storm are kept.
# NOTE(review): 'CLAP5' does not appear in the model IDs printed for this
# storm (the listing contains 'CLP5') -- confirm which ID was intended.
model_list = set(['HWRF','SHIP', 'AEMI','CLAP5','EMXI','CMC']).intersection(forecast.keys())

#This website has the explanation of all the models: 
#https://www.nhc.noaa.gov/modelsummary.shtml#:~:text=The%20National%20Hurricane%20Center%20 

--> Starting to read in HURDAT2 data
--> Completed reading in HURDAT2 data (9.85 seconds)
all available models dict_keys(['CARQ', 'NAM', 'AC00', 'AEMN', 'AP01', 'AP02', 'AP03', 'AP04', 'AP05', 'AP06', 'AP07', 'AP08', 'AP09', 'AP10', 'AP11', 'AP12', 'AP13', 'AP14', 'AP15', 'AP16', 'AP17', 'AP18', 'AP19', 'AP20', 'AVNO', 'AVNX', 'CLP5', 'CTCX', 'DSHP', 'GFSO', 'HCCA', 'IVCN', 'IVDR', 'LGEM', 'OCD5', 'PRFV', 'SHF5', 'SHIP', 'TABD', 'TABM', 'TABS', 'TCLP', 'XTRP', 'CMC', 'NGX', 'UKX', 'AEMI', 'AHNI', 'AVNI', 'CEMN', 'CHCI', 'CTCI', 'DSPE', 'EGRR', 'LGME', 'NAMI', 'NEMN', 'RVCN', 'RVCX', 'SHPE', 'TBDE', 'TBME', 'TBSE', 'TVCA', 'TVCE', 'TVCN', 'TVCX', 'TVDG', 'UE00', 'UE01', 'UE02', 'UE03', 'UE04', 'UE05', 'UE06', 'UE07', 'UE08', 'UE09', 'UE10', 'UE11', 'UE12', 'UE13', 'UE14', 'UE15', 'UE16', 'UE17', 'UE18', 'UE19', 'UE20', 'UE21', 'UE22', 'UE23', 'UE24', 'UE25', 'UE26', 'UE27', 'UE28', 'UE29', 'UE30', 'UE31', 'UE32', 'UE33', 'UE34', 'UE35', 'UEMN', 'CEMI', 'CMCI', 'COTC', 'EGRI', 'HMON', 'H

In [6]:
#function to get forecast of hurricane based on name and year 
def get_forecast(hurdat, name, year, pred=24, models=None):
    """Build a table of model forecasts at one forecast hour for a single storm.

    Parameters
    ----------
    hurdat : object
        Track dataset exposing ``get_storm((name, year))``; the returned storm
        must expose ``get_operational_forecasts()``.
    name : str
        Storm name passed to ``get_storm``.
    year : int
        Storm year passed to ``get_storm``.
    pred : int, default 24
        Forecast hour to keep (rows whose ``'fhr'`` equals this value).
    models : iterable of str, optional
        Model IDs to extract. Defaults to the notebook's original selection.
        IDs that the storm has no forecasts for are ignored.

    Returns
    -------
    pandas.DataFrame
        One ``'datetime'`` column (the forecast run key parsed with
        ``'%Y%m%d%H'``; presumably the run's initialization time -- TODO
        confirm) followed by ``<model>_<pred>_{lat,lon,vmax,mslp}`` columns
        for every model that has at least one row at ``pred``, sorted by
        ``'datetime'``. An empty frame with only ``'datetime'`` is returned
        when nothing is available or the lookup fails.
    """
    if models is None:
        # NOTE(review): 'CLAP5' never matches the model IDs shown earlier in
        # this notebook (which list 'CLP5') -- confirm the intended ID.
        models = ('HWRF', 'SHIP', 'AEMI', 'CLAP5', 'EMXI', 'CMC')
    try:
        storm = hurdat.get_storm((name, year))
        forecast = storm.get_operational_forecasts()
        # Sorted so the output column order does not depend on set ordering.
        model_list = sorted(set(models).intersection(forecast.keys()))

        model_frames = []
        for model in model_list:
            prefix = str(model) + '_' + str(pred) + '_'
            run_frames = []
            for time in forecast[model]:
                df = pd.DataFrame(forecast[model][time])
                temp = df.loc[df['fhr'] == pred, ['lat', 'lon', 'vmax', 'mslp']]
                if temp.empty:
                    # This run has no entry at the requested forecast hour.
                    continue
                temp = temp.add_prefix(prefix)
                temp['datetime'] = pd.to_datetime(time, format='%Y%m%d%H')
                run_frames.append(temp)
            # A model without usable runs is skipped so it cannot invalidate
            # the forecasts of the other models.
            if run_frames:
                model_frames.append(pd.concat(run_frames, axis=0, ignore_index=True))

        if not model_frames:
            return pd.DataFrame(columns=['datetime'])

        # Merge model by model, starting from a real frame so that the join
        # key always has a datetime dtype on both sides.
        df_out = model_frames[0]
        for df_model in model_frames[1:]:
            df_out = df_out.merge(df_model, on='datetime', how='outer')
        ordered = ['datetime'] + [col for col in df_out.columns if col != 'datetime']
        df_out = df_out[ordered].sort_values(by='datetime')
    except Exception as err:
        # Best effort: a storm that cannot be resolved yields an empty table.
        # Only Exception is caught so Ctrl-C / SystemExit still propagate.
        print('no forecast for', name, year, '-', repr(err))
        df_out = pd.DataFrame(columns=['datetime'])
    return df_out


In [None]:
 def join_forecast(df, pred=24, hurdat=None):
     """Left-join per-storm model forecasts onto a track table.

     Parameters
     ----------
     df : pandas.DataFrame
         Must contain 'NAME', 'ISO_TIME' and 'BASIN'. 'ISO_TIME' must already
         be datetime-typed, since `.dt.year` is read from it. The caller's
         frame is not modified.
     pred : int, default 24
         Forecast hour forwarded to `get_forecast`.
     hurdat : optional
         An already loaded track dataset. When omitted, the HURDAT dataset
         for both basins is read here.

     Returns
     -------
     pandas.DataFrame
         `df` with the forecast columns of every storm in basins 'NA'/'EP'
         joined on (ISO_TIME, lower-cased NAME), and with the helper columns
         'NAME', 'datetime' and 'YEAR' removed.
     """
     print('joining forecast data')
     if hurdat is None:
         # read hurdat data set for both basins: north atlantic and east pacific
         hurdat = tracks.TrackDataset(basin='both',source='hurdat',include_btk=False)

     # Work on a copy so the helper columns never leak into the caller's frame.
     tracks_df = df.copy()
     tracks_df['NAME'] = tracks_df['NAME'].str.lower()
     tracks_df['YEAR'] = tracks_df['ISO_TIME'].dt.year

     # Unique (name, year) pairs for north atlantic and east pacific storms.
     in_basin = tracks_df['BASIN'].isin(['NA','EP'])
     storm_list = tracks_df.loc[in_basin, ['NAME','YEAR']].drop_duplicates()

     # Collect every storm's forecast first; merging inside the loop would
     # keep only the forecast of the last storm.
     forecasts = []
     for name, year in storm_list.itertuples(index=False, name=None):
         df_forecast = get_forecast(hurdat, name, year, pred)
         if df_forecast.empty:
             continue
         df_forecast['NAME'] = name
         forecasts.append(df_forecast)

     if forecasts:
         all_forecasts = pd.concat(forecasts, axis=0, ignore_index=True)
         df_out = tracks_df.merge(all_forecasts, how='left',
                                  left_on=['ISO_TIME','NAME'], right_on=['datetime','NAME'])
     else:
         # Nothing to join: return the rows with the same fresh index a merge
         # would have produced.
         df_out = tracks_df.reset_index(drop=True)

     # Drop the helper COLUMNS (a positional list would be read as row labels).
     # 'datetime' is absent when no forecast was joined, hence errors='ignore'.
     return df_out.drop(columns=['NAME','datetime','YEAR'], errors='ignore')
# df1 = df0.iloc[:4000]
# df2 = join_forecast(df1, pred=24)


In [None]:
#testing it works with the prepare_tabular_vision code 
# Thresholds and switches for this run.
min_wind=50
min_steps=15
max_steps=120 
get_displacement=True 
#documentation on data https://www.ncdc.noaa.gov/ibtracs/pdf/IBTrACS_v04_column_documentation.pdf 
path="../data/last3years.csv"
# One-hot encoding is disabled for this test (the earlier `one_hot = True`
# was overwritten before ever being read, so it has been removed).
one_hot =False
# NOTE(review): pandas.read_csv parses the literal string 'NA' as missing by
# default, while join_forecast filters on BASIN == 'NA' -- confirm the basin
# codes survive this read.
data = pd.read_csv(path)
data.drop(0, axis=0, inplace=True) #drop secondary column names
# select interesting columns
df0 = data[['SID','NAME', 'BASIN','ISO_TIME', 'LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'DIST2LAND', 'STORM_SPEED', 'STORM_DIR']]
# transform data from String to numeric
df0 = numeric_data(df0)
# smooth cos & sign of day
df0 = smooth_features(df0)
# # add wind category
df0['wind_category'] = df0.apply(lambda x: sust_wind_to_cat_val(x['WMO_WIND']), axis=1)
# keep only the first 3000 rows so the test stays small
df0 = df0.iloc[:3000]
# join forecast data:
df0 = join_forecast(df0)
if one_hot:
    #adding BASIN and NATURE feature as a one hot
    df0 = add_one_hot(df0, data['BASIN'], 'basin')
    df0 = add_one_hot(df0, data['NATURE'], 'nature')
    #add category one_hot
    #df0 = add_one_hot(df0, df0['wind_category'], 'category')
print('df0 columns :', df0.columns)

# get a dict with the storms with a windspeed and number of timesteps greater to a threshold
storms = sort_storm(df0, min_wind, min_steps)
# pad the trajectories to a fix length
d = pad_traj(storms, max_steps)
# print(d)
if get_displacement:
    d = add_displacement_lat_lon2(d)
# print the shape of the tensor
m, n, t_max, t_min, t_hist = tensor_shape(d)
# create the tensor
t, p_list = create_tensor(d, m)

#put t in format storm * timestep * features
e = t.transpose((2, 0, 1))
for tt in e:
    try:
        # The module is imported as `dt`; the bare name `datetime` is not
        # defined in this notebook, so the original call always failed and the
        # failure was silently swallowed.
        # NOTE(review): `e` has three axes, so `tt[0]` is a slice rather than a
        # single string -- confirm which element is meant to be parsed.
        tt[0] = dt.datetime.strptime(tt[0], "%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        # Best effort: leave entries that cannot be parsed/stored unchanged.
        pass