In [1]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
import geopandas as gpd
from tqdm import tqdm
import matplotlib.pyplot as plt
plt.style.use('seaborn-darkgrid')
plt.rcParams.update({'font.size': 14})
from matplotlib.offsetbox import AnchoredText
from IPython.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))

from calendar import monthrange
import datetime
import warnings
warnings.filterwarnings('ignore')

###### Assign random date

In [2]:
pathin = Path(r'../data/STORM/processed')
dfall = pd.read_pickle(pathin/'STORM_NA_R4.pkl')

In [3]:
dfcl = pd.read_pickle(pathin/'STORM_NA_R4_In_pnt_v6_MDAvec_12params_1000.pkl')

In [4]:
dfcl.head()

Unnamed: 0,lat_at_min_distNC,lon_at_min_distNC,min_press_at_min_distNC,rad_to_max_ws_at_min_distNC,forward_speed_at_min_distNC,landfall,trajectory_in_roi,mean_trajectory_roi,mean_curvature_roi,duration,lat_gen,lon_gen,index_cluster,weight,npoints,tc_number
0,35.3,-74.2,997.3,51.444446,22.997927,0.0,210.963757,168.218203,0.248986,111.0,36.4,-73.7,3384,0.000204,2,37378.0
1,33.1,-71.4,924.8,13.581333,44.791196,1.0,26.565051,29.62122,0.018094,276.0,11.9,-37.5,5962,0.000612,6,65593.0
2,32.7,-71.9,1001.576427,166.679993,90.538476,0.0,33.690068,42.968477,0.13803,117.0,32.7,-71.9,9400,0.000102,1,105184.0
3,37.5,-76.4,993.3,132.398292,36.181619,1.0,299.054604,283.776639,0.043328,216.0,6.7,-28.7,4260,0.000306,3,46863.0
4,35.0,-77.0,996.5,28.573715,4.790183,1.0,341.565051,327.26481,1.203544,30.0,33.8,-76.9,7518,0.004489,44,83738.0


In [5]:
## full data of selected storms
dfallsub = dfall.loc[dfall['tc_number'].isin(dfcl['tc_number']), :]

In [15]:
## count number of storms per year

In [6]:
dfall.head()

Unnamed: 0,year,month,yr_tc_number,time_step,basin_id,lat,lon,min_press,max_ws,rad_to_max_ws,cat,landfall,dist_to_land,tc_number,dist_to_NC,trajectory,forwardSpeed
0,0.0,10.0,0.0,0.0,1.0,8.9,-45.2,993.946699,20.371998,46.299999,0.0,0.0,851.946656,0,4336.13314,281.309932,18.695934
1,0.0,10.0,0.0,1.0,1.0,9.0,-45.7,992.8,21.2,45.905957,0.0,0.0,815.967429,0,4290.629311,278.130102,18.695934
2,0.0,10.0,0.0,2.0,1.0,9.1,-46.4,992.1,21.7,45.511914,0.0,0.0,766.103506,0,4230.498007,276.340192,25.916882
3,0.0,10.0,0.0,3.0,1.0,9.2,-47.3,993.0,21.1,45.117872,0.0,0.0,705.759651,0,4156.153101,275.710593,33.179142
4,0.0,10.0,0.0,4.0,1.0,9.3,-48.3,993.1,20.9,44.723829,0.0,0.0,647.548285,0,4075.371766,281.309932,36.81221


In [7]:
nstorms_per_year = dfall.groupby(['year'])['yr_tc_number'].max()

In [8]:
mean_val = nstorms_per_year.mean()
std_val = nstorms_per_year.std()

In [9]:
mean_val

9.7065

In [10]:
std_val

3.2459838864570965

In [11]:
exe_id = np.random.randint(0, 99)

In [12]:
exe_id

0

In [13]:
dfcl2 = dfcl.copy()
dfout = pd.DataFrame()
for yr in tqdm(np.arange(2020, 1800, -1)):
    ## select random number storms for year yr
    n = np.random.randint(int(mean_val - std_val), int(mean_val + std_val))
    ## select random storms
    try:
        dfcls = dfcl2.sample(n)
    except:
        ## there are less storms without date assigned than n
        dfcls = dfcl2.copy()
    
    for i in range(len(dfcls)):
        dfa = dfallsub[dfallsub['tc_number'] == dfcls['tc_number'].iloc[i]]
        ndays = monthrange(yr, int(dfa['month'].unique()))[1]
        dummy = 0
        i = 0
        while dummy == 0:
            startday = np.random.randint(1, ndays)
            starthour = np.random.randint(0, 23)
            dfa.loc[:, 'date'] = pd.date_range(start = datetime.datetime(yr, int(dfa['month'].unique()), startday, starthour), periods = len(dfa), freq = '3H')
            aux = np.array([x.month for x in dfa['date']])
            l = aux[aux == dfa['month'].unique()]
            if len(l) > 3*len(dfa)/4:
                dummy = 1
#                 print(f'Done with {i} iterations')
        dfout = pd.concat([dfout, dfa])
    ## delete selected updated storms
    dfcl2 = dfcl2.drop(dfcls.index, axis = 'rows')
    
    if len(dfcl2) == 0:
        break
dfallsub['random_date'] = dfout.loc[dfallsub.index, 'date']

dfallsub.to_pickle(pathin/f'STORM_NA_R4_In_pnt_v6_MDAvec_12params_1000_map_original_ts_exec{exe_id:02d}.pkl')

 50%|█████████████████████████████████████                                     | 110/220 [00:03<00:03, 36.60it/s]


###### Get tide information

In [15]:
pathin = Path(r'../data/STORM/processed')
df = pd.read_pickle(pathin/f'STORM_NA_R4_In_pnt_v6_MDAvec_12params_1000_map_original_ts_exec{exe_id:02d}.pkl')

In [16]:
df.head()

Unnamed: 0,year,month,yr_tc_number,time_step,basin_id,lat,lon,min_press,max_ws,rad_to_max_ws,cat,landfall,dist_to_land,tc_number,dist_to_NC,trajectory,forwardSpeed,random_date
4454,12.0,6.0,14.0,0.0,1.0,22.8,-95.5,991.109469,20.371998,37.040001,0.0,0.0,244.279646,136,2275.313101,90.0,18.103784,1963-06-11 12:00:00
4455,12.0,6.0,14.0,1.0,1.0,22.8,-95.2,991.0,20.4,37.040001,0.0,0.0,272.553045,136,2252.246345,116.565051,10.267294,1963-06-11 15:00:00
4456,12.0,6.0,14.0,2.0,1.0,22.7,-95.0,989.9,21.2,37.040001,0.0,0.0,286.965024,136,2244.362245,108.434949,7.778969,1963-06-11 18:00:00
4457,12.0,6.0,14.0,3.0,1.0,22.6,-94.7,987.9,22.4,37.040001,0.0,0.0,313.049588,136,2229.085682,111.801409,10.921224,1963-06-11 21:00:00
4458,12.0,6.0,14.0,4.0,1.0,22.4,-94.2,985.0,24.2,37.040001,0.0,0.0,358.993825,136,2206.82442,90.0,18.670866,1963-06-12 00:00:00


In [17]:
for itc, tc in tqdm(enumerate(df['tc_number'].unique())):
    dfs = df[df['tc_number'] == tc]
    dur = (dfs['random_date'].iloc[-1] - dfs['random_date'].iloc[0]).total_seconds()/86400
    hh = dfs['random_date'].iloc[0].hour
    dd = dfs['random_date'].iloc[0].day
    mm = dfs['random_date'].iloc[0].month
    yy = dfs['random_date'].iloc[0].year
    with open(r'TideFac-Code-Executable/dates.txt', 'w') as fout:
        fout.write(f'{dur:0.3f}\n')
        fout.write(f'{hh:02d},{dd},{mm},{yy}')
    os.system(r'TideFac-Code-Executable/a.out < TideFac-Code-Executable/dates.txt > TideFac-Code-Executable/screen.txt')
    
    aux = pd.read_csv('tide_fac.out', skiprows=9, header = None, delim_whitespace = True,
                           names = [f'node_factor_{int(tc)}', f'eq_arg_{int(tc)}'])
    if itc == 0:
        dfout = aux.copy()
    else:
        dfout = pd.concat([dfout, aux], axis = 1)
dfout.to_pickle(pathin/f'Tide_variables_for_exec{exe_id:02d}.pkl')

1000it [00:16, 58.86it/s]


In [18]:
dfout

Unnamed: 0,node_factor_136,eq_arg_136,node_factor_454,eq_arg_454,node_factor_607,eq_arg_607,node_factor_622,eq_arg_622,node_factor_805,eq_arg_805,...,node_factor_108826,eq_arg_108826,node_factor_109134,eq_arg_109134,node_factor_109191,eq_arg_109191,node_factor_109271,eq_arg_109271,node_factor_109296,eq_arg_109296
M2,1.01415,248.43,0.99864,232.34,0.99205,330.79,0.98474,180.14,0.96519,102.41,...,0.9698,251.37,1.03631,264.18,1.0361,146.16,1.03262,292.76,0.9973,241.02
S2,1.0,0.0,1.0,300.0,1.0,240.0,1.0,240.0,1.0,30.0,...,1.0,120.0,1.0,270.0,1.0,300.0,1.0,150.0,1.0,330.0
N2,1.01415,330.31,0.99864,117.67,0.99205,131.09,0.98474,57.31,0.96519,183.97,...,0.9698,256.4,1.03631,2.63,1.0361,137.12,1.03262,221.34,0.9973,249.61
K1,0.96982,340.46,1.01924,39.75,1.03855,158.41,1.05888,287.24,1.10812,251.5,...,1.09713,299.26,0.88771,152.85,0.88859,30.06,0.90244,333.83,1.02326,37.55
M4,1.02849,136.87,0.99729,104.68,0.98416,301.59,0.96971,0.27,0.93159,204.82,...,0.94051,142.75,1.07394,168.37,1.07349,292.32,1.06631,225.52,0.9946,122.03
O1,0.95094,272.14,1.0309,196.49,1.06212,176.0,1.09504,256.1,1.17542,211.92,...,1.15738,313.94,0.816,112.92,0.81746,117.8,0.84053,321.69,1.03739,207.32
M6,1.04304,25.3,0.99593,337.03,0.97633,272.38,0.9549,180.41,0.89916,307.22,...,0.91211,34.12,1.11294,72.55,1.11224,78.47,1.1011,158.27,0.99191,3.05
MK3,0.98354,228.89,1.01785,272.09,1.03029,129.2,1.04272,107.38,1.06954,353.91,...,1.064,190.64,0.91995,57.04,0.92066,176.22,0.93188,266.59,1.02049,278.56
S4,1.0,0.0,1.0,240.0,1.0,120.0,1.0,120.0,1.0,60.0,...,1.0,240.0,1.0,180.0,1.0,240.0,1.0,300.0,1.0,300.0
MN4,1.02849,218.75,0.99729,350.02,0.98416,101.88,0.96971,237.45,0.93159,286.38,...,0.94051,147.78,1.07394,266.81,1.07349,283.28,1.06631,154.1,0.9946,130.63
