In [44]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statistics
import xarray as xr

In [69]:
# Load the NAEC 1979-2020 storm-track table. Its `season` column holds, for
# each ETC, the season in which that storm spent the most time.
tracks_csv = '/pampa/cloutier/storm_tracks/NAEC/NAEC_1979-2020_max_season.csv'
tracks = pd.read_csv(tracks_csv)

In [70]:
# Number of distinct storms that have at least one row flagged HU.
# Counting unique storm ids directly gives the same number as
# len(groupby('storm').mean()) without averaging every column, and it does
# not hit the TypeError that mean() raises on the string `season` column
# under pandas >= 2.0 (where numeric_only defaults to False).
total_storm = tracks.loc[tracks['HU'] == True, 'storm'].nunique()

In [71]:
# Storm count for each season in CRCM6 domain
# (presumably HU == True marks rows inside that domain -- TODO confirm)
def _season_storm_means(season):
    """Return per-storm numeric column means for HU-flagged rows of one season.

    Parameters
    ----------
    season : str
        Season label as stored in ``tracks['season']`` (e.g. ``'DJF'``).

    Returns
    -------
    pandas.DataFrame
        One row per storm id, so ``len()`` of the result is the number of
        storms assigned to that season.
    """
    selected = tracks[(tracks['season'] == season) & (tracks['HU'] == True)]
    # numeric_only=True skips the string `season` column. Without it,
    # pandas >= 2.0 raises TypeError instead of silently dropping the column.
    return selected.groupby(['storm']).mean(numeric_only=True)


djf = _season_storm_means('DJF')
jja = _season_storm_means('JJA')
son = _season_storm_means('SON')
mam = _season_storm_means('MAM')

In [72]:
# Share of all HU-flagged storms assigned to each season, in percent.
for season_label, season_storms in (
    ('jja : ', jja),
    ('son : ', son),
    ('djf : ', djf),
    ('mam : ', mam),
):
    print(season_label, (len(season_storms) / total_storm) * 100)

jja :  22.99869096767697
son :  25.13342060215487
djf :  26.482730842815428
mam :  25.385157587352737


In [75]:
# Reduce the track table to the columns used in this part of the analysis.
relevant_columns = ['storm', 'season', 'HU', 'VORS_av08']
df = tracks[relevant_columns]

In [76]:
# Display the reduced table; the notebook renders a truncated head/tail view.
df

Unnamed: 0,storm,season,HU,VORS_av08
0,1,DJF,True,0.000016
1,1,DJF,True,0.000017
2,1,DJF,True,0.000018
3,1,DJF,True,0.000018
4,1,DJF,True,0.000019
...,...,...,...,...
1833492,24604,DJF,False,0.000025
1833493,24604,DJF,False,0.000025
1833494,24604,DJF,False,0.000025
1833495,24604,DJF,False,0.000025


In [78]:
# Extract the calendar month from the integer timestamp: drop the last four
# digits, then keep the last two of what remains.
# NOTE(review): assumes `datetime` is encoded as YYYYMMDDHH -- confirm.
tracks['month'] = tracks['datetime'].floordiv(10000).mod(100)

In [90]:
# Sanity check: rows labelled SON whose month is greater than 11. These are
# expected to exist because `season` is the season in which the storm spent
# the most time, not the season of every individual row.
p = tracks[(tracks['month'] > 11) & (tracks['season'] == 'SON')]
# numeric_only=True skips the string `season` column. Without it,
# pandas >= 2.0 raises TypeError instead of silently dropping the column.
print(p.groupby(['storm']).mean(numeric_only=True))

       Unnamed: 0  lifetime      datetime   latitude   longitude  VORS_av08  \
storm                                                                         
524       38351.5     102.5  1.979120e+09  53.375000  209.250000   0.000015   
525       38440.5      86.5  1.979120e+09  59.625000  213.656250   0.000008   
532       38918.5     126.5  1.979120e+09  68.408537  342.932927   0.000040   
1136      82541.5      32.5  1.980120e+09  46.125000  275.250000   0.000014   
1141      82849.5     162.5  1.980120e+09  63.173611  275.877315   0.000023   
...           ...       ...           ...        ...         ...        ...   
23949   1784531.5      40.5  2.019120e+09  41.682692  272.298077   0.000040   
23950   1784598.0      54.0  2.019120e+09  39.135714  353.707143   0.000021   
24540   1828799.5      30.5  2.020120e+09  50.437500  198.562500   0.000041   
24541   1828841.5      40.5  2.020120e+09  70.187500  269.781250   0.000017   
24547   1829285.0     140.0  2.020120e+09  31.770548

In [101]:
# Count the rows of each (storm, season, month) combination. With
# as_index=False the keys stay as columns and the count lands in `size`.
count_keys = ['storm', 'season', 'month']
month_count = tracks.groupby(count_keys, as_index=False).size()

In [110]:
# Flag each row of month_count whose storm id already occurred in an earlier
# row, i.e. a storm that has rows in more than one month.
dup = month_count.duplicated(subset='storm', keep='first')
print(dup)

0        False
1        False
2        False
3        False
4        False
         ...  
27054    False
27055    False
27056    False
27057    False
27058    False
Length: 27059, dtype: bool


In [103]:
# Take the maximum `size` per (storm, season, month).
# NOTE(review): month_count was built by grouping on these same three keys,
# so every group holds a single row and `size` comes back unchanged; this
# does not yet pick one month per storm.
max_keys = ['storm', 'season', 'month']
month_max = month_count.groupby(max_keys, as_index=False).max()

In [119]:
# Count the rows each storm has in month_max and list the storms that have
# more than one row (storms whose rows span several months).
storm_counts = month_max.groupby('storm').size()
has_several_rows = storm_counts.gt(1)
duplicate_storms = storm_counts.loc[has_several_rows]
print(duplicate_storms)

storm
54       2
55       2
56       2
57       2
59       2
        ..
24541    2
24542    2
24545    2
24546    2
24547    2
Length: 2455, dtype: int64


In [122]:
# For every storm keep only the month_count row with the largest `size`,
# i.e. the month holding most of that storm's rows. idxmax returns the first
# row label when several months tie. This replaces the earlier month_max.
busiest_row_labels = month_count.groupby('storm')['size'].idxmax()
month_max = month_count.loc[busiest_row_labels]

In [127]:
# Check that each storm's most frequent month falls inside its assigned season.
#
# DJF wraps around the year end, so its out-of-season months form the single
# range 3..11 and both bounds are combined with `&`.
# For JJA, SON and MAM a month is out of season when it lies above the last
# month OR below the first month of the season, so the bounds are combined
# with `|`. Joined with `&` those conditions can never be true and the check
# always reports an empty frame.

error_djf = month_max.loc[(month_max.season == 'DJF') & (month_max.month > 2) & (month_max.month < 12)]
error_jja = month_max.loc[(month_max.season == 'JJA') & ((month_max.month > 8) | (month_max.month < 6))]
error_son = month_max.loc[(month_max.season == 'SON') & ((month_max.month > 11) | (month_max.month < 9))]
error_mam = month_max.loc[(month_max.season == 'MAM') & ((month_max.month > 5) | (month_max.month < 3))]

print(error_djf, error_mam, error_jja, error_son)

Empty DataFrame
Columns: [storm, season, month, size]
Index: [] Empty DataFrame
Columns: [storm, season, month, size]
Index: [] Empty DataFrame
Columns: [storm, season, month, size]
Index: [] Empty DataFrame
Columns: [storm, season, month, size]
Index: []
