In [2]:
import pandas as pd
import math
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import numpy as np
import xarray as xr

In [5]:
# Read the storm catalogue CSV.
df = pd.read_csv('/home/data/ReAnalysis/ERA5/Storm_analysis/NAECv1/NAEC_1979_2020_v1.csv')

# Load the NetCDF mask file.
file = '/pampa/picart/Masks/mask_GEM5_ERA5grid'
data = xr.open_dataset(file)

# Flatten the dataset into a table, move the index levels back into ordinary
# columns, and rename lat/lon to latitude/longitude (the names used as merge
# keys against the catalogue further down).
mask = (
    data.to_dataframe()
    .reset_index()
    .rename(columns={'lat': 'latitude', 'lon': 'longitude'})
)

### Keep storms that were active in NNA for at least 24 CONSECUTIVE hours

In [138]:
# Build df24_consec: every catalogue row of the storms that stayed inside the
# CRCM6 domain (mask column 'HU') for at least MIN_CONSECUTIVE_HOURS
# consecutive catalogue rows.
# NOTE(review): this counts rows, so it assumes one catalogue row per hour and
# that the rows of each storm are in chronological order -- confirm against
# the catalogue.
# NOTE(review): the merge matches latitude/longitude by exact equality --
# confirm both tables carry identical grid coordinates.

MIN_CONSECUTIVE_HOURS = 24


def _longest_true_run(flags):
    """Return the length of the longest run of consecutive truthy values.

    Parameters
    ----------
    flags : iterable of bool
        One flag per catalogue row of a storm, in row order.

    Returns
    -------
    int
        Length of the longest uninterrupted stretch of truthy flags
        (0 when there is none or when ``flags`` is empty).
    """
    longest = 0
    current = 0
    for flag in flags:
        if flag:
            current += 1
            if current > longest:
                longest = current
        else:
            # a point outside the domain interrupts the run
            current = 0
    return longest


# Attach the mask to every catalogue row; rows whose coordinates have no match
# in the mask get NaN in the mask columns.
merge = df.merge(mask, how='left', on=['latitude', 'longitude'])

# Points without a mask match count as outside the domain. Only the mask flag
# is filled: filling the whole frame would also overwrite missing values of
# the catalogue columns with False in the exported CSV.
merge['HU'] = merge['HU'].fillna(False)

# Collect the qualifying storms in order of first appearance and concatenate
# once (DataFrame.append was removed in pandas 2.0 and is quadratic in a loop).
kept_storms = []
for _, storm_data in merge.groupby('storm', sort=False):
    in_domain = storm_data['HU'].eq(True).to_numpy()
    if _longest_true_run(in_domain) >= MIN_CONSECUTIVE_HOURS:
        kept_storms.append(storm_data)

if kept_storms:
    df24_consec = pd.concat(kept_storms)
else:
    # no storm qualifies: keep an empty frame with the catalogue columns
    df24_consec = pd.DataFrame(columns=df.columns)

# number of storms kept
print(df24_consec['storm'].nunique())
df24_consec.to_csv('/pampa/cloutier/df24_consec.csv')

### Keep storms that were active in NNA for at least 24 hours IN TOTAL

In [137]:
# Build df24_noconsec: every catalogue row of the storms that were inside the
# CRCM6 domain (mask column 'HU') for at least MIN_TOTAL_HOURS catalogue rows
# IN TOTAL (not necessarily consecutive).
# NOTE(review): this counts rows, so it assumes one catalogue row per hour --
# confirm against the catalogue.
# NOTE(review): the merge matches latitude/longitude by exact equality --
# confirm both tables carry identical grid coordinates.

MIN_TOTAL_HOURS = 24

# Attach the mask to every catalogue row; rows whose coordinates have no match
# in the mask get NaN in the mask columns.
merge = df.merge(mask, how='left', on=['latitude', 'longitude'])

# Points without a mask match count as outside the domain. Only the mask flag
# is filled: filling the whole frame would also overwrite missing values of
# the catalogue columns with False in the exported CSV.
merge['HU'] = merge['HU'].fillna(False)

# Collect the qualifying storms in order of first appearance and concatenate
# once (DataFrame.append was removed in pandas 2.0 and is quadratic in a loop).
kept_storms = [
    storm_data
    for _, storm_data in merge.groupby('storm', sort=False)
    if storm_data['HU'].eq(True).sum() >= MIN_TOTAL_HOURS
]

if kept_storms:
    df24_noconsec = pd.concat(kept_storms)
else:
    # no storm qualifies: keep an empty frame with the catalogue columns
    df24_noconsec = pd.DataFrame(columns=df.columns)

# number of storms kept
print(df24_noconsec['storm'].nunique())
df24_noconsec.to_csv('/pampa/cloutier/df24_no_consec.csv')

In [141]:
# Report how many distinct storms each selection retained.
n_storms_consec = df24_consec['storm'].nunique()
n_storms_noconsec = df24_noconsec['storm'].nunique()

print('# storm for 24h consecutive : ', n_storms_consec)
print('# storm for 24h NOT consecutive : ', n_storms_noconsec)

# storm for 24h consecutive :  6636
# storm for 24h NOT consecutive :  6708


### Extract ETC DataFrame for specific season
OLD VERSION NOT WORKING

In [31]:
def get_season(m1, m2, m3, storms=None):
    """Return the rows whose month is one of the three requested months.

    The month is derived from the ``datetime`` column as
    ``(datetime // 10000) % 100``.
    NOTE(review): that arithmetic yields the month only for integer stamps
    laid out as YYYYMMDDHH -- confirm the catalogue's datetime format.

    Parameters
    ----------
    m1, m2, m3 : int
        The three wanted months (1-12).
    storms : pandas.DataFrame, optional
        Frame to filter; it must have a ``datetime`` column. When omitted,
        the notebook-level ``df24`` is used, as in the original version.
        NOTE(review): no cell visible in this notebook defines ``df24``;
        pass the frame explicitly or define ``df24`` before calling.

    Returns
    -------
    pandas.DataFrame
        The rows of ``storms`` that fall in one of the three months.
    """
    if storms is None:
        storms = df24

    # compute the month once instead of once per comparison
    month = (storms.datetime // 10000) % 100
    return storms.loc[month.isin([m1, m2, m3])]

# One DataFrame per season, each selected by its three months
# (jja: 6-8, son: 9-11, djf: 12, 1, 2, mam: 3-5).
# NOTE(review): get_season reads `df24`, which no cell visible in this
# notebook defines -- confirm which selection is meant before running.
jja, son, djf, mam = (
    get_season(*months)
    for months in ((6, 7, 8), (9, 10, 11), (12, 1, 2), (3, 4, 5))
)

In [32]:
# Write each seasonal DataFrame to its own CSV file, without the index column.
for season_df, csv_path in (
    (djf, '/pampa/cloutier/etc_24_nna_djf.csv'),
    (mam, '/pampa/cloutier/etc_24_nna_mam.csv'),
    (jja, '/pampa/cloutier/etc_24_nna_jja.csv'),
    (son, '/pampa/cloutier/etc_24_nna_son.csv'),
):
    season_df.to_csv(csv_path, index=False)