In [1]:
import pandas as pd
import math
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import numpy as np
import xarray as xr

In [2]:
# Storm catalogue (CSV) -> DataFrame `df`
df = pd.read_csv('/home/data/ReAnalysis/ERA5/Storm_analysis/NAECv1/NAEC_1979_2020_v1.csv')

# Domain mask stored as a netCDF file
file = '/pampa/picart/Masks/mask_GEM5_ERA5grid'
data = xr.open_dataset(file)

# Flatten the dataset into a table, then move the lat/lon index levels
# back into ordinary columns so they can be compared with the catalogue
mask = data.to_dataframe().reset_index()

# Retain only the mask rows whose HU flag is not False
maskT = mask[mask.HU != False]

### Keep storms that were active in NNA for at least 24h

In [1]:
# Build `df24`: the catalogue restricted to the storms that have at least
# MIN_POINTS_IN_DOMAIN track points inside the CRCM6 domain (the mask points
# kept in `maskT`). One track point is taken to represent one hour, so
# 24 points ~ 24 h -- NOTE(review): confirm the catalogue time step.
#
# A new column 'nna lifetime' is added. It works like a lifetime counter that
# only advances while the storm is inside the domain: the n-th in-domain point
# of a storm gets the value n, and every point outside the domain gets 0
# (leaving the domain does not reset the count).

MIN_POINTS_IN_DOMAIN = 24
LIFETIME_COL = 'nna lifetime'

mask_lat = maskT['lat']
mask_lon = maskT['lon']

# Hash every (lat, lon) pair of the domain once, so each track point is tested
# with a constant-time lookup instead of a full scan of the mask.
# A track point counts as inside only if it matches a mask point exactly.
domain_points = set(zip(mask_lat.tolist(), mask_lon.tolist()))
in_domain = [
    point in domain_points
    for point in zip(df['latitude'].tolist(), df['longitude'].tolist())
]

# The new column first carries the inside/outside flag; it is replaced by the
# running count storm by storm below. `assign` returns a new frame, so the
# original catalogue `df` is never modified.
flagged = df.assign(**{LIFETIME_COL: in_domain})

selected_storms = []
# sort=False keeps the storms in order of first appearance in the catalogue.
for storm_id, storm_data in flagged.groupby('storm', sort=False):
    inside = storm_data[LIFETIME_COL].to_numpy(dtype=bool)

    if inside.sum() < MIN_POINTS_IN_DOMAIN:
        continue

    # Running count on in-domain points, 0 on the others.
    nna_lifetime = np.where(inside, inside.cumsum(), 0)
    selected_storms.append(storm_data.assign(**{LIFETIME_COL: nna_lifetime}))

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# growing a frame inside the loop copies it at every iteration.
if selected_storms:
    df24 = pd.concat(selected_storms)
else:
    df24 = pd.DataFrame(columns=flagged.columns)

# Number of storms kept
print(len(selected_storms))

# Save df24 to a CSV file
df24.to_csv('/pampa/cloutier/etc_24_nna.csv', index=False)

NameError: name 'pd' is not defined

### Extract an ETC DataFrame for each season (DJF, MAM, JJA, SON)

In [30]:
# Reload the filtered catalogue from the CSV written by the 24 h filtering
# cell above (same path) -- presumably so the seasonal split can be run
# without recomputing the filter.
df24 = pd.read_csv('/pampa/cloutier/etc_24_nna.csv')

       lifetime  datetime  latitude  longitude  MSLPmin  VORSmax  VORS_av02  \
storm                                                                         
1            28        28        28         28       28       28         28   
3            32        32        32         32       32       32         32   
13           87        87        87         87       87       87         87   
26          125       125       125        125      125      125        125   
32           66        66        66         66       66       66         66   
...         ...       ...       ...        ...      ...      ...        ...   
24589        92        92        92         92       92       92         92   
24591       114       114       114        114      114      114        114   
24599        43        43        43         43       43       43         43   
24601       166       166       166        166      166      166        166   
24602       124       124       124        124      

In [31]:
def get_season(m1, m2, m3, data=None):
    """Return the rows whose month is one of the three requested months.

    The month is decoded from the ``datetime`` column as
    ``(datetime // 10000) % 100``, so that column is expected to hold integer
    timestamps with four digits after the month (presumably YYYYMMDDHH --
    confirm against the catalogue).

    Args:
        m1, m2, m3 (int): The three wanted months.
        data (DataFrame, optional): Table to filter; it must have a
            ``datetime`` column. Defaults to the notebook-level ``df24``.

    Returns:
        DataFrame: all the rows (ETC track points) of ``data`` that fall
        within the wanted season, in their original order and with their
        original index.
    """
    if data is None:
        # Looked up at call time, so the function always sees the current df24.
        data = df24

    # Decode the month once and test membership in a single pass.
    month = (data['datetime'] // 10000) % 100
    return data.loc[month.isin([m1, m2, m3])]

# One DataFrame per three-month season, listed in calendar order
djf = get_season(12, 1, 2)    # December, January, February
mam = get_season(3, 4, 5)     # March, April, May
jja = get_season(6, 7, 8)     # June, July, August
son = get_season(9, 10, 11)   # September, October, November

In [32]:
# Write each seasonal subset to its own CSV file (row index not saved)
for season_frame, csv_path in (
    (djf, '/pampa/cloutier/etc_24_nna_djf.csv'),
    (mam, '/pampa/cloutier/etc_24_nna_mam.csv'),
    (jja, '/pampa/cloutier/etc_24_nna_jja.csv'),
    (son, '/pampa/cloutier/etc_24_nna_son.csv'),
):
    season_frame.to_csv(csv_path, index=False)