In [None]:
""""

Ce code prend en paramètre un fichier de tracjectoire de cyclones extratropicaux 
et ajoute deux colonnes : 
1 - La saison dans laquelle le cyclone a eu lieu
    de deux façons différentes : 
    a - La saison dans laquelle le cyclone a le plus de point de grille 
    b - La saison est attribuée ligne par ligne, selon la colonne 'datetime'. 
        Avec cette méthode, une saison peu alors être comptabilisée dans deux 
        saisons différentes.
2 - Un mask qui détermine si le centre du cyclone est dans le domaine du CRCM6 ou non.

"""

In [1]:
import pandas as pd
import xarray as xr

In [2]:
def add_season(df1) : 

    """
    Add a column called 'season' in df that gives the season in which the ETC occured. 
    If the ETC occured in two or more season, the chosen season will be the one in which 
    the ETC has the most grid point

    DJF : December, January & November
    MAM : March, April & May
    JJA : June, July & April
    SON : September, October and December
    
    Parameters : 
        df1 (dataframe) : Dataframe to which we want to add the season column

    returns : 
        df_new : Dataframe with the season column
    """

    seasons = { 'SON': [9, 10, 11], 'DJF': [12, 1, 2], 'MAM': [3, 4, 5], 'JJA': [6, 7, 8] }

    # Step 1 : Add 'month' column in dataframe 

    df1['month'] = (df1.datetime // 10000) % 100

    # Step 2 : Group the storms by their ID and count the number of grid point 
    #          in each month

    storm_seasons = df1.groupby(['storm', 'month']).size().unstack().fillna(0)

    # Step 3 : Determine the month with the maximum grid points for each storm

    storm_seasons['season'] = storm_seasons.idxmax(axis=1)
    
    # Step 4 : Transform month number into season

    """
    Steps for this line : 

        1. 'map' function is called on 'season' column to apply a function 
            on each element in the 'season' column. 
        2.  Inside 'map' function, there is a lambda function that takes 'month'
            as an input.
                a.  The function iterates over the 'seasons' dictionnary with 
                    season for season, months in seasons.items()
                b.  For each (season, months) pair in the dictionnary, the 
                    function checks if the given month is present in the list 
                    of months. 
                c.  If a match is found, it returns the season associated with the 
                    month list in the dictionnary. The (next) function is used to 
                    retreive the first match encountered. 
                d.  None is returned of no match is found. 
        3.  Because the lambda function is used on each value in the 'season' column with 
            'map', the resulting values are applied back to the 'season' column to change the 
            month number with the associated season. 
    """
    
    storm_seasons['season'] = storm_seasons['season'].map(
    lambda month: next((season for season, months in seasons.items() if month in months), None)
    )
    
    # Step 5 : Merge the season column into original dataframe
    
    df_new = df1.merge(storm_seasons['season'], on='storm', how='left')

    # Step 6 : Delete month column

    df_new = df_new.drop(['month'], axis = 1)
    
    # Step 7 : move season column next to datetime (TODO)
    
    #df_new.insert(3, 'season', df_new.pop('season'))

    return df_new


In [4]:
# read mask and catalogue files
file = '/pampa/picart/Masks/mask_GEM5_ERA5grid'
cat = pd.read_csv('~data/ReAnalysis/ERA5/Storm_analysis/NAECv1/NAEC_1979_2020_v1.csv')
data = xr.open_dataset(file)

# export netcdf to dataframe and drop index
mask = data.to_dataframe().reset_index()
mask = mask.rename(columns={'lat' : 'latitude', 'lon' : 'longitude'})

In [5]:
# Only keep relevant columns
cat_rel = cat[['storm', 'lifetime', 'datetime', 'latitude', 'longitude', 'VORS_av08', 'VORSmax']]

# Add season column
cat_sn = add_season(cat_rel)

# Add HU column 
cat_sn_hu = cat_sn.merge(mask, how='left', on=['latitude', 'longitude'])
cat_sn_hu = cat_sn_hu.fillna(value = False)

# save csv
#cat_sn_hu.to_csv('/pampa/cloutier/storm_tracks/NAEC/NAEC_1979-2020_max_season.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['month'] = (df1.datetime // 10000) % 100


### create season column for each line

Create the season column with the month.

In [None]:
cat = pd.read_csv('/home/data/ReAnalysis/ERA5/Storm_analysis/NAECv1/NAEC_1979_2020_v1.csv')

In [10]:
# Add month column in catalogue
cat['month'] = (cat['datetime'] // 10000) % 100

seasons = {'SON': [9, 10, 11], 'DJF': [12, 1, 2], 'MAM': [3, 4, 5], 'JJA': [6, 7, 8]}

# get the season based on the month in datetime
def get_season(month) : 
    for season, months in seasons.items() : 
        if month in months : 
            return season

# Add season according to month
cat['season'] = cat['month'].apply(get_season)
cat.drop(columns=['month'], inplace=True)

In [14]:
# save file as csv
cat.to_csv('/pampa/cloutier/storm_tracks/NAEC/NAEC_1979-2020_month_to_season.csv',index=False)