In [1]:
import pandas as pd

In [5]:
def add_season(df1) : 

    """
    Add a column called 'season' in df that gives the season in which the ETC occured. 
    If the ETC occured in two or more season, the chosen season will be the one in which 
    the ETC has the most grid point

    DJF : December, January & November
    MAM : March, April & May
    JJA : June, July & April
    SON : September, October and December
    
    Parameters : 
        df1 (dataframe) : Dataframe to which we want to add the season column

    returns : 
        df_new : Dataframe with the season column
    """

    seasons = { 'SON': [9, 10, 11], 'DJF': [12, 1, 2], 'MAM': [3, 4, 5], 'JJA': [6, 7, 8] }

    # Step 1 : Add 'month' column in dataframe 

    df1['month'] = (df1.datetime // 10000) % 100

    # Step 2 : Group the storms by their ID and count the number of grid point 
    #          in each month

    storm_seasons = df1.groupby(['storm', 'month']).size().unstack().fillna(0)

    # Step 3 : Determine the month with the maximum grid points for each storm

    storm_seasons['season'] = storm_seasons.idxmax(axis=1)
    
    # Step 4 : Transform month number into season

    """
    Steps for this line : 

        1. 'map' function is called on 'season' column to apply a function 
            on each element in the 'season' column. 
        2.  Inside 'map' function, there is a lambda function that takes 'month'
            as an input.
                a.  The function iterates over the 'seasons' dictionnary with 
                    season for season, months in seasons.items()
                b.  For each (season, months) pair in the dictionnary, the 
                    function checks if the given month is present in the list 
                    of months. 
                c.  If a match is found, it returns the season associated with the 
                    month list in the dictionnary. The (next) function is used to 
                    retreive the first match encountered. 
                d.  None is returned of no match is found. 
        3.  Because the lambda function is used on each value in the 'season' column with 
            'map', the resulting values are applied back to the 'season' column to change the 
            month number with the associated season. 
    """
    
    storm_seasons['season'] = storm_seasons['season'].map(
    lambda month: next((season for season, months in seasons.items() if month in months), None)
    )
    
    # Step 5 : Merge the season column into original dataframe
    
    df_new = df1.merge(storm_seasons['season'], on='storm', how='left')

    # Step 6 : Delete month column

    df_new = df_new.drop(['month'], axis = 1)
    
    # Step 7 : move season column next to datetime (TODO)
    
    #df_new.insert(3, 'season', df_new.pop('season'))

    return df_new


In [6]:
cat = pd.read_csv('/home/data/ReAnalysis/ERA5/Storm_analysis/NAECv1/NAEC_1979_2020_v1.csv')

In [10]:
# Only keep relevant columns
cat_rel = cat[['storm', 'lifetime', 'datetime', 'latitude', 'longitude']]

# Add season column
cat_sn = add_season(cat_rel)

# save csv
cat_sn.to_csv('/pampa/cloutier/storm_tracks/NAEC/NAEC_1979-2020_season.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['month'] = (df1.datetime // 10000) % 100


In [9]:
cat_sn

Unnamed: 0,storm,lifetime,datetime,latitude,longitude,season
0,1,1,1979010101,41.25,275.75,DJF
1,1,2,1979010102,41.75,276.50,DJF
2,1,3,1979010103,42.50,277.25,DJF
3,1,4,1979010104,43.25,278.25,DJF
4,1,5,1979010105,43.75,279.00,DJF
...,...,...,...,...,...,...
1833492,24604,29,2020123119,32.00,315.00,DJF
1833493,24604,30,2020123120,31.75,315.00,DJF
1833494,24604,31,2020123121,31.75,315.00,DJF
1833495,24604,32,2020123122,31.75,315.00,DJF
