In [23]:
import pandas as pd
import xarray as xr
import numpy as np
import netCDF4 as nc
#from scipy.spatial import distance

In [24]:
# Open catalogue
df = pd.read_csv('/home/data/ReAnalysis/ERA5/Storm_analysis/NAECv1/NAEC_1979_2020_v1.csv')

# open netcdf mask file
file = '/pampa/picart/Masks/mask_GEM5_ERA5grid'
data = xr.open_dataset(file)

# export netcdf to dataframe and drop index
mk = data.to_dataframe().reset_index()

# Only keep grid points coordinates that are within CRCM6 domain
mk = mk.loc[mk.HU == True]

# open crcm6 boundary layer 
bnd = pd.read_csv('/pampa/cloutier/outline_crcm6_domain.csv', index_col = 0)

In [34]:
# Chen filtre les ETC qui sont pendant au moins 24h CONSÉCUTIVES dans le domaine et dont le centre est
# à une distance minimale de 5° du bord du domaine de CRCM6.

# on doit donc déterminer une fonction qui calcule la distance entre un point de grille et le bord du 
# domaine de CRCM6

# Il faut que le point de grille ait une valeur HU == True et qu'il soit è une distance minimale de 5° de 
# tous les points de grille qui tracent la limite 
mk = mk.rename(columns={'lat' : 'latitude', 'lon' : 'longitude'})
merge = df.merge(mk, how='left', on=['latitude', 'longitude'])
merge = merge.fillna(value = False)
df24_consec = pd.DataFrame(columns = df.columns)

In [37]:
# Fonction qui détermine si le point donné dans le catalogue est à une distance 
# minimale de tous les points de grille du frame du domaine de crcm6 de 5°

def get_distance(latS, lonS, bnd) :
    dist_cond = True

    for _, row2 in bnd.iterrows():
        latD = row2['lat']
        lonD = row2['lon']
        dist = ((latS-latD)**2 + (lonS - lonD)**2)**0.5
        
        if dist < 5 : 
            dist_min = False
            break
            
    return dist_cond

In [85]:
# Iterate through each storm
for storm_id in merge['storm'].unique():
    storm_data = merge[merge['storm'] == storm_id].copy()
    count = 0
   
    # Iterate through each grid point
    for _, row in storm_data.iterrows() :
        dist_cond = False
        hu = row['HU']
        latS = row['latitude']
        lonS = row['longitude']
        
        # If the grid point is in the subdomain 
        if hu : 
            dist_cond = get_distance(latS, lonS, bnd)
    
        # No need to put and HU == True in the next if condition, because if dist_cond = true, 
        # it means that the if hu conditions was respected (dist_cond is reset to false for each 
        # grid point iteration )
        if dist_cond : 
            count += 1

        if hu == False and count >= 24 :
            break

        if hu == False and count < 24 :
            count = 0

    if count >= 24 : 
        df24_consec = df24_consec.append(storm_data)
        print('Year in process : ', df24_consec['datetime'].iloc[-1])
        print('Storm : ', df24_consec['storm'].iloc[-1])
        

Year in process :  1979010204
Storm :  1
Year in process :  1979010218
Storm :  3
Year in process :  1979010822
Storm :  12
Year in process :  1979011015
Storm :  13


KeyboardInterrupt: 

In [31]:

        mk_filtered = mk.loc[(mk['latitude'] <= latS - 5) | (mk['latitude'] >= latS + 5) |
                             (mk['longitude'] <= lonS - 5) | (mk['longitude'] >= lonS + 5)]

28

In [1]:
import pandas as pd
import math
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import numpy as np
import xarray as xr

""" 

    Maxine Cloutier-Gervais

Created : 

    June 7th, 2023

Info : 
    
    This code creates and filters, a file that contains ETC that were active for 24 consecutives 
    hours or more in the crcm6 domain.

"""

def open_cat_mask(cat_in, bnd_in, mask_in) : 

    """
    Open NAEC catalogue, boundary file  and transform netCDF mask into dataframe 

    Parameters  : 
        cat_in  : path of the catalogue csv file
        mask_in : path of the mask netCDF file
        bnd_in  : path of the csv file that defines the boundary grid points
                  of CRCM6 domain
                    
    Returns : 
        cat : Dataframe containing NAEC catalogue data
        mk  : Dataframe containing mask data
        bnd : Dataframe containing boundary grid points 

    """
    
    # Step 1 : Open catalogue and boundary csv file
    print('lecture cat...')
    cat = pd.read_csv(cat_in)
    print('lecture bnd...')
    bnd = pd.read_csv(bnd_in, index_col = 0)

    # Step 2 : Open mask netCDF file and convert into dataframe
    print('lecture mask...')
    mk = xr.open_dataset(mask_in)
    mk = mk.to_dataframe()

    # Step 3 :  Drop index lat lon, but keep columns
    mk = mk.reset_index()

    # Step 4 : Rename lat & lon columns for latitude & longitude
    mk = mk.rename(columns={'lat' : 'latitude', 'lon' : 'longitude'})

    return cat, bnd, mk


def get_distance(latS, lonS, bnd) : 

    """
    Determine if a given grid point is at a minimal distance of 5deg from
    all CRCM6 boundary grid point domain

    Paramters : 
        latS  : Latitude of the catalogue grid point
        lonS  : Longitude of the catalogue grid point
        bnd   : Dataframe containing boundary grid points

    Returns       : 
        dist_cond : True if all grid points are within a minimal distance of 5deg
                    from all boundary layer grid points and False if not.
    """

    dist_cond = True
    
    # filter out boundary grid points to restrict search
    bnd_filt = bnd.loc[(bnd['lat'] < latS - 6) | (bnd['lat'] > latS + 6) |
                       (bnd['lon'] < lonS - 6) | (bnd['lon'] > lonS + 6)]
    
    for _, row1 in bnd_filt.iterrows():
        latD = row1['lat']
        lonD = row1['lon']
        dist = ((latS-latD)**2 + (lonS - lonD)**2)**0.5
        
        if dist < 5 : 
            dist_min = False
            break
            
    return dist_cond




def add_season(df, output_file) : 

    """
    Add a column called 'season' in df24 that gives the season in which the ETC occured. 
    If the ETC occured in two or more season, the chosen season will be the one in which 
    the ETC has the most grid point

    DJF : December, January & November
    MAM : March, April & May
    JJA : June, July & April
    SON : September, October and December
    
    Parameters : 
        df (dataframe) : Dataframe to which we want to add the season column

    returns : 
        df_new : 
    """

    seasons = { 'SON': [9, 10, 11], 'DJF': [12, 1, 2], 'MAM': [3, 4, 5], 'JJA': [6, 7, 8] }

    # Step 1 : Add 'month' column in dataframe 

    df['month'] = (df.datetime // 10000) % 100

    # Step 2 : Group the storms by their ID and count the number of grid point 
    #          in each month

    storm_seasons = df.groupby(['storm', 'month']).size().unstack().fillna(0)

    # Step 3 : Determine the month with the maximum grid points for each storm

    storm_seasons['season'] = storm_seasons.idxmax(axis=1)
    
    # Step 4 : Transform month number into season
    
    storm_seasons['season'] = storm_seasons['season'].map(
    lambda month: next((season for season, months in seasons.items() if month in months), None)
    )
    
    # Step 5 : Merge the season column into original dataframe
    
    df_new = df.merge(storm_seasons['season'], on='storm', how='left')

    # Step 6 : Delete month column

    df_new = df_new.drop(['month'], axis = 1)
    
    # Step 7 : move season column next to datetime (TODO)
    
    #df_new.insert(3, 'season', df_new.pop('season'))

    return df_new



""" MAIN PROGRAM """


# Step 1 : Open catalogue, boundary catalogue and mask
cat_in = ('/home/data/ReAnalysis/ERA5/Storm_analysis/NAECv1/NAEC_1979_2020_v1.csv')
bnd_in = ('/pampa/cloutier/outline_crcm6_domain.csv')
mask_in = ('/pampa/picart/Masks/mask_GEM5_ERA5grid')

cat, bnd, mk = open_cat_mask(cat_in, bnd_in, mask_in)

# Step 2 : Merge cat and mask to add HU column in cat
#cat = cat.loc[(cat.storm == 3) | (cat.storm == 1)]
merge = cat.merge(mk, how='left', on=['latitude', 'longitude'])
merge = merge.fillna(value = False)

# Step 3 : Initialize empty dataframe that will contain the final result

df24 = pd.DataFrame(columns = cat.columns)

# Step 4 : Filter catalogue data

# Iterate through each storm 
for storm_id in merge['storm'].unique():
    storm_data = merge[merge['storm'] == storm_id].copy() # copy of merge for the given storm
    count = 0 # lifetime count
    stInDom=[]
    print('stordID = ', storm_id)
    
    # Iterate through each grid point of the storm
    for _, row in storm_data.iterrows() : 
        hu = row['HU']
        cond = False
        latS = row['latitude']
        lonS= row['longitude']
        
        # check if storm center is within subdomain and at a 5° minimal 
        # distance from boundaries
        if hu : 
            #print('check distance ...')
            cond = get_distance(latS, lonS, bnd) 
        stInDom.append(cond)
    
    # add a new column that determines if each storm center agrees or not with the above condition
    #print('adding StInDom in storm_data ...')
    storm_data['StInDom'] = stInDom
    n = 23
    # exclude the last 23 lines in the search
    #storm_data_rows = storm_data.head(len(storm_data) - n)
    storm_data_rows = storm_data.iloc[:-n]
    
    count = 0
    
    #print('second for loop : ', row['storm'])
    for idx, row in storm_data_rows.iterrows() :
        if row['StInDom'] == True : 
            #print('Initializing count to one')
            count = 1
            
            # keep iterating for the next 23 grid points
            for _, row in islice(storm_data.iterrows(), idx, idx+23) : 
                if row['StInDom'] : 
                    count += 1
                    print(count)
                
                else :  
                    #print(row['StInDom'], 'storm not in domain')
                    break
            
            if count >= 24 : 
                break
                
    print(count)                           
    if count >= 24 :
        df24 = df24.append(storm_data)
        print('Year in process : ', df24['datetime'].iloc[-1])      

lecture cat...


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [124]:
import pandas as pd
import math
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import numpy as np
import xarray as xr

""" 

    Maxine Cloutier-Gervais

Created : 

    June 7th, 2023

Info : 
    
    This code creates and filters, a file that contains ETC that were active for 24 consecutives 
    hours or more in the crcm6 domain.

"""

def open_cat_mask(cat_in, bnd_in, mask_in) : 

    """
    Open NAEC catalogue, boundary file  and transform netCDF mask into dataframe 

    Parameters  : 
        cat_in  : path of the catalogue csv file
        mask_in : path of the mask netCDF file
        bnd_in  : path of the csv file that defines the boundary grid pointsd
                  of CRCM6 domain
                    
    Returns : 
        cat : Dataframe containing NAEC catalogue data
        mk  : Dataframe containing mask data
        bnd : Dataframe containing boundary grid points 

    """
    
    # Step 1 : Open catalogue and boundary csv file
    print('lecture cat...')
    cat = pd.read_csv(cat_in)
    print('lecture bnd...')
    bnd = pd.read_csv(bnd_in, index_col = 0)

    # Step 2 : Open mask netCDF file and convert into dataframe
    print('lecture mask...')
    mk = xr.open_dataset(mask_in)
    mk = mk.to_dataframe()

    # Step 3 :  Drop index lat lon, but keep columns
    mk = mk.reset_index()

    # Step 4 : Rename lat & lon columns for latitude & longitude
    mk = mk.rename(columns={'lat' : 'latitude', 'lon' : 'longitude'})

    return cat, bnd, mk


def get_distance(latS, lonS, bnd) : 

    """
    Determine if a given grid point is at a minimal distance of 5deg from
    all CRCM6 boundary grid point domain

    Paramters : 
        latS  : Latitude of the catalogue grid point
        lonS  : Longitude of the catalogue grid point
        bnd   : Dataframe containing boundary grid points

    Returns       : 
        dist_cond : True if all grid points are within a minimal distance of 5deg
                    from all boundary layer grid points and False if not.
    """

    dist_cond = True
    
    # filter out boundary grid points to restrict search
    bnd_filt = bnd.loc[(bnd['lat'] < latS - 6) | (bnd['lat'] > latS + 6) |
                       (bnd['lon'] < lonS - 6) | (bnd['lon'] > lonS + 6)]
    
    for _, row1 in bnd_filt.iterrows():
        latD = row1['lat']
        lonD = row1['lon']
        dist = ((latS-latD)**2 + (lonS - lonD)**2)**0.5
        
        if dist < 5 : 
            dist_min = False
            break
            
    return dist_cond




def add_season(df, output_file) : 

    """
    Add a column called 'season' in df24 that gives the season in which the ETC occured. 
    If the ETC occured in two or more season, the chosen season will be the one in which 
    the ETC has the most grid point

    DJF : December, January & November
    MAM : March, April & May
    JJA : June, July & April
    SON : September, October and December
    
    Parameters : 
        df (dataframe) : Dataframe to which we want to add the season column

    returns : 
        df_new : 
    """

    seasons = { 'SON': [9, 10, 11], 'DJF': [12, 1, 2], 'MAM': [3, 4, 5], 'JJA': [6, 7, 8] }

    # Step 1 : Add 'month' column in dataframe 

    df['month'] = (df.datetime // 10000) % 100

    # Step 2 : Group the storms by their ID and count the number of grid point 
    #          in each month

    storm_seasons = df.groupby(['storm', 'month']).size().unstack().fillna(0)

    # Step 3 : Determine the month with the maximum grid points for each storm

    storm_seasons['season'] = storm_seasons.idxmax(axis=1)
    
    # Step 4 : Transform month number into season
    
    storm_seasons['season'] = storm_seasons['season'].map(
    lambda month: next((season for season, months in seasons.items() if month in months), None)
    )
    
    # Step 5 : Merge the season column into original dataframe
    
    df_new = df.merge(storm_seasons['season'], on='storm', how='left')

    # Step 6 : Delete month column

    df_new = df_new.drop(['month'], axis = 1)
    
    # Step 7 : move season column next to datetime (TODO)
    
    #df_new.insert(3, 'season', df_new.pop('season'))

    return df_new



""" MAIN PROGRAM """


# Step 1 : Open catalogue, boundary catalogue and mask
cat_in = ('/home/data/ReAnalysis/ERA5/Storm_analysis/NAECv1/NAEC_1979_2020_v1.csv')
bnd_in = ('/pampa/cloutier/outline_crcm6_domain.csv')
mask_in = ('/pampa/picart/Masks/mask_GEM5_ERA5grid')

#cat, bnd, mk = open_cat_mask(cat_in, bnd_in, mask_in)

# Step 2 : Merge cat and mask to add HU column in cat
#cat = cat.loc[(cat.storm == 3) | (cat.storm == 1)]
merge = cat.merge(mk, how='left', on=['latitude', 'longitude'])
merge = merge.fillna(value = False)

# Step 3 : Initialize empty dataframe that will contain the final result

df24 = pd.DataFrame(columns = cat.columns)

# Step 4 : Filter catalogue data

# Iterate through each storm 
for storm_id in merge['storm'].unique():
    storm_data = merge[merge['storm'] == storm_id].copy() # copy of merge for the given storm
    count = 0 # lifetime count
    stInDom=[]

    # Iterate through each grid point of the storm
    for _, row in storm_data.iterrows() : 
        hu = row['HU']
        cond = False
        latS = row['latitude']
        lonS= row['longitude']
        
        # check if storm center is within subdomain and at a 5° minimal 
        # distance from boundaries
        if hu : 
            #print('check distance ...')
            cond = get_distance(latS, lonS, bnd) 
        stInDom.append(cond)
    
    # add a new column that determines if each storm center agrees or not with the above condition
    #print('adding StInDom in storm_data ...')
    storm_data['StInDom'] = stInDom
    n = 23
    # exclude the last 23 lines in the search
    #storm_data_rows = storm_data.head(len(storm_data) - n)
    storm_data_rows = storm_data.iloc[:-n]
    
    count = 0
    
    #print('second for loop : ', row['storm'])
    for idx, row in storm_data_rows.iterrows():
        if row['StInDom'] == True:
            count = 1
        
        # Iterate through the next 23 rows or until the end of the storm_data
            for i in range(idx + 1, min(idx + 24, len(storm_data))):
                if storm_data.loc[i, 'StInDom'] == True:
                    count += 1
                else:
                    break
    print(count)
#         if count >= 24:
#             df24 = df24.append(storm_data)
#             print('Year in process:', df24['datetime'].iloc[-1])
#             break


24
1


In [2]:
# Step 1 : Open catalogue, boundary catalogue and mask
cat_in = ('/home/data/ReAnalysis/ERA5/Storm_analysis/NAECv1/NAEC_1979_2020_v1.csv')
bnd_in = ('/pampa/cloutier/outline_crcm6_domain.csv')
mask_in = ('/pampa/picart/Masks/mask_GEM5_ERA5grid')

cat, bnd, mk = open_cat_mask(cat_in, bnd_in, mask_in)

# Step 2 : Merge cat and mask to add HU column in cat
#cat = cat.loc[(cat.storm == 1) | (cat.storm == 2) | (cat.storm == 3)]
merge = cat.merge(mk, how='left', on=['latitude', 'longitude'])
merge = merge.fillna(value = False)

# Step 3 : Initialize empty dataframe that will contain the final result

df24 = pd.DataFrame(columns = cat.columns)

for storm_id, group in merge.groupby('storm'):
    print('storm ... ', storm_id)
    stInDom = group['HU'] & group.apply(lambda row: get_distance(row['latitude'], row['longitude'], bnd), axis=1)
    count = 0
    print('count = ', count)
    print(stInDom)
    for value in stInDom:
        if value:
            count += 1
            print(count)
            if count >= 24:
                df24 = df24.append(group)
                print('Year in process:', df24['datetime'].iloc[-1])
                break
        else:
            count = 0


lecture cat...
lecture bnd...
lecture mask...
storm ...  1
0
0     True
1     True
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9     True
10    True
11    True
12    True
13    True
14    True
15    True
16    True
17    True
18    True
19    True
20    True
21    True
22    True
23    True
24    True
25    True
26    True
27    True
dtype: bool
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
Year in process: 1979010204
storm ...  2
0
28    False
29    False
30    False
31    False
32    False
33    False
34    False
35    False
36    False
37    False
38    False
39    False
40    False
41    False
42    False
43    False
44    False
45    False
46    False
47    False
48    False
49    False
50    False
51    False
52    False
53    False
54    False
55    False
56    False
57    False
58    False
dtype: bool
storm ...  3
0
59    False
60    False
61    False
62    False
63     True
64     True
65     True
66     True
67     True
68    

KeyboardInterrupt: 

In [28]:
etc24 = pd.read_csv('/pampa/cloutier/etc24_consec_v3.csv')

In [30]:
etc24

Unnamed: 0,storm,lifetime,datetime,latitude,longitude,MSLPmin,VORSmax,VORS_av02,VORS_av04,VORS_av06,...,PR98_ex06,PR98_ex08,PR98_ex10,PR99_ex02,PR99_ex04,PR99_ex06,PR99_ex08,PR99_ex10,HU,season
0,1,1,1979010101,41.25,275.75,1007.0,0.000094,0.000062,0.000045,0.000028,...,-9999.0000,-9999.0000,-9999.0000,-9999.00000,-9999.00000,-9999.0000,-9999.0000,-9999.0000,True,DJF
1,1,2,1979010102,41.75,276.50,1007.0,0.000098,0.000067,0.000048,0.000030,...,-9999.0000,-9999.0000,-9999.0000,-9999.00000,-9999.00000,-9999.0000,-9999.0000,-9999.0000,True,DJF
2,1,3,1979010103,42.50,277.25,1007.0,0.000098,0.000074,0.000053,0.000033,...,-9999.0000,-9999.0000,-9999.0000,-9999.00000,-9999.00000,-9999.0000,-9999.0000,-9999.0000,True,DJF
3,1,4,1979010104,43.25,278.25,1007.0,0.000100,0.000077,0.000056,0.000035,...,-9999.0000,-9999.0000,-9999.0000,-9999.00000,-9999.00000,-9999.0000,-9999.0000,-9999.0000,True,DJF
4,1,5,1979010105,43.75,279.00,1005.0,0.000104,0.000079,0.000059,0.000037,...,-9999.0000,-9999.0000,-9999.0000,-9999.00000,-9999.00000,-9999.0000,-9999.0000,-9999.0000,True,DJF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
617502,24602,120,2020123119,74.50,299.00,978.1,0.000064,0.000050,0.000038,0.000035,...,0.3422,0.3408,0.3177,0.00000,0.26870,0.5088,0.4935,0.4699,False,DJF
617503,24602,121,2020123120,75.25,297.75,978.1,0.000066,0.000054,0.000041,0.000036,...,0.2739,0.3372,0.2993,0.00000,0.09537,0.3751,0.4395,0.4261,False,DJF
617504,24602,122,2020123121,76.00,296.25,978.1,0.000069,0.000057,0.000043,0.000037,...,0.2326,0.2914,0.2653,0.09362,0.09913,0.3551,0.4105,0.3670,False,DJF
617505,24602,123,2020123122,76.75,295.25,978.9,0.000072,0.000059,0.000045,0.000037,...,0.1968,0.2612,0.2572,0.01673,0.14920,0.3698,0.4492,0.4291,False,DJF


In [63]:
mk.loc[(mk.HU == True)]

Unnamed: 0,latitude,longitude,HU
1352,31.25,291.75,True
1591,31.50,291.25,True
1592,31.50,291.50,True
1593,31.50,291.75,True
1594,31.50,292.00,True
...,...,...,...
34765,66.00,270.25,True
34997,66.25,268.00,True
34998,66.25,268.25,True
34999,66.25,268.50,True


In [64]:
etc24.loc[(etc24.latitude == 31.25) & (etc24.longitude == 291.75)]

Unnamed: 0,storm,lifetime,datetime,latitude,longitude,MSLPmin,VORSmax,VORS_av02,VORS_av04,VORS_av06,...,PR98_ex06,PR98_ex08,PR98_ex10,PR99_ex02,PR99_ex04,PR99_ex06,PR99_ex08,PR99_ex10,HU,season
179273,7381,18,1991090508,31.25,291.75,1010.0,7e-05,5.4e-05,3e-05,1.2e-05,...,2.027,1.997,1.997,0.0,2.095,2.046,2.046,2.046,True,SON
365807,14780,62,2004032914,31.25,291.75,1012.0,5.6e-05,4.4e-05,3.2e-05,2.1e-05,...,1.203,1.175,1.175,1.032,1.454,1.301,1.263,1.263,True,MAM
365810,14780,65,2004032917,31.25,291.75,1011.0,6.1e-05,4.9e-05,3.5e-05,2.3e-05,...,1.093,1.018,1.018,0.7142,0.9996,0.9314,0.9314,0.9314,True,MAM
447768,17970,53,2009082204,31.25,291.75,972.9,0.000195,0.000154,7.8e-05,2.9e-05,...,2.471,2.471,2.471,3.439,2.569,2.569,2.569,2.569,True,JJA


In [65]:
s7381 = etc24.loc[etc24.storm == 7381]
s14780 = etc24.loc[etc24.storm == 14780]
s17970 = etc24.loc[etc24.storm == 17970]

In [40]:
s2516.groupby(['HU']).count()

Unnamed: 0_level_0,storm,lifetime,datetime,latitude,longitude,MSLPmin,VORSmax,VORS_av02,VORS_av04,VORS_av06,...,PR98_ex04,PR98_ex06,PR98_ex08,PR98_ex10,PR99_ex02,PR99_ex04,PR99_ex06,PR99_ex08,PR99_ex10,season
HU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
False,73,73,73,73,73,73,73,73,73,73,...,73,73,73,73,73,73,73,73,73,73
True,47,47,47,47,47,47,47,47,47,47,...,47,47,47,47,47,47,47,47,47,47


In [66]:
pd.set_option('display.max_rows', s7381.shape[0] + 1)
pd.set_option('display.max_rows', s14780.shape[0] + 1)
pd.set_option('display.max_rows', s17970.shape[0] + 1)

Unnamed: 0,storm,lifetime,datetime,latitude,longitude,MSLPmin,VORSmax,VORS_av02,VORS_av04,VORS_av06,...,PR98_ex06,PR98_ex08,PR98_ex10,PR99_ex02,PR99_ex04,PR99_ex06,PR99_ex08,PR99_ex10,HU,season
365746,14780,1,2004032701,46.75,286.75,1019.0,6.6e-05,5.4e-05,3.5e-05,2.1e-05,...,0.9456,0.9709,0.9599,0.9073,0.8277,0.7043,0.7449,0.7358,True,MAM
365747,14780,2,2004032702,46.75,287.25,1019.0,6.7e-05,5.6e-05,3.7e-05,2.1e-05,...,0.9016,0.8891,0.8979,1.017,0.8856,0.7666,0.7478,0.7433,True,MAM
365748,14780,3,2004032703,46.0,286.75,1019.0,6.7e-05,5.6e-05,3.6e-05,2e-05,...,0.8452,0.785,0.8251,0.7963,0.789,0.7193,0.694,0.6996,True,MAM
365749,14780,4,2004032704,46.25,287.5,1019.0,6.6e-05,5.7e-05,3.7e-05,2.1e-05,...,0.7052,0.6836,0.7074,0.5753,0.5718,0.4892,0.4825,0.4892,True,MAM
365750,14780,5,2004032705,46.0,287.75,1019.0,6.5e-05,5.6e-05,3.7e-05,2.1e-05,...,0.5832,0.5796,0.5865,0.1762,0.4366,0.3871,0.4115,0.3885,True,MAM
365751,14780,6,2004032706,45.75,288.0,1019.0,6.4e-05,5.4e-05,3.6e-05,2e-05,...,0.5584,0.5689,0.5464,0.0,0.4426,0.4085,0.4455,0.3902,True,MAM
365752,14780,7,2004032707,45.75,288.75,1019.0,6.4e-05,5.2e-05,3.5e-05,2e-05,...,0.2558,0.2864,0.2554,0.0,0.2,0.1999,0.2649,0.2649,True,MAM
365753,14780,8,2004032708,45.25,288.75,1019.0,6.4e-05,5e-05,3.3e-05,1.9e-05,...,0.3009,0.3039,0.2728,0.0,0.1253,0.1111,0.1434,0.1434,True,MAM
365754,14780,9,2004032709,45.0,289.0,1019.0,6.4e-05,4.9e-05,3.2e-05,1.8e-05,...,0.3757,0.3598,0.3311,0.0,0.1786,0.1883,0.1771,0.1771,True,MAM
365755,14780,10,2004032710,44.75,289.0,1019.0,6.3e-05,4.9e-05,3.2e-05,1.8e-05,...,0.4637,0.4258,0.4151,0.1256,0.2016,0.2473,0.2216,0.2157,True,MAM


In [69]:
s17970

Unnamed: 0,storm,lifetime,datetime,latitude,longitude,MSLPmin,VORSmax,VORS_av02,VORS_av04,VORS_av06,...,PR98_ex06,PR98_ex08,PR98_ex10,PR99_ex02,PR99_ex04,PR99_ex06,PR99_ex08,PR99_ex10,HU,season
447716,17970,1,2009082000,20.0,301.5,961.7,0.000182,0.00014,7.1e-05,3e-05,...,3.687,3.687,3.687,8.103,5.575,4.259,4.259,4.259,False,JJA
447717,17970,2,2009082001,20.0,301.25,962.1,0.000182,0.000141,7.1e-05,3e-05,...,3.963,3.963,3.963,7.878,5.688,4.567,4.567,4.567,False,JJA
447718,17970,3,2009082002,20.25,301.25,962.1,0.000183,0.000144,7.1e-05,2.9e-05,...,3.773,3.773,3.589,6.51,5.193,4.65,4.65,4.65,False,JJA
447719,17970,4,2009082003,20.25,301.0,964.0,0.000182,0.000144,7.1e-05,2.9e-05,...,3.339,3.339,3.219,6.56,4.578,3.87,3.87,3.851,False,JJA
447720,17970,5,2009082004,20.5,300.75,965.7,0.000182,0.000144,7.1e-05,2.9e-05,...,3.27,3.217,3.095,6.646,4.892,3.763,3.734,3.734,False,JJA
447721,17970,6,2009082005,20.5,300.5,965.7,0.000181,0.000144,7.1e-05,2.9e-05,...,3.482,3.465,3.422,7.05,5.087,3.883,3.868,3.868,False,JJA
447722,17970,7,2009082006,20.75,300.25,966.5,0.000181,0.000143,7.1e-05,2.8e-05,...,3.433,3.423,3.322,7.08,5.115,3.707,3.707,3.684,False,JJA
447723,17970,8,2009082007,20.75,299.75,967.4,0.000181,0.000142,7.2e-05,2.9e-05,...,3.639,3.639,3.639,4.725,3.828,3.828,3.828,3.828,False,JJA
447724,17970,9,2009082008,21.0,299.5,967.5,0.000181,0.000142,7.2e-05,2.9e-05,...,3.192,3.192,3.192,5.083,3.56,3.381,3.381,3.381,False,JJA
447725,17970,10,2009082009,21.25,299.25,967.1,0.000182,0.000143,7.2e-05,2.9e-05,...,3.048,3.048,3.048,4.93,3.482,3.212,3.212,3.212,False,JJA


In [91]:
def get_distance(latS, lonS, bnd) : 

    """
    Determine if a given grid point is at a minimal distance of 5deg from
    all CRCM6 boundary grid point domain

    Paramters : 
        latS  : Latitude of the catalogue grid point
        lonS  : Longitude of the catalogue grid point
        bnd   : Dataframe containing boundary grid points

    Returns       : 
        dist_cond : True if all grid points are within a minimal distance of 5deg
                    from all boundary layer grid points and False if not.
    """

    dist_cond = True
    
    # filter out boundary grid points to restrict search
    #bnd_filt = bnd.loc[(bnd['lat'] < latS - 6) | (bnd['lat'] > latS + 6) |
                       #(bnd['lon'] < lonS - 6) | (bnd['lon'] > lonS + 6)]
    
    for _, row1 in bnd.iterrows():
        latD = row1['lat']
        lonD = row1['lon']
        dist = ((latS-latD)**2 + (lonS - lonD)**2)**0.5
        
        if dist < 5 : 
            print('distance : ', dist, '\n')
            dist_cond = False
            break
            
    return dist_cond


In [92]:
for _, row in s17970.iterrows() : 
    if row['latitude'] == 31.25 and row['longitude'] == 291.75 : 
        cond = get_distance(row['latitude'], row['longitude'], bnd)
        print('cond = ', cond)
    

distance :  0.0 

cond =  False


In [81]:
bnd

Unnamed: 0,lat,lon,HU
1352,31.25,291.75,True
1591,31.50,291.25,True
1592,31.50,291.50,True
1593,31.50,291.75,True
1594,31.50,292.00,True
...,...,...,...
34765,66.00,270.25,True
34997,66.25,268.00,True
34998,66.25,268.25,True
34999,66.25,268.50,True
