# Calculate Quasi-Stationary Metrics from the filtered MedCyclone composite table
Apply different stationarity definitions to the dataset and categorise the tracks based on the definitions.

In [None]:
# load python packages
import pandas as pd
import numpy as np
from scipy.io import loadmat

import QS_functions as QSfunc

In [None]:
# Read the track table, which contains only tracks crossing the Mediterranean Basin.
# For other CL numbers use '../output/other_TRACKS/TRACKS_CL#_onlyMedcrossers.csv'.
df_Medcrossers = pd.read_csv(
    '../output/TRACKS_CL5_onlyMedcrossers.csv',
    header=0,
)
# Preview the first rows of the loaded table
print(df_Medcrossers.head())


   id    lon     lat  year  month  day  time      hPa
0   4  0.960  40.700  1979      1    8    18  1016.93
1   4  1.080  40.761  1979      1    8    19  1016.93
2   4  1.300  40.746  1979      1    8    20  1016.64
3   4  1.624  40.699  1979      1    8    21  1016.35
4   4  1.986  40.693  1979      1    8    22  1015.86


## Full-Track Stationarity
The following two metrics focus on distances covered in the lifetime of the cyclone.

### Median Velocity "FT_MED_VEL"
- based on median propagation speed of a cyclone over its whole lifetime

In [14]:
# Calculate median propagation speed (per hour) for each cyclone
# --------------------------------------------------------------
# For each cyclone ID, in order of first appearance in the dataframe:
#   - select its track points (lat/lon)
#   - compute the distance between consecutive points (1-hour time steps)
#   - store the median of these step distances for the cyclone
#
# Cyclone categorisation (slow, average, fast) is based on quantiles
# and is handled using a helper function to assign quantile ranks and categories.
# Resulting values are added to the main dataframe.
# --------------------------------------------------------------

list_med_vel = []

# groupby(sort=False) yields the IDs in the same first-appearance order as
# df_Medcrossers['id'].unique(), but splits the table in a single pass instead
# of re-filtering the whole dataframe once per cyclone.
for cyclone_id, cyclone in df_Medcrossers.groupby('id', sort=False):
    # A single-point track has no consecutive-point distance and is skipped.
    # NOTE(review): a skipped track makes median_vel shorter than the number of
    # unique IDs -- confirm how assign_quantiles_and_categories_FT maps values to IDs.
    if len(cyclone) < 2:
        continue
    lons = cyclone['lon'].values
    lats = cyclone['lat'].values
    # Pair every point with its successor along the track
    dists = [
        QSfunc.haversine(lon_a, lat_a, lon_b, lat_b)
        for lon_a, lat_a, lon_b, lat_b in zip(lons[:-1], lats[:-1], lons[1:], lats[1:])
    ]
    list_med_vel.append(np.median(dists))

median_vel = np.array(list_med_vel)

# Create DataFrame with quantiles and categories
df_med_vel = QSfunc.assign_quantiles_and_categories_FT(median_vel, df_Medcrossers, 'FT_MED_VEL')

# Merge with original dataset (left join keeps every track point of df_Medcrossers)
df_QS_Medcrossers = df_Medcrossers.copy()
df_QS_Medcrossers = df_QS_Medcrossers.merge(df_med_vel, on='id', how='left')

### Total Distance "FT_TOT_DIST" ('Path Stationarity' as in Aregger, 2021)
- based on the total path length that a cyclone travels over its whole lifetime
- calculated by summing up the distances between each observational timestep

In [15]:
# Calculate total distance for each cyclone
# --------------------------------------------------------------
# For each cyclone ID, in order of first appearance in the dataframe:
#   - select its track points (lat/lon)
#   - compute the distance between consecutive points (1-hour time steps)
#   - store the sum of these step distances (total path length) for the cyclone
#
# Cyclone categorisation is based on quantiles
# and is handled using a helper function to assign quantile ranks and categories.
# Resulting values are added to the main dataframe.
# --------------------------------------------------------------

list_tot_dist = []

# groupby(sort=False) yields the IDs in the same first-appearance order as
# df_Medcrossers['id'].unique(), but splits the table in a single pass instead
# of re-filtering the whole dataframe once per cyclone.
for cyclone_id, cyclone in df_Medcrossers.groupby('id', sort=False):
    # A single-point track has no consecutive-point distance and is skipped.
    # NOTE(review): a skipped track makes tot_dist shorter than the number of
    # unique IDs -- confirm how assign_quantiles_and_categories_FT maps values to IDs.
    if len(cyclone) < 2:
        continue
    lons = cyclone['lon'].values
    lats = cyclone['lat'].values
    # Pair every point with its successor along the track
    dists = [
        QSfunc.haversine(lon_a, lat_a, lon_b, lat_b)
        for lon_a, lat_a, lon_b, lat_b in zip(lons[:-1], lats[:-1], lons[1:], lats[1:])
    ]
    list_tot_dist.append(np.sum(dists))

tot_dist = np.array(list_tot_dist)

# Create DataFrame with quantiles and categories
df_tot_dist = QSfunc.assign_quantiles_and_categories_FT(tot_dist, df_Medcrossers, 'FT_TOT_DIST')

# Merge with original data (left join keeps every row of df_QS_Medcrossers)
df_QS_Medcrossers = df_QS_Medcrossers.merge(df_tot_dist, on='id', how='left')

## Along Track Stationarity

The following three metrics focus on distances covered in a 12-hr timescale.

Users may change this window by altering the following variable:

In [5]:
# Length of the along-track window, counted in consecutive track steps.
# Read as a module-level global by the calculate_*_df functions defined below
# and passed explicitly to QSfunc.categorise_distances_AT.
window = 12

### 12-hour Distance "AT_12h_DIST"
- total path distance travelled by a cyclone over 12 consecutive hourly steps

In [6]:
def calculate_dist12h_df(df):
    """Sum the step-to-step distances over each sliding window of a track.

    Reads the module-level ``window`` (number of consecutive steps per window).

    Parameters
    ----------
    df : pandas.DataFrame
        Track table with at least the columns 'id', 'lon' and 'lat'. Rows of
        one 'id' are taken in their existing order as consecutive positions.

    Returns
    -------
    pandas.DataFrame
        Columns ['id', 'lon', 'lat', 'dist_sum']. One row per start position
        that is followed by at least ``window`` further positions; 'lon'/'lat'
        are the start position and 'dist_sum' is the sum of the ``window``
        consecutive-point distances beginning there. Tracks are visited in
        sorted 'id' order (pandas groupby default).
    """
    list_dist_12hrs = []
    for id_val, group in df.groupby('id'):
        n_points = len(group)
        # Too short to hold a single full window (same guard as calculate_circle_df)
        if n_points <= window:
            continue
        lons = group['lon'].values
        lats = group['lat'].values
        # Each step distance is shared by up to `window` overlapping windows,
        # so compute it once per track instead of once per window.
        step_dists = [
            QSfunc.haversine(lons[j], lats[j], lons[j + 1], lats[j + 1])
            for j in range(n_points - 1)
        ]
        for i in range(n_points - window):
            # Same terms in the same order as summing the steps i .. i+window-1 directly
            dist_sum = sum(step_dists[i:i + window])
            list_dist_12hrs.append([id_val, lons[i], lats[i], dist_sum])
    return pd.DataFrame(list_dist_12hrs, columns=['id', 'lon', 'lat', 'dist_sum'])

In [7]:
# Windowed path length for every start position of every track
df_12hr = calculate_dist12h_df(df_Medcrossers)
# NOTE(review): the recorded run of this cell failed with
# "TypeError: rankdata() got an unexpected keyword argument 'nan_policy'".
# scipy.stats.rankdata accepts `nan_policy` only from SciPy 1.10 onwards, so the
# helper presumably needs SciPy >= 1.10 -- confirm against QS_functions.
new_cols_12hr = QSfunc.categorise_distances_AT(df_12hr, df_Medcrossers, window, 'AT_12h_DIST')

TypeError: rankdata() got an unexpected keyword argument 'nan_policy'

In [None]:
# Append the AT_12h_DIST columns side by side (axis=1). pd.concat aligns rows on the index;
# assumes new_cols_12hr carries the same row index as df_QS_Medcrossers -- TODO confirm.
df_QS_Medcrossers = pd.concat([df_QS_Medcrossers, new_cols_12hr], axis=1)

NameError: name 'new_cols_12hr' is not defined

### Radial Distance "AT_RAD_DIST"
- computes the sum of straight-line distances from a reference point to each of the next 12 hourly positions (like spokes of a wheel)

In [9]:
def calculate_radial_df(df):
    """Sum the distances from each start position to its next ``window`` positions.

    Reads the module-level ``window``. ``df`` needs the columns 'id', 'lon'
    and 'lat'; rows of one 'id' are treated as consecutive positions.

    Returns a DataFrame with columns ['id', 'lon', 'lat', 'dist_sum'], one row
    per start position that has ``window`` positions after it. 'lon'/'lat' are
    the start (reference) position and 'dist_sum' is the summed distance from
    that reference to each of the following ``window`` positions.
    """
    records = []
    for track_id, track in df.groupby('id'):
        lon_arr = track['lon'].values
        lat_arr = track['lat'].values
        n_starts = len(track) - window
        for start in range(n_starts):
            ref_lon = lon_arr[start]
            ref_lat = lat_arr[start]
            # Distances from the reference point to each later position in the window
            radial_total = sum(
                QSfunc.haversine(ref_lon, ref_lat, lon_arr[k], lat_arr[k])
                for k in range(start + 1, start + window + 1)
            )
            records.append([track_id, ref_lon, ref_lat, radial_total])
    return pd.DataFrame(records, columns=['id', 'lon', 'lat', 'dist_sum'])

In [10]:
# Summed reference-to-position distances for every start position of every track
df_radial = calculate_radial_df(df_Medcrossers)
# NOTE(review): the recorded run of this cell failed with
# "TypeError: rankdata() got an unexpected keyword argument 'nan_policy'".
# scipy.stats.rankdata accepts `nan_policy` only from SciPy 1.10 onwards, so the
# helper presumably needs SciPy >= 1.10 -- confirm against QS_functions.
new_cols_rad = QSfunc.categorise_distances_AT(df_radial, df_Medcrossers, window, 'AT_RAD_DIST')

TypeError: rankdata() got an unexpected keyword argument 'nan_policy'

In [None]:
# Append the AT_RAD_DIST columns side by side (axis=1). pd.concat aligns rows on the index;
# assumes new_cols_rad carries the same row index as df_QS_Medcrossers -- TODO confirm.
df_QS_Medcrossers = pd.concat([df_QS_Medcrossers, new_cols_rad], axis=1)

### Circle Distance "AT_CIRCLE_DIST"
- finds the maximum distance from a reference point to any of the next 12 positions, defining the radius of the smallest circle centred on the reference point that contains them

In [None]:
def calculate_circle_df(df):
    """Find the largest distance from each start position to its next ``window`` positions.

    Reads the module-level ``window``.

    Parameters
    ----------
    df : pandas.DataFrame
        Track table with at least the columns 'id', 'lon' and 'lat'. Rows of
        one 'id' are taken in their existing order as consecutive positions.

    Returns
    -------
    pandas.DataFrame
        Columns ['id', 'lon', 'lat', 'dist_sum']. One row per start position
        that is followed by at least ``window`` further positions; 'lon'/'lat'
        are the start (centre) position. Despite its name, 'dist_sum' holds
        the maximum, not a sum, of the distances from the centre to the
        following ``window`` positions; the column name matches the other
        calculate_*_df outputs.
    """
    list_circle = []
    for id_val, group in df.groupby('id'):
        # Too short to hold a single full window
        if len(group) <= window:
            continue
        # Pull the coordinates out once per track rather than slicing the
        # DataFrame for every window (consistent with the sibling functions).
        lons = group['lon'].values
        lats = group['lat'].values
        for i in range(len(group) - window):
            center_lon, center_lat = lons[i], lats[i]
            # haversine is called with a scalar centre and arrays of the
            # following positions; its result must support .max()
            distances = QSfunc.haversine(center_lon, center_lat,
                                         lons[i + 1:i + window + 1],
                                         lats[i + 1:i + window + 1])
            max_distance = distances.max()
            list_circle.append([id_val, center_lon, center_lat, max_distance])
    return pd.DataFrame(list_circle, columns=['id', 'lon', 'lat', 'dist_sum'])

In [None]:
# Maximum reference-to-position distance for every start position of every track
df_circle = calculate_circle_df(df_Medcrossers)
# Quantile ranks / categories for the circle metric, via the same helper as the other AT metrics
new_cols_circle = QSfunc.categorise_distances_AT(df_circle, df_Medcrossers, window, 'AT_CIRCLE_DIST')

In [None]:
# Append the AT_CIRCLE_DIST columns side by side (axis=1). pd.concat aligns rows on the index;
# assumes new_cols_circle carries the same row index as df_QS_Medcrossers -- TODO confirm.
df_QS_Medcrossers = pd.concat([df_QS_Medcrossers, new_cols_circle], axis=1)

## Save final QS Table as csv file

In [None]:
# Preview the final table holding all calculated features
print(df_QS_Medcrossers.head())
# Write the table to disk without the row index.
# For other CL numbers use '../output/other_TRACKS/TRACKS_CL#_QS_Medcrossers.csv'.
df_QS_Medcrossers.to_csv(
    '../output/TRACKS_CL5_QS_Medcrossers.csv',
    index=False,
    header=True,
)

   id    lon     lat  year  month  day  time      hPa  FT_MED_VEL_v  \
0   4  0.960  40.700  1979      1    8    18  1016.93        31.008   
1   4  1.080  40.761  1979      1    8    19  1016.93        31.008   
2   4  1.300  40.746  1979      1    8    20  1016.64        31.008   
3   4  1.624  40.699  1979      1    8    21  1016.35        31.008   
4   4  1.986  40.693  1979      1    8    22  1015.86        31.008   

   FT_MED_VEL_q  FT_MED_VEL_c  FT_TOT_DIST_v  FT_TOT_DIST_q  FT_TOT_DIST_c  
0         0.831           0.0       1355.115          0.391            0.0  
1         0.831           0.0       1355.115          0.391            0.0  
2         0.831           0.0       1355.115          0.391            0.0  
3         0.831           0.0       1355.115          0.391            0.0  
4         0.831           0.0       1355.115          0.391            0.0  
