# Calculate Quasi-Stationary Metrics from the filtered MedCyclone composite table
Apply different stationarity definitions to the dataset and identify the slowest-moving and the most transient cyclone tracks in time and space.

In [None]:
# load python packages
import pandas as pd
import numpy as np
# from scipy.io import savemat (if you want to save final table as .mat)
from scipy.io import loadmat
from scipy.stats import rankdata

import QS_functions as fm

In [None]:
# Load the track table stored in df_meditracks.mat into the DataFrame df_crossers
data = loadmat('.../my_path/df_meditracks.mat')

# variables read from the .mat file; each one is flattened into a DataFrame column
cols = ['id', 'lon', 'lat', 'year', 'month', 'day', 'time', 'hPa']
df_crossers = pd.DataFrame({col: data[col].flatten() for col in cols})
# boolean flag: True where the stored 'medi_tracks' value equals 1
df_crossers['medi_tracks'] = (data['medi_tracks'].flatten() == 1)

# retain only medi_tracks that are True
df_crossers = df_crossers[df_crossers['medi_tracks']]

## Full-Track Stationarity

In [None]:
def assign_quantiles_and_categories(values, name, ids=None):
    """Rank per-cyclone values and bin them into low / medium / high classes.

    Parameters
    ----------
    values : array-like of float
        One metric value per cyclone. NaN entries are allowed: they are left
        out of the ranking and receive a NaN quantile and a NaN category.
    name : str
        Prefix of the output columns (``<name>_v``, ``<name>_q``, ``<name>_c``).
    ids : array-like, optional
        Cyclone ID belonging to each entry of ``values``. When omitted, the
        first ``len(values)`` unique IDs of the module-level ``df_crossers``
        are used; that is only correct if ``values`` holds exactly one entry
        per unique ID, in order of first appearance.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``<name>_v`` (value rounded to 3 decimals),
        ``<name>_q`` (quantile rank between 0 and 1, rounded to 3 decimals)
        and ``<name>_c`` (0 = unclassified, 1 = low, 2 = medium, 3 = high).

    Raises
    ------
    ValueError
        If ``ids`` is given and its length differs from ``values``.
    """
    values = np.asarray(values, dtype=float)

    if ids is None:
        # backward-compatible default: positional match against the global table
        ids = df_crossers['id'].unique()[:len(values)]
    else:
        ids = np.asarray(ids)
        if len(ids) != len(values):
            raise ValueError(
                f"ids has {len(ids)} entries but values has {len(values)}"
            )

    # compute quantile ranks (0 to 1) over the non-NaN values only, so that a
    # single NaN neither wipes out the ranking nor inflates the denominator
    valid = ~np.isnan(values)
    n_valid = int(valid.sum())
    quantiles = np.full(values.shape, np.nan)
    if n_valid > 1:
        ranks = rankdata(values[valid], method='ordinal')
        quantiles[valid] = np.round((ranks - 1) / (n_valid - 1), 3)
    # with a single valid value the quantile is undefined and stays NaN

    # assign categories: 0 = unclassified, 1 = low, 2 = medium, 3 = high
    # quantiles can be changed to suit needs of user
    categories = np.zeros_like(quantiles)
    categories[quantiles <= 0.10] = 1
    categories[(quantiles >= 0.45) & (quantiles <= 0.55)] = 2
    categories[quantiles >= 0.90] = 3
    categories[~valid] = np.nan

    return pd.DataFrame({
        'id': ids,
        f'{name}_v': np.round(values, 3),
        f'{name}_q': quantiles,
        f'{name}_c': categories
    })

### Median Velocity "FT_MED_VEL"
- based on median propagation speed of a cyclone over its whole lifetime

In [None]:
# Calculate median propagation speed (per hour) for each cyclone
# --------------------------------------------------------------
# For each unique cyclone ID in the dataframe:
#   - select its track points (lat/lon)
#   - compute the distance between consecutive points (1-hour time steps)
#   - store the median of these step distances for the cyclone
#
# Tracks with fewer than two points have no step distance and are skipped.
# The IDs of the tracks that were kept are recorded alongside the values so
# that each value is merged back onto the cyclone it was computed from.
#
# Cyclone categorisation (slow, average, fast) is based on quantiles
# and is handled using a helper function to assign quantile ranks and categories.
# Resulting values are added to the main dataframe.
# --------------------------------------------------------------

list_med_vel = []
ids_med_vel = []

# sort=False keeps the tracks in order of first appearance, like unique()
for cyclone_id, cyclone in df_crossers.groupby('id', sort=False):
    if len(cyclone) < 2:
        continue
    lons = cyclone['lon'].values
    lats = cyclone['lat'].values
    dists = [
        fm.haversine(lon0, lat0, lon1, lat1)
        for lon0, lat0, lon1, lat1 in zip(lons[:-1], lats[:-1], lons[1:], lats[1:])
    ]
    list_med_vel.append(np.median(dists))
    ids_med_vel.append(cyclone_id)

median_vel = np.array(list_med_vel)

# Create DataFrame with quantiles and categories
df_med_vel = assign_quantiles_and_categories(median_vel, 'FT_MED_VEL')
# The helper labels rows positionally; overwrite with the IDs actually kept so
# that skipped (too short) tracks cannot shift values onto the wrong cyclone
df_med_vel['id'] = np.asarray(ids_med_vel, dtype=df_crossers['id'].dtype)

# Merge with original data
df_crossers = df_crossers.merge(df_med_vel, on='id', how='left')

### Total Distance "FT_TOT_DIST" ('Path Stationarity' as in Aregger, 2021)
- based on maximum distance that a cyclone can travel over its whole lifetime
- calculated by summing up the distances between each observational timestep

In [None]:
# Calculate total distance for each cyclone
# --------------------------------------------------------------
# For each unique cyclone ID in the dataframe:
#   - select its track points (lat/lon)
#   - compute the distance between consecutive points (1-hour time steps)
#   - store the sum of these step distances for the cyclone
#
# Tracks with fewer than two points have no step distance and are skipped.
# The IDs of the tracks that were kept are recorded alongside the values so
# that each value is merged back onto the cyclone it was computed from.
#
# Cyclone categorisation (low, medium, high) is based on quantiles
# and is handled using a helper function to assign quantile ranks and categories.
# Resulting values are added to the main dataframe.
# --------------------------------------------------------------

list_tot_dist = []
ids_tot_dist = []

# sort=False keeps the tracks in order of first appearance, like unique()
for cyclone_id, cyclone in df_crossers.groupby('id', sort=False):
    if len(cyclone) < 2:
        continue
    lons = cyclone['lon'].values
    lats = cyclone['lat'].values
    dists = [
        fm.haversine(lon0, lat0, lon1, lat1)
        for lon0, lat0, lon1, lat1 in zip(lons[:-1], lats[:-1], lons[1:], lats[1:])
    ]
    list_tot_dist.append(np.sum(dists))
    ids_tot_dist.append(cyclone_id)

tot_dist = np.array(list_tot_dist)

# Create DataFrame with quantiles and categories
df_tot_dist = assign_quantiles_and_categories(tot_dist, 'FT_TOT_DIST')
# The helper labels rows positionally; overwrite with the IDs actually kept so
# that skipped (too short) tracks cannot shift values onto the wrong cyclone
df_tot_dist['id'] = np.asarray(ids_tot_dist, dtype=df_crossers['id'].dtype)

# Merge with original data
df_crossers = df_crossers.merge(df_tot_dist, on='id', how='left')

## Along Track Stationarity

The following three metrics all focus on distances covered in a 12-hr timescale.

Users may change this window by altering the following variable:

In [None]:
# Number of consecutive track steps spanned by each along-track window;
# read as a module-level setting by the along-track distance functions below.
window = 12

In [None]:
# categorisation function, reusable for any dist DataFrame with ['id', 'lon', 'lat', 'dist_sum']
def categorise_distances(df_dist, df_original, prefix):
    """Rank windowed distances and align them row-by-row with ``df_original``.

    The k-th ``dist_sum`` value of a track in ``df_dist`` is written to the
    k-th row of that track in ``df_original``; rows without a value (the
    trailing points of a track, or tracks with no value at all) stay NaN.
    Placement uses the actual row positions of each track, so it does not
    depend on track length or on ``df_original`` being sorted by ID.

    Parameters
    ----------
    df_dist : pandas.DataFrame
        Must contain the columns ``id`` and ``dist_sum``; the values of each
        track are expected in the same order as that track's rows in
        ``df_original``. IDs that do not occur in ``df_original`` are ignored.
    df_original : pandas.DataFrame
        Table to align with; must contain the column ``id``.
    prefix : str
        Prefix of the output columns.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``df_original`` with the columns ``<prefix>_v`` (value
        rounded to 3 decimals), ``<prefix>_q`` (quantile rank between 0 and 1
        over all non-NaN values, rounded to 3 decimals) and ``<prefix>_c``
        (0 = unclassified, 1 = low, 2 = medium, 3 = high, NaN where the
        quantile is NaN).

    Raises
    ------
    ValueError
        If a track has more values in ``df_dist`` than rows in ``df_original``.
    """
    n_rows = len(df_original)
    full_values = np.full(n_rows, np.nan)

    # integer row positions of every track inside df_original, in row order
    row_positions = df_original.groupby('id', sort=False).indices

    for track_id, track_vals in df_dist.groupby('id', sort=False)['dist_sum']:
        positions = row_positions.get(track_id)
        if positions is None:
            continue
        vals = np.round(np.asarray(track_vals, dtype=float), 3)
        if len(vals) > len(positions):
            raise ValueError(
                f"track {track_id!r} has {len(vals)} distance values "
                f"but only {len(positions)} rows in df_original"
            )
        # leading rows receive the values, trailing rows remain NaN
        full_values[positions[:len(vals)]] = vals

    # calculate quantile ranks over the non-NaN values only
    valid = ~np.isnan(full_values)
    n_valid = int(valid.sum())
    quantiles = np.full(n_rows, np.nan)
    if n_valid > 1:
        ranks = rankdata(full_values[valid], method='ordinal')
        quantiles[valid] = np.round((ranks - 1) / (n_valid - 1), 3)
    # with a single valid value the quantile is undefined and stays NaN

    # categorise based on quantiles
    # (user may change these thresholds according to preference)
    categories = np.zeros_like(quantiles)
    categories[quantiles <= 0.1] = 1
    categories[(quantiles >= 0.45) & (quantiles <= 0.55)] = 2
    categories[quantiles >= 0.9] = 3
    categories[np.isnan(quantiles)] = np.nan

    # create DataFrame to join back to 'original' df_crossers
    new_cols = pd.DataFrame({
        f'{prefix}_v': full_values,
        f'{prefix}_q': quantiles,
        f'{prefix}_c': categories
    })
    new_cols.index = df_original.index  # align index

    return new_cols

### 12-hour Distance "AT_12h_DIST"
- measures the total path distance a cyclone travels over 12 consecutive hourly steps

In [None]:
def calculate_dist12h_df(df):
    """Sum the path length covered over the next ``window`` steps of each track.

    For every track (rows sharing an ``id``) and every start position that is
    followed by at least ``window`` further points, the ``window`` consecutive
    point-to-point distances (``fm.haversine``) are added up. The window
    length is read from the module-level ``window``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``id``, ``lon`` and ``lat``; the rows of a
        track are presumably in chronological order (not checked here).

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``lon``, ``lat`` (start position of the window) and
        ``dist_sum``; tracks with at most ``window`` points contribute no rows.
    """
    list_dist_12hrs = []
    for id_val, group in df.groupby('id'):
        if len(group) <= window:
            continue  # too short to hold a single full window
        lons = group['lon'].values
        lats = group['lat'].values
        # every step distance is shared by up to `window` overlapping windows,
        # so compute each one once instead of once per window
        steps = [
            fm.haversine(lons[j], lats[j], lons[j+1], lats[j+1])
            for j in range(len(group) - 1)
        ]
        for i in range(len(group) - window):
            dist_sum = sum(steps[i:i + window])
            list_dist_12hrs.append([id_val, lons[i], lats[i], dist_sum])
    return pd.DataFrame(list_dist_12hrs, columns=['id', 'lon', 'lat', 'dist_sum'])

In [None]:
# windowed path distance per track point, then its value / quantile / category columns
df_12hr = calculate_dist12h_df(df_crossers)
new_cols_12hr = categorise_distances(df_12hr, df_crossers, 'AT_12h_DIST')

In [None]:
# append the AT_12h_DIST columns to the main table (column-wise, aligned on the index)
df_crossers = pd.concat([df_crossers, new_cols_12hr], axis=1)

### Radial Distance "AT_RAD_DIST"
- computes the sum of straight-line distances from a reference point to each of the next 12 hourly positions (like spokes of a wheel)

In [None]:
def calculate_radial_df(df):
    """Sum the distances from each track point to its next ``window`` points.

    For every track (rows sharing an ``id``) and every start position that is
    followed by at least ``window`` further points, the distance from the
    start point to each of those ``window`` points (``fm.haversine``) is
    added up. The window length is read from the module-level ``window``.

    Returns a DataFrame with the columns ``id``, ``lon``, ``lat`` (the start
    point) and ``dist_sum``.
    """
    records = []
    for track_id, track in df.groupby('id'):
        track_lon = track['lon'].values
        track_lat = track['lat'].values
        n_starts = len(track) - window
        for start in range(n_starts):
            ref_lon = track_lon[start]
            ref_lat = track_lat[start]
            # one "spoke" from the reference point to each following position
            spokes = [
                fm.haversine(ref_lon, ref_lat, track_lon[k], track_lat[k])
                for k in range(start + 1, start + window + 1)
            ]
            records.append([track_id, ref_lon, ref_lat, sum(spokes)])
    return pd.DataFrame(records, columns=['id', 'lon', 'lat', 'dist_sum'])

In [None]:
# radial distance sum per track point, then its value / quantile / category columns
df_radial = calculate_radial_df(df_crossers)
new_cols_rad = categorise_distances(df_radial, df_crossers, 'AT_RAD_DIST')

In [None]:
# append the AT_RAD_DIST columns to the main table (column-wise, aligned on the index)
df_crossers = pd.concat([df_crossers, new_cols_rad], axis=1)

### Circle Distance "AT_CIRCLE_DIST"
- finds the maximum distance from a reference point to any of the next 12 positions, i.e. the radius of the smallest circle centred on the reference point that contains all of them

In [None]:
def calculate_circle_df(df):
    """Find the largest distance from each track point to its next ``window`` points.

    For every track (rows sharing an ``id``) and every start position that is
    followed by at least ``window`` further points, the distances from the
    start point to those ``window`` points are computed with one
    ``fm.haversine`` call on array slices and the maximum is kept. The window
    length is read from the module-level ``window``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``id``, ``lon`` and ``lat``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``lon``, ``lat`` (the start point) and ``dist_sum``.
        The last column holds the maximum distance; it keeps the name
        ``dist_sum`` so the table can be passed to ``categorise_distances``.
    """
    list_circle = []
    for id_val, group in df.groupby('id'):
        if len(group) <= window:
            continue
        # plain arrays, as in the sibling distance functions, instead of
        # slicing the DataFrame once per window
        lons = group['lon'].values
        lats = group['lat'].values
        for i in range(len(group) - window):
            center_lon, center_lat = lons[i], lats[i]
            stop = i + window + 1
            distances = fm.haversine(center_lon, center_lat,
                                     lons[i + 1:stop], lats[i + 1:stop])
            max_distance = distances.max()
            list_circle.append([id_val, center_lon, center_lat, max_distance])
    return pd.DataFrame(list_circle, columns=['id', 'lon', 'lat', 'dist_sum'])

In [None]:
# maximum distance per track point, then its value / quantile / category columns
df_circle = calculate_circle_df(df_crossers)
new_cols_circle = categorise_distances(df_circle, df_crossers, 'AT_CIRCLE_DIST')

In [None]:
# append the AT_CIRCLE_DIST columns to the main table (column-wise, aligned on the index)
df_crossers = pd.concat([df_crossers, new_cols_circle], axis=1)

## Save final QS Table as csv file

In [18]:
# df_QS is a second name for the same DataFrame object as df_crossers (no copy is made)
df_QS = df_crossers
# last expression of the cell: displays the table in the notebook
df_QS
# uncomment to write the table to disk:
# df_QS.to_csv('/.../my_path/df_QS.csv', index=False)

Unnamed: 0,id,lon,lat,year,month,day,time,hPa,medi_tracks,FT_MED_VEL_v,...,FT_TOT_DIST_c,AT_12h_DIST_v,AT_12h_DIST_q,AT_12h_DIST_c,AT_RAD_DIST_v,AT_RAD_DIST_q,AT_RAD_DIST_c,AT_CIRCLE_DIST_v,AT_CIRCLE_DIST_q,AT_CIRCLE_DIST_c
0,4,0.960,40.700,1979,1,8,18,1016.93,True,31.008,...,0.0,293.162,0.585,0.0,1554.441,0.513,2.0,259.569,0.573,0.0
1,4,1.080,40.761,1979,1,8,19,1016.93,True,31.008,...,0.0,313.308,0.628,0.0,1710.421,0.566,0.0,279.078,0.616,0.0
2,4,1.300,40.746,1979,1,8,20,1016.64,True,31.008,...,0.0,321.903,0.645,0.0,1779.949,0.588,0.0,288.992,0.636,0.0
3,4,1.624,40.699,1979,1,8,21,1016.35,True,31.008,...,0.0,319.393,0.640,0.0,1752.835,0.580,0.0,290.526,0.639,0.0
4,4,1.986,40.693,1979,1,8,22,1015.86,True,31.008,...,0.0,313.970,0.629,0.0,1687.237,0.558,0.0,287.221,0.633,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173785,3807,36.114,44.208,2020,9,8,2,1015.46,True,10.430,...,0.0,,,,,,,,,
173786,3807,36.228,44.162,2020,9,8,3,1015.79,True,10.430,...,0.0,,,,,,,,,
173787,3807,36.323,44.163,2020,9,8,4,1016.16,True,10.430,...,0.0,,,,,,,,,
173788,3807,36.414,44.197,2020,9,8,5,1016.56,True,10.430,...,0.0,,,,,,,,,
