In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Formation table: the formation_datetime column is parsed to datetime on load.
formation = pd.read_csv(
    '../data/processed/predictors/formation.csv',
    parse_dates=['formation_datetime'],
)

# AMO table: tab-separated, with the first column used as the row index.
amo = pd.read_csv(
    '../data/raw/amo.tsv',
    sep='\t',
    index_col=0,
)
amo.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1850,0.232434,0.05592,-0.002504,0.06046,-0.000834,-0.044254,-0.213111,-0.120864,-0.032553,0.457496,0.070701,0.252019
1851,-0.11162,0.170289,0.376996,0.25618,0.036857,-0.26432,0.00063,-0.299903,0.21759,0.569602,0.303239,0.311832
1852,0.684527,0.123146,0.309928,0.233993,0.175499,0.086999,0.097931,0.265288,-0.015371,0.335482,0.129818,-0.085432
1853,-0.081682,0.236108,-0.02945,0.178862,0.082808,0.152384,-0.451089,0.064525,0.293643,0.15198,-0.165519,0.149231
1854,0.342942,0.147242,0.441997,0.218296,0.346287,0.358791,0.364841,0.543447,0.260755,0.107569,0.539965,0.772307


In [4]:
# Treat the -999.9 placeholder as a missing value so later steps see NaN.
amo = amo.replace(to_replace=-999.9000, value=np.nan)

In [10]:
# 1. FLATTEN
# Stack the Year x Month table into a single series. future_stack=True selects
# the current pandas implementation of stack(), which keeps NaN entries (they
# must survive so the gaps can be interpolated) and replaces the deprecated
# stack(dropna=False) call that emitted a FutureWarning.
ts = amo.stack(future_stack=True)

# Replace the (year, month) index with first-of-month timestamps. This is
# required, not optional: the reshape step below reads .year / .month from
# this DatetimeIndex.
ts.index = pd.to_datetime([f"{y}-{m}-01" for y, m in ts.index])

# 2. INTERPOLATE
# Fill missing months with an order-3 spline; limit_direction='both' lets
# leading and trailing gaps be filled as well.
# NOTE(review): values filled at the very ends are spline extrapolations --
# confirm they are acceptable for the most recent months.
interpolated_ts = ts.interpolate(method='spline', order=3, limit_direction='both')

# 3. RESHAPE
# Pivot back to the original Year x Month layout. to_frame() on an unnamed
# series yields a single column labelled 0, hence values=0.
amo_interpolated = interpolated_ts.to_frame().assign(
    Year=interpolated_ts.index.year,
    Month=interpolated_ts.index.month
).pivot(index='Year', columns='Month', values=0)

amo_interpolated.head()

  ts = amo.stack(dropna=False)


Month,1,2,3,4,5,6,7,8,9,10,11,12
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1850,0.232434,0.05592,-0.002504,0.06046,-0.000834,-0.044254,-0.213111,-0.120864,-0.032553,0.457496,0.070701,0.252019
1851,-0.11162,0.170289,0.376996,0.25618,0.036857,-0.26432,0.00063,-0.299903,0.21759,0.569602,0.303239,0.311832
1852,0.684527,0.123146,0.309928,0.233993,0.175499,0.086999,0.097931,0.265288,-0.015371,0.335482,0.129818,-0.085432
1853,-0.081682,0.236108,-0.02945,0.178862,0.082808,0.152384,-0.451089,0.064525,0.293643,0.15198,-0.165519,0.149231
1854,0.342942,0.147242,0.441997,0.218296,0.346287,0.358791,0.364841,0.543447,0.260755,0.107569,0.539965,0.772307


In [11]:
def get_amo(dt, table=None):
    """Return the AMO value for the calendar month containing ``dt``.

    Parameters
    ----------
    dt : datetime-like
        Object exposing ``.year`` and ``.month`` (e.g. ``pd.Timestamp``).
    table : pandas.DataFrame, optional
        Table indexed by year with month-number columns. Defaults to the
        notebook-level ``amo_interpolated`` frame.

    Returns
    -------
    float
        The table entry at ``(dt.year, dt.month)``, or NaN when ``dt`` is
        missing (``NaT``/``None``) or the entry equals the -999.9 placeholder.

    Raises
    ------
    KeyError
        If the year or month is not present in ``table``.
    """
    if table is None:
        # Resolved at call time so the function can be defined before the
        # interpolated table exists.
        table = amo_interpolated
    if pd.isna(dt):
        # A missing datetime has no year/month to look up.
        return np.nan
    value = table.loc[dt.year, dt.month]
    # Kept for tables that still carry the raw placeholder value.
    return np.nan if value == -999.9000 else value

In [12]:
# Look up the AMO value for each record's formation month. Mapping over the
# datetime column alone avoids materialising a Series for every row, which is
# what DataFrame.apply(axis=1) does.
formation['amo'] = formation['formation_datetime'].map(get_amo)

In [13]:
# Drop the formation time/position columns, leaving the remaining columns
# (including the new 'amo' column) for export.
results = formation.drop(
    labels=['formation_datetime', 'formation_lat', 'formation_lon'],
    axis='columns',
)
results

Unnamed: 0,code,amo
0,AL011851,-0.264320
1,AL011852,0.265288
2,AL011853,0.064525
3,AL011854,0.358791
4,AL011855,0.101421
...,...,...
1986,AL301969,-0.347504
1987,AL302005,-0.005498
1988,AL302020,-0.172008
1989,AL312005,0.211006


In [14]:
# Write the predictor table to disk without the positional row index.
results.to_csv(
    '../data/processed/predictors/amo.csv',
    index=False,
)