# In [None]:
# ZAR OIS Curve Prediction from o/n rate and FX spot using Deep Learning

# STEP 1: Load and Inspect Data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from scipy.interpolate import interp1d

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Load the data
file_path = "ZAR_OIS_curve_data.csv"  # Replace with your actual file path
df = pd.read_csv(file_path)

# Convert 'Dates' column to datetime and use it as the index
df['Dates'] = pd.to_datetime(df['Dates'])
df.set_index('Dates', inplace=True)

# Sort oldest-to-newest. The sliding windows and the positional 80/20 split
# further down rely on row order being chronological, which a raw CSV export
# does not guarantee. A stable sort keeps same-date rows in file order.
df.sort_index(inplace=True, kind='mergesort')

# Drop rows with a missing value in any column
df.dropna(inplace=True)

# STEP 2: Compute interpolated 1-day forward rate after each short-term tenor (1m-6m)
# Tenor label -> maturity in years. Months are counted as 30-day blocks over a
# 365-day year. Key order also fixes the column order of the target curve.
tenor_map = {"1m": 30/365, "2m": 60/365, "3m": 90/365, "4m": 120/365, "5m": 150/365, "6m": 180/365,
             "7m": 210/365, "8m": 240/365, "9m": 270/365, "1y": 365/365, "2y": 2, "3y": 3}


def calc_1d_forward(df_row, tenors=tenor_map,
                    fwd_tenors=("1m", "2m", "3m", "4m", "5m", "6m")):
    """Return the 1-day forward rate starting at each requested tenor.

    A cubic interpolator (with extrapolation) is fitted through the curve
    points found in ``df_row``. For every tenor ``t`` in ``fwd_tenors`` the
    rates at ``t`` and ``t + 1/365`` are read off the interpolated curve and
    turned into an annualised forward rate with annual compounding::

        ((1 + r2) ** (t + d) / (1 + r1) ** t) ** (1 / d) - 1,   d = 1/365

    Args:
        df_row: Mapping-like row (e.g. a DataFrame row) holding one rate per
            label in ``tenors``.
        tenors: Mapping of tenor label to maturity in years; every label is
            used as an interpolation node.
        fwd_tenors: Labels (keys of ``tenors``) after which the 1-day forward
            is computed. Defaults to the six short-end tenors.

    Returns:
        pd.Series indexed by ``"fwd_1d_after_<label>"`` in ``fwd_tenors`` order.

    Raises:
        KeyError: If a label is missing from ``df_row`` or ``tenors``.
    """
    # NOTE(review): the (1 + r) compounding treats rates as decimal fractions;
    # confirm the input data is not quoted in percent.
    labels = list(tenors)
    curve_x = [tenors[label] for label in labels]
    curve_y = [df_row[label] for label in labels]
    interpolator = interp1d(curve_x, curve_y, kind='cubic', fill_value='extrapolate')

    one_day = 1 / 365
    results = {}
    for label in fwd_tenors:
        base = tenors[label]
        r1 = interpolator(base)
        r2 = interpolator(base + one_day)
        # Growth factor over the single day between `base` and `base + one_day`.
        one_day_growth = (1 + r2) ** (base + one_day) / (1 + r1) ** base
        # Annualise the one-day growth factor.
        results[f"fwd_1d_after_{label}"] = one_day_growth ** (1 / one_day) - 1
    return pd.Series(results)

# Derive the forward-rate features, one row (date) of the frame at a time.
fwd_features = df.apply(calc_1d_forward, axis=1)

# Model inputs: forward-rate features side by side with the FX spot and the
# overnight rate; any date with a missing input is discarded.
market_inputs = df[['USDZAR', 'o/n interest rate']]
X = pd.concat([fwd_features, market_inputs], axis=1).dropna()

# Targets: the OIS curve on the surviving dates, columns in tenor_map order.
y = df.loc[X.index, list(tenor_map)]

# Build sequences to model time momentum (e.g., 7-day window).
# Each sample pairs the SEQUENCE_LENGTH rows *before* position `end` with the
# curve observed at position `end`, so the target row is never in its own input.
SEQUENCE_LENGTH = 7
target_rows = range(SEQUENCE_LENGTH, len(X))

X_seq = np.array([X.iloc[end - SEQUENCE_LENGTH:end].values for end in target_rows])
y_seq = np.array([y.iloc[end].values for end in target_rows])
dates_seq = np.array([X.index[end] for end in target_rows])

# Train-test split: the first 80% of samples (by position) train the model,
# the remaining 20% are held out for testing. No shuffling is applied.
train_size = int(len(X_seq) * 0.8)
X_train, X_test = np.split(X_seq, [train_size])
y_train, y_test = np.split(y_seq, [train_size])
dates_test = dates_seq[train_size:]

# Normalize features and targets separately.
# Both scalers are fitted on the training portion only and then applied to the
# test portion.
feature_scaler = StandardScaler()
target_scaler = MinMaxScaler()

# The scalers work on 2-D input, so flatten (samples, steps, features) into
# (samples * steps, features) and restore the 3-D shape after transforming.
X_train_reshaped = X_train.reshape(-1, X_train.shape[-1])
X_test_reshaped = X_test.reshape(-1, X_test.shape[-1])

feature_scaler.fit(X_train_reshaped)
X_train_scaled = feature_scaler.transform(X_train_reshaped).reshape(X_train.shape)
X_test_scaled = feature_scaler.transform(X_test_reshaped).reshape(X_test.shape)

target_scaler.fit(y_train)
y_train_scaled = target_scaler.transform(y_train)
y_test_scaled = target_scaler.transform(y_test)

# STEP 3: Deep Learning Model with LSTM + Checkpointing
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which newer Keras releases warn about.
model = Sequential([
    tf.keras.Input(shape=(SEQUENCE_LENGTH, X_train_scaled.shape[2])),
    LSTM(128, return_sequences=False),  # only the last step's output is kept
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(y_train.shape[1])  # one linear output per target column
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Save the model to disk whenever validation loss improves.
checkpoint_cb = ModelCheckpoint('best_model.keras', monitor='val_loss', save_best_only=True, verbose=1)
# Stop after 15 epochs without validation-loss improvement and roll the
# in-memory model back to its best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
model.fit(X_train_scaled, y_train_scaled,
          validation_split=0.2,  # 20% of the training samples are used for validation
          epochs=200,
          batch_size=32,
          callbacks=[early_stop, checkpoint_cb],
          verbose=1)

# Predict and Evaluate
# Reload the model file written by the checkpoint callback and score it on the
# held-out test windows.
best_model = tf.keras.models.load_model('best_model.keras')
dl_preds_scaled = best_model.predict(X_test_scaled)
# Undo the target scaling so the metrics are in the targets' original units.
dl_preds = target_scaler.inverse_transform(dl_preds_scaled)

dl_mse, dl_r2 = (
    metric(y_test, dl_preds) for metric in (mean_squared_error, r2_score)
)
print(f"Deep Learning - MSE: {dl_mse:.4f}, R2: {dl_r2:.4f}")

# STEP 4: Visualize All Tenors with Spread
# One panel per tenor on a 4x3 grid: actual vs predicted over the test dates,
# with the gap between the two lines shaded.
tenors = list(tenor_map)
fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(18, 12), sharex=True)
axes = axes.flatten()
for i, tenor in enumerate(tenors):
    actual, predicted = y_test[:, i], dl_preds[:, i]
    ax = axes[i]
    ax.plot(dates_test, actual, label='Actual')
    ax.plot(dates_test, predicted, label='Predicted', linestyle='--')
    ax.fill_between(dates_test, actual, predicted, color='gray', alpha=0.3, label='Spread')
    ax.set_title(f"{tenor} Tenor")
    ax.legend()
    ax.grid(True)
plt.tight_layout()
plt.show()

# STEP 5: Visualize Inputs + Curve Output
# Fixed showcase sample (not random); clamped to the last available test
# sample so a short test set does not raise an IndexError.
sample_idx = min(100, len(X_test) - 1)
input_window = X_test[sample_idx]
input_features = X.columns.tolist()
plt.figure(figsize=(14, 5))

# Left panel: unscaled feature values across the input window.
plt.subplot(1, 2, 1)
for i, feature_name in enumerate(input_features):
    plt.plot(range(SEQUENCE_LENGTH), input_window[:, i], label=feature_name)
# Title follows SEQUENCE_LENGTH so it stays correct if the window size changes.
plt.title(f"Input Features over {SEQUENCE_LENGTH} Days")
plt.legend()
plt.grid(True)

# Right panel: actual vs predicted curve across all tenors for the same sample.
plt.subplot(1, 2, 2)
plt.plot(tenors, y_test[sample_idx], marker='o', label='Actual Curve')
plt.plot(tenors, dl_preds[sample_idx], marker='x', linestyle='--', label='Predicted Curve')
plt.title("Actual vs Predicted Full OIS Curve")
plt.ylabel("Rate (%)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# STEP 6: Prediction on New Input (user-defined array)
def predict_from_inputs(custom_inputs, *, plot=True):
    """Predict the OIS curve for one user-supplied input window.

    Args:
        custom_inputs: 2-D array-like with SEQUENCE_LENGTH rows, one per day,
            and one column per model input feature (same order as X.columns).
            Values must be unscaled; the fitted feature scaler is applied here.
        plot: When True (default), also draw the predicted curve.

    Returns:
        1-D array of predicted rates, one per entry of ``tenors``, in the
        targets' original (unscaled) units.

    Raises:
        ValueError: If ``custom_inputs`` is not 2-D or does not have exactly
            SEQUENCE_LENGTH rows.
    """
    custom_inputs = np.asarray(custom_inputs)
    # Fail early: a wrong row count would otherwise be silently folded into
    # the feature axis by the reshape below.
    if custom_inputs.ndim != 2 or custom_inputs.shape[0] != SEQUENCE_LENGTH:
        raise ValueError(
            f"custom_inputs must have shape ({SEQUENCE_LENGTH}, num_features), "
            f"got {custom_inputs.shape}"
        )

    custom_inputs_scaled = feature_scaler.transform(custom_inputs)
    # Add the leading batch axis the model expects: (1, steps, features).
    custom_inputs_scaled = custom_inputs_scaled.reshape(1, SEQUENCE_LENGTH, -1)
    pred_scaled = best_model.predict(custom_inputs_scaled)
    pred = target_scaler.inverse_transform(pred_scaled)[0]

    if plot:
        plt.figure(figsize=(8, 5))
        plt.plot(tenors, pred, marker='x', linestyle='--', label='Predicted Curve')
        plt.title("Predicted OIS Curve for Custom Input")
        plt.ylabel("Rate (%)")
        plt.grid(True)
        plt.legend()
        plt.show()
    return pred

# Example usage:
# custom_inputs = np.array([...])  # shape: (7, num_features)
# predict_from_inputs(custom_inputs)
