# In [None]:
# STEP 1: Load and Inspect Data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Load the data
file_path = "ZAR_OIS_curve_data.csv"  # Replace with your actual file path
df = pd.read_csv(file_path)

# Index the frame by observation date and put the rows in chronological
# order. The train/test split below is purely positional (shuffle=False), so
# it is only "time-aware" if the rows are sorted by date. mergesort is stable,
# so rows sharing a date keep their file order.
df['Dates'] = pd.to_datetime(df['Dates'])
df.set_index('Dates', inplace=True)
df.sort_index(kind='mergesort', inplace=True)

# Define features and targets
features = ['USDZAR', 'o/n interest rate']
targets = ['1m', '2m', '3m', '4m', '5m', '6m', '7m', '8m', '9m', '1y', '2y', '3y']

# Drop rows with a gap in any modelled column. Restricting the check to the
# columns actually used avoids throwing away observations whose only missing
# values sit in columns the models never read.
df.dropna(subset=features + targets, inplace=True)

X = df[features].to_numpy()
y = df[targets].to_numpy()

# Train-test split (time-aware): the first 80% of rows train the models and
# the most recent 20% are held out for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Standardize features. The scaler is fitted on the training rows only so no
# statistics from the test period leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# STEP 2: Gradient Boosting Model
# GradientBoostingRegressor is single-output, so MultiOutputRegressor fits one
# independent copy of it per target column.
print("Training Gradient Boosting Model...")
gbr_model = MultiOutputRegressor(
    estimator=GradientBoostingRegressor(
        max_depth=4,
        learning_rate=0.05,
        n_estimators=300,
    )
).fit(X_train_scaled, y_train)
gbr_preds = gbr_model.predict(X_test_scaled)

# Evaluate GBR on the held-out rows. With multi-column targets both metrics
# are reported as a single number averaged uniformly over the outputs.
print("Evaluating Gradient Boosting...")
gbr_r2 = r2_score(y_test, gbr_preds)
gbr_mse = mean_squared_error(y_test, gbr_preds)
print(f"Gradient Boosting - MSE: {gbr_mse:.4f}, R2: {gbr_r2:.4f}")

# STEP 3: Deep Learning Model
# Small fully connected network mapping the scaled features to all target
# columns at once.
print("Training Deep Learning Model...")
dnn_model = Sequential([
    # Declare the input shape with an explicit Input layer; passing
    # `input_shape=` to the first Dense layer is deprecated in current Keras.
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(len(targets)),  # linear output, one unit per target column
])

dnn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Stop once the validation loss has not improved for 10 epochs and roll the
# weights back to the best epoch seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# validation_split holds out the LAST 20% of the training rows (taken before
# any shuffling), so the validation set is the most recent part of the
# training window.
dnn_model.fit(X_train_scaled, y_train,
              validation_split=0.2,
              epochs=100,
              batch_size=32,
              callbacks=[early_stop],
              verbose=1)

dl_preds = dnn_model.predict(X_test_scaled)

# Evaluate DL on the held-out rows (metrics averaged over all target columns).
print("Evaluating Deep Learning Model...")
dl_mse = mean_squared_error(y_test, dl_preds)
dl_r2 = r2_score(y_test, dl_preds)
print(f"Deep Learning - MSE: {dl_mse:.4f}, R2: {dl_r2:.4f}")

# STEP 4: Visualization of predictions
# Compare actual and predicted values of a single tenor over the test period.
tenor = '1y'
# Look the column up by name so the plot stays correct if `targets` is
# reordered or extended, instead of relying on a hard-coded position.
tenor_col = targets.index(tenor)
# The split was positional with shuffle=False, so the test rows are the final
# len(y_test) rows of df; use their dates as the x-axis.
test_dates = df.index[len(df) - len(y_test):]

plt.figure(figsize=(14, 6))
plt.plot(test_dates, y_test[:, tenor_col], label=f'Actual {tenor}', linewidth=2)
plt.plot(test_dates, gbr_preds[:, tenor_col], label=f'GBR Predicted {tenor}', linestyle='--')
plt.plot(test_dates, dl_preds[:, tenor_col], label=f'DL Predicted {tenor}', linestyle=':')
plt.title(f"{tenor} OIS Curve Prediction")
plt.xlabel("Time")
plt.ylabel("Rate (%)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# STEP 5: Curve Shape Check
# Plot the whole curve (all target tenors) for one test-set row to check that
# the predicted curve shape is sensible, not just each tenor in isolation.
# Clamp the row index so a test set with 50 or fewer rows does not raise
# IndexError; the last available row is used instead.
sample_idx = min(50, len(y_test) - 1)
plt.figure(figsize=(10, 5))
plt.plot(targets, y_test[sample_idx], marker='o', label='Actual')
plt.plot(targets, gbr_preds[sample_idx], marker='x', label='GBR')
plt.plot(targets, dl_preds[sample_idx], marker='s', label='DL')
plt.title("Full Curve Prediction (Sample Day)")
plt.xlabel("Tenor")
plt.ylabel("Rate (%)")
plt.grid(True)
plt.legend()
plt.show()
