In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam

# Step 1: Load & Preprocess Data
# Read the daily commodity file, parse the 'Dates' column and order rows chronologically.
data = (
    pd.read_csv('commodities_DAILY.csv')
    .assign(Dates=lambda frame: pd.to_datetime(frame['Dates']))
    .sort_values(by='Dates')
)

# Price columns are identified by their '_PX_LAST' suffix.
price_cols = [name for name in data.columns if name.endswith('_PX_LAST')]

# Log return = log(P_t / P_{t-1}); any row containing a NaN (including the first one,
# which has no previous price) is discarded.
prices = data[price_cols]
returns = np.log(prices / prices.shift(1)).dropna()

# Step 2: Compute Features Per Commodity
# One [mean, standard deviation, skewness] triple per price column, in price_cols order.
commodity_features = [
    [clean_series.mean(), clean_series.std(), clean_series.skew()]
    for clean_series in (returns[name].dropna() for name in price_cols)
]

# Rows are commodities, columns are the three summary statistics.
commodity_df = pd.DataFrame(
    data=commodity_features,
    index=price_cols,
    columns=["Mean Return", "Volatility", "Skewness"],
)

# Put the three features on a common scale before clustering.
scaler = StandardScaler()
normalized_features = scaler.fit_transform(commodity_df)

# Step 3: K-Means Clustering
# n_clusters is reused by later cells.
n_clusters = 4
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
commodity_df['Cluster_KMeans'] = kmeans.fit_predict(normalized_features)

# Step 4: Deep K-Means using Autoencoder
def build_autoencoder(input_dim):
    """Build a dense autoencoder and the encoder that shares its first layers.

    The encoder stacks ReLU layers of 64, 32 and 16 units; the decoder mirrors it
    with 32 and 64 ReLU units followed by a linear layer of ``input_dim`` units.

    Args:
        input_dim: Number of input (and reconstructed) features.

    Returns:
        Tuple ``(autoencoder, encoder)``. Only the autoencoder is compiled
        (Adam, learning rate 0.001, MSE loss).
    """
    inputs = Input(shape=(input_dim,))

    # Encoder: the output of the last layer in this loop is the latent code.
    hidden = inputs
    for units in (64, 32, 16):
        hidden = Dense(units, activation='relu')(hidden)
    latent = hidden

    # Decoder: expand back out, then reconstruct linearly.
    for units in (32, 64):
        hidden = Dense(units, activation='relu')(hidden)
    reconstruction = Dense(input_dim, activation='linear')(hidden)

    autoencoder = Model(inputs, reconstruction)
    encoder = Model(inputs, latent)

    autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return autoencoder, encoder

# Build the autoencoder on the standardized per-commodity feature matrix
# (one row per commodity, one column per summary statistic).
# NOTE(review): the input has only the three statistics built above, while the
# encoder's last layer has 16 units, so the "deep features" are wider than the input.
autoencoder, encoder = build_autoencoder(normalized_features.shape[1])

# Train the autoencoder to reconstruct its own input
autoencoder.fit(normalized_features, normalized_features, epochs=50, batch_size=8, verbose=1)

# Extract deep features (output of the encoder sub-model)
deep_features = encoder.predict(normalized_features)

# Apply K-Means to deep features, with the same cluster count and seed as the plain K-Means
deep_kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
commodity_df['Cluster_DeepKMeans'] = deep_kmeans.fit_predict(deep_features)

# Step 5: Compute Correlations

# 1. Overall correlation between all commodity return series
overall_corr = returns.corr()

# 2. Within-cluster correlation for K-Means:
#    cluster id -> correlation matrix of the commodities assigned to that cluster
within_cluster_corr_kmeans = {
    cluster: returns[commodity_df.index[commodity_df['Cluster_KMeans'] == cluster]].corr()
    for cluster in range(n_clusters)
}

# 3. Within-cluster correlation for Deep K-Means
within_cluster_corr_deep = {
    cluster: returns[commodity_df.index[commodity_df['Cluster_DeepKMeans'] == cluster]].corr()
    for cluster in range(n_clusters)
}

# 4. Between-cluster correlation for K-Means
# `groupby(..., axis=1)` is deprecated in pandas; grouping the transposed frame by the
# commodity -> cluster mapping and transposing back yields the same per-date cluster means.
cluster_means_kmeans = returns.T.groupby(commodity_df['Cluster_KMeans']).mean().T
between_cluster_corr_kmeans = cluster_means_kmeans.corr()

# 5. Between-cluster correlation for Deep K-Means
cluster_means_deep = returns.T.groupby(commodity_df['Cluster_DeepKMeans']).mean().T
between_cluster_corr_deep = cluster_means_deep.corr()

# Step 6: Visualizing Correlations

# One row of three heatmaps: overall, K-Means between-cluster, Deep K-Means between-cluster.
# Only the two small between-cluster matrices are annotated with values.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
summary_panels = [
    (overall_corr, "Overall Correlation", False),
    (between_cluster_corr_kmeans, "Between-Cluster Correlation (K-Means)", True),
    (between_cluster_corr_deep, "Between-Cluster Correlation (Deep K-Means)", True),
]
for ax, (matrix, panel_title, annotate) in zip(axes, summary_panels):
    sns.heatmap(matrix, cmap="coolwarm", center=0, annot=annotate, ax=ax)
    ax.set_title(panel_title)

fig.tight_layout()
plt.show()

# One figure per K-Means cluster showing the correlations among its members
for cluster, corr_matrix in within_cluster_corr_kmeans.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(corr_matrix, cmap="coolwarm", center=0, annot=True, ax=ax)
    ax.set_title(f"Within-Cluster Correlation (K-Means, Cluster {cluster})")
    plt.show()

# One figure per Deep K-Means cluster
for cluster, corr_matrix in within_cluster_corr_deep.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(corr_matrix, cmap="coolwarm", center=0, annot=True, ax=ax)
    ax.set_title(f"Within-Cluster Correlation (Deep K-Means, Cluster {cluster})")
    plt.show()


In [None]:
from sklearn.metrics import silhouette_score

# Standardize the daily log returns of every commodity.
# `returns` holds only price columns (no date column), so the previous `iloc[:, 1:]`
# slice silently dropped the first commodity; select all price columns by name instead.
adaptive_returns = returns[price_cols].values
scaler = StandardScaler()
adaptive_returns = scaler.fit_transform(adaptive_returns)

input_dim = adaptive_returns.shape[1]
encoding_dim = 5  # size of the latent representation

# Single-hidden-layer autoencoder: ReLU bottleneck, linear reconstruction
input_layer = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation='relu')(input_layer)
decoded = Dense(input_dim, activation='linear')(encoded)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(adaptive_returns, adaptive_returns, epochs=50, batch_size=16, verbose=0)

# The encoder shares the trained bottleneck layer; its output is one latent vector per day
encoder = Model(input_layer, encoded)
encoded_features = encoder.predict(adaptive_returns)

# Step 2: Determine Optimal Clusters using Elbow Method & Silhouette Score
def evaluate_clusters(data, max_clusters=10):
    """Fit K-Means for k = 2..max_clusters and collect two quality measures.

    Args:
        data: Feature matrix passed to ``KMeans.fit_predict`` and ``silhouette_score``.
        max_clusters: Largest number of clusters to try (inclusive).

    Returns:
        Tuple ``(wcss, silhouette_scores)`` of equal-length lists; entry ``i``
        corresponds to ``k = i + 2``. ``wcss`` holds each model's ``inertia_``.
    """
    per_k = []
    for n_groups in range(2, max_clusters + 1):
        model = KMeans(n_clusters=n_groups, random_state=42, n_init='auto')
        labels = model.fit_predict(data)
        per_k.append((model.inertia_, silhouette_score(data, labels)))

    wcss = [inertia for inertia, _ in per_k]
    silhouette_scores = [score for _, score in per_k]
    return wcss, silhouette_scores

# Score K-Means on the standardized returns and on the autoencoder's latent features
wcss_kmeans, silhouette_kmeans = evaluate_clusters(adaptive_returns)
wcss_deep, silhouette_deep = evaluate_clusters(encoded_features)

fig, (ax_elbow, ax_silhouette) = plt.subplots(1, 2, figsize=(12, 5))

# Plot Elbow Method (x axis starts at k = 2, the first k evaluated)
ax_elbow.plot(range(2, len(wcss_kmeans) + 2), wcss_kmeans, marker='o', linestyle='--', label='K-Means')
ax_elbow.plot(range(2, len(wcss_deep) + 2), wcss_deep, marker='s', linestyle='-', label='Deep K-Means')
ax_elbow.set_xlabel('Number of Clusters')
ax_elbow.set_ylabel('WCSS')
ax_elbow.set_title('Elbow Method for Clustering')
ax_elbow.legend()

# Plot Silhouette Score
ax_silhouette.plot(range(2, len(silhouette_kmeans) + 2), silhouette_kmeans, marker='o', linestyle='--', label='K-Means')
ax_silhouette.plot(range(2, len(silhouette_deep) + 2), silhouette_deep, marker='s', linestyle='-', label='Deep K-Means')
ax_silhouette.set_xlabel('Number of Clusters')
ax_silhouette.set_ylabel('Silhouette Score')
ax_silhouette.set_title('Silhouette Score for Clustering')
ax_silhouette.legend()
plt.show()

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Input, Dropout
from tensorflow.keras.optimizers import Adam
import gym


# Work on the price columns only, so that cluster-label columns added to `returns`
# by a previous run of this notebook are never fed into the scaler or the rolling stats.
feature_returns = returns[price_cols]

# Global standardization (one mean/std per commodity over the whole sample)
scaler = StandardScaler()
normalized_returns = pd.DataFrame(
    scaler.fit_transform(feature_returns),
    columns=feature_returns.columns,
    index=feature_returns.index,
)

# Adaptive Normalization: Rolling-window scaling (6 months ≈ 126 trading days)
window_size = 126
rolling_means = feature_returns.rolling(window=window_size).mean()
rolling_stds = feature_returns.rolling(window=window_size).std()
# Rows with any NaN are dropped: the first window_size - 1 rows, plus any row where
# the division produced NaN.
adaptive_returns = ((feature_returns - rolling_means) / rolling_stds).dropna()

# Keep exactly the rows that survived the rolling normalization. Aligning by index
# label (rather than by taking the last len(adaptive_returns) rows) stays correct
# even if dropna removed rows from the middle of the sample.
# NOTE: this trims `returns`; re-run the loading cell before re-running this one
# to start again from the full sample.
returns = feature_returns.loc[adaptive_returns.index].copy()

# Cluster the days (rows) on their rolling-normalized returns; n_clusters is reused
# from the commodity-clustering cell.
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
kmeans_labels = kmeans.fit_predict(adaptive_returns)
returns['Cluster'] = kmeans_labels

def build_autoencoder(input_dim):
    """Build a dense autoencoder and the encoder that shares its first layers.

    This definition replaces the earlier, narrower ``build_autoencoder``: the
    encoder uses ReLU layers of 128, 64 and 32 units, the decoder mirrors it with
    64 and 128 ReLU units and ends in a linear layer of ``input_dim`` units.

    Args:
        input_dim: Number of input (and reconstructed) features.

    Returns:
        Tuple ``(autoencoder, encoder)``. Only the autoencoder is compiled
        (Adam, learning rate 0.001, MSE loss).
    """
    inputs = Input(shape=(input_dim,))

    # Encoder: the output of the last layer in this loop is the latent code.
    hidden = inputs
    for units in (128, 64, 32):
        hidden = Dense(units, activation='relu')(hidden)
    latent = hidden

    # Decoder: expand back out, then reconstruct linearly.
    for units in (64, 128):
        hidden = Dense(units, activation='relu')(hidden)
    reconstruction = Dense(input_dim, activation='linear')(hidden)

    autoencoder = Model(inputs, reconstruction)
    encoder = Model(inputs, latent)

    autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return autoencoder, encoder

# One autoencoder input per column of the rolling-normalized returns
input_dim = adaptive_returns.shape[1]
autoencoder, encoder = build_autoencoder(input_dim)

# Train the autoencoder to reconstruct each row of adaptive_returns
autoencoder.fit(adaptive_returns, adaptive_returns, epochs=50, batch_size=16, verbose=1)

# Latent representation of every row, taken from the encoder sub-model
encoded_features = encoder.predict(adaptive_returns)

# Cluster the latent vectors with the same cluster count and seed as the plain K-Means,
# and store the label next to the 'Cluster' column. Later cells strip the labels with
# `.iloc[:, :-2]`, i.e. they rely on these being the last two columns of `returns`.
deep_kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
deep_cluster_labels = deep_kmeans.fit_predict(encoded_features)
returns['Deep_Cluster'] = deep_cluster_labels

LSTM vs. Clustering-Based LSTM

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Input, Dropout
from tensorflow.keras.optimizers import Adam


# Step 3: Return Forecasting (Optimized LSTM per Cluster)
def build_lstm_model(input_shape, dropout_rate=0.2):
    """Build and compile a two-layer LSTM regressor with a single output.

    Args:
        input_shape: Shape of one sample, ``(timesteps, features)``.
        dropout_rate: Dropout fraction applied after each LSTM layer.

    Returns:
        A compiled ``Sequential`` model (Adam, learning rate 0.001, MSE loss).
    """
    model = Sequential()
    # First LSTM returns the full sequence so it can feed the second LSTM.
    model.add(LSTM(64, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(dropout_rate))
    # Second LSTM returns only its final state for the regression head.
    model.add(LSTM(64, return_sequences=False))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))

    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Train LSTM on Full Dataset
# Each sample is one row of adaptive_returns reshaped to a single timestep
# (expand_dims inserts the timestep axis at position 1).
full_X = np.expand_dims(adaptive_returns.values[:-1], axis=1)
full_Y = adaptive_returns.values[1:, 0]  # Target: the next row's value of the first column
lstm_full = build_lstm_model(full_X.shape[1:])
lstm_full.fit(full_X, full_Y, epochs=20, batch_size=32, verbose=1)
# NOTE(review): predictions are made on the same samples the model was trained on,
# so metrics computed from full_pred are in-sample.
full_pred = lstm_full.predict(full_X)

# Train one LSTM per cluster, for the K-Means labels and then for the Deep K-Means labels
def _fit_lstm_per_cluster(label_column):
    """Fit an LSTM on the rows of `returns` belonging to each cluster.

    Args:
        label_column: Name of the column in `returns` holding the cluster labels
            ('Cluster' or 'Deep_Cluster').

    Returns:
        Tuple ``(models, preds)`` of dicts keyed by cluster id. Clusters with fewer
        than two rows are skipped and have no entry.
    """
    # Drop the label columns by name instead of relying on them being the last two columns.
    feature_frame = returns.drop(columns=['Cluster', 'Deep_Cluster'])
    models, preds = {}, {}
    for cluster in range(n_clusters):
        cluster_data = feature_frame[returns[label_column] == cluster]
        if len(cluster_data) > 1:  # Need at least one (input row, next row) pair
            # NOTE(review): rows are filtered by label before pairing, so "next step"
            # means the next row in the same cluster, not necessarily the next date.
            X = np.expand_dims(cluster_data.values[:-1], axis=1)
            Y = cluster_data.values[1:, 0]  # Next row's value of the first column
            model = build_lstm_model(X.shape[1:], dropout_rate=0.3)
            model.fit(X, Y, epochs=20, batch_size=32, verbose=1)
            models[cluster] = model
            preds[cluster] = model.predict(X)  # In-sample predictions
    return models, preds

# Train LSTM for K-Means Clusters
kmeans_models, kmeans_preds = _fit_lstm_per_cluster('Cluster')

# Train LSTM for Deep K-Means Clusters
deep_kmeans_models, deep_kmeans_preds = _fit_lstm_per_cluster('Deep_Cluster')

# Evaluate Forecasting Performance
def reward_to_risk(predicted_values):
    """Return mean / standard deviation of the values, or 0 when the deviation is 0.

    Args:
        predicted_values: Array-like of numbers.

    Returns:
        ``np.mean(values) / np.std(values)``, or ``0`` if ``np.std(values)`` equals zero.
    """
    dispersion = np.std(predicted_values)
    if dispersion == 0:
        # A constant series has no risk to divide by.
        return 0
    return np.mean(predicted_values) / dispersion

def evaluate_forecasting(true_values, predicted_values):
    """Score a forecast against the realised values.

    Args:
        true_values: Realised target values.
        predicted_values: Model predictions for the same samples.

    Returns:
        Tuple ``(mae, rmse, r2r)``: mean absolute error, root mean squared error,
        and ``reward_to_risk`` of the predictions (the true values are not used for it).
    """
    squared_error = mean_squared_error(true_values, predicted_values)
    return (
        mean_absolute_error(true_values, predicted_values),
        np.sqrt(squared_error),
        reward_to_risk(predicted_values),
    )

# Metrics for the single LSTM trained on all rows
full_metrics = evaluate_forecasting(full_Y, full_pred)
mae_full, rmse_full, r2r_full = full_metrics
print("LSTM without Clustering - MAE:", mae_full, "RMSE:", rmse_full, "R2R:", r2r_full)

# Metrics per K-Means cluster: targets are the first column of the cluster's rows,
# skipping the first row (it has no preceding input row).
for cluster in kmeans_models:
    members = returns.loc[returns['Cluster'] == cluster]
    true_values = members.iloc[1:, 0].values
    mae, rmse, r2r = evaluate_forecasting(true_values, kmeans_preds[cluster])
    print(f"LSTM for K-Means Cluster {cluster} - MAE: {mae}, RMSE: {rmse}, R2R: {r2r}")

# Metrics per Deep K-Means cluster
for cluster in deep_kmeans_models:
    members = returns.loc[returns['Deep_Cluster'] == cluster]
    true_values = members.iloc[1:, 0].values
    mae, rmse, r2r = evaluate_forecasting(true_values, deep_kmeans_preds[cluster])
    print(f"LSTM for Deep K-Means Cluster {cluster} - MAE: {mae}, RMSE: {rmse}, R2R: {r2r}")


In [None]:
pip install optuna

Performance Improvements with Hyperparameter Tuning

In [None]:
import optuna
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import mean_squared_error

# Define hyperparameter optimization function
def objective(trial):
    """Optuna objective: validation RMSE of an LSTM trained on `adaptive_returns`.

    Samples LSTM width, dropout, learning rate, batch size and epoch count, trains a
    two-layer LSTM on a random 80/20 split of (row, next row's first column) pairs,
    and scores it on the held-out 20%.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        Root mean squared error on the validation split (lower is better).
    """
    # Hyperparameter search space
    lstm_units = trial.suggest_categorical("lstm_units", [32, 64, 128])
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    # suggest_loguniform is deprecated; suggest_float(..., log=True) samples the same
    # log-uniform range.
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
    epochs = trial.suggest_int("epochs", 20, 50)

    # Define LSTM model (one timestep per sample, one feature per column)
    model = Sequential([
        LSTM(lstm_units, return_sequences=True, input_shape=(1, adaptive_returns.shape[1])),
        Dropout(dropout_rate),
        LSTM(lstm_units, return_sequences=False),
        Dropout(dropout_rate),
        Dense(1)
    ])

    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    # Prepare Data: each row predicts the next row's first column
    X = np.expand_dims(adaptive_returns.values[:-1], axis=1)
    Y = adaptive_returns.values[1:, 0]

    # NOTE(review): train_test_split shuffles by default, so validation rows are
    # interleaved in time with training rows.
    X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

    # Train Model
    model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, verbose=0, validation_data=(X_val, Y_val))

    # Evaluate on validation set
    val_pred = model.predict(X_val)
    rmse = np.sqrt(mean_squared_error(Y_val, val_pred))

    return rmse  # Minimize RMSE

# Search hyperparameters: 20 trials, minimizing the objective's validation RMSE
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)

# Keep and report the best trial's hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Rebuild the same samples and the same seeded split that the objective used
adaptive_values = adaptive_returns.values
X = np.expand_dims(adaptive_values[:-1], axis=1)
Y = adaptive_values[1:, 0]

X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

# Assemble the final model layer by layer from the best hyperparameters
final_model = Sequential()
final_model.add(LSTM(best_params["lstm_units"], return_sequences=True, input_shape=(1, adaptive_returns.shape[1])))
final_model.add(Dropout(best_params["dropout_rate"]))
final_model.add(LSTM(best_params["lstm_units"], return_sequences=False))
final_model.add(Dropout(best_params["dropout_rate"]))
final_model.add(Dense(1))

final_model.compile(optimizer=Adam(learning_rate=best_params["learning_rate"]), loss='mse')

# Fit on the training split only, with the tuned epoch count and batch size
final_model.fit(
    X_train,
    Y_train,
    epochs=best_params["epochs"],
    batch_size=best_params["batch_size"],
    verbose=1,
)

# Predictions on the training split (X_val is not scored here)
final_pred = final_model.predict(X_train)


In [None]:
import optuna
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
import numpy as np

# Function to build LSTM model
def build_lstm_model(input_shape, lstm_units=64, dropout_rate=0.2, learning_rate=0.001):
    """Build and compile a two-layer LSTM regressor with a single output.

    This definition replaces the earlier ``build_lstm_model(input_shape, dropout_rate)``.
    The defaults reproduce that earlier model (64 units, dropout 0.2, learning rate
    0.001), so calls of the form ``build_lstm_model(shape)`` and
    ``build_lstm_model(shape, dropout_rate=...)`` keep working after this cell has run.

    Args:
        input_shape: Shape of one sample, ``(timesteps, features)``.
        lstm_units: Number of units in each of the two LSTM layers.
        dropout_rate: Dropout fraction applied after each LSTM layer.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model with MSE loss.
    """
    model = Sequential([
        # First LSTM returns the full sequence so it can feed the second LSTM.
        LSTM(lstm_units, return_sequences=True, input_shape=input_shape),
        Dropout(dropout_rate),
        LSTM(lstm_units, return_sequences=False),
        Dropout(dropout_rate),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model

# Function to optimize LSTM hyperparameters per cluster
def optimize_lstm_for_cluster(cluster_data, cluster_label, n_trials=10):
    """Tune LSTM hyperparameters on one cluster's rows with Optuna.

    Args:
        cluster_data: DataFrame of feature columns for the rows of one cluster; each
            row is used to predict the next row's first column.
        cluster_label: Identifier used only in the progress messages.
        n_trials: Number of Optuna trials to run (defaults to 10).

    Returns:
        Dict of the best trial's parameters, with keys ``lstm_units``,
        ``dropout_rate``, ``learning_rate``, ``batch_size`` and ``epochs``.
    """
    def objective(trial):
        """Return the validation RMSE of one sampled configuration."""
        # Hyperparameter search space
        lstm_units = trial.suggest_categorical("lstm_units", [32, 64, 128])
        dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
        # suggest_loguniform is deprecated; suggest_float(..., log=True) samples the
        # same log-uniform range.
        learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
        batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
        epochs = trial.suggest_int("epochs", 20, 50)

        # Prepare Data: one timestep per sample
        X = np.expand_dims(cluster_data.values[:-1], axis=1)
        Y = cluster_data.values[1:, 0]

        X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

        # Build & Train Model
        model = build_lstm_model((1, cluster_data.shape[1]), lstm_units, dropout_rate, learning_rate)
        model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, verbose=0, validation_data=(X_val, Y_val))

        # Evaluate on Validation Set
        val_pred = model.predict(X_val)
        rmse = np.sqrt(mean_squared_error(Y_val, val_pred))

        return rmse  # Minimize RMSE

    # Run Optuna Optimization for this cluster
    print(f"Optimizing LSTM for Cluster {cluster_label}...")
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=n_trials)

    print(f"Best Hyperparameters for Cluster {cluster_label}: {study.best_params}")
    return study.best_params


In [None]:
# Smallest cluster that can be tuned: 3 rows give 2 (input, target) pairs, the minimum
# that train_test_split can divide into a non-empty train and validation part.
MIN_CLUSTER_ROWS = 3


def _train_optimized_cluster_models(label_column, label_prefix=""):
    """Tune and train one LSTM per cluster of `returns`.

    Args:
        label_column: Name of the column in `returns` holding the cluster labels
            ('Cluster' or 'Deep_Cluster').
        label_prefix: Text put in front of the cluster id in progress messages.

    Returns:
        Dict mapping cluster id to its trained model. Clusters with fewer than
        MIN_CLUSTER_ROWS rows are skipped and have no entry.
    """
    # Drop the label columns by name instead of relying on them being the last two columns.
    feature_frame = returns.drop(columns=['Cluster', 'Deep_Cluster'])
    trained_models = {}
    for cluster in range(n_clusters):
        cluster_data = feature_frame[returns[label_column] == cluster]
        if len(cluster_data) < MIN_CLUSTER_ROWS:
            continue

        display_label = f"{label_prefix}{cluster}" if label_prefix else cluster
        tuned_params = optimize_lstm_for_cluster(cluster_data, display_label)

        # Rebuild the same samples and seeded split used during tuning
        X = np.expand_dims(cluster_data.values[:-1], axis=1)
        Y = cluster_data.values[1:, 0]
        X_train, _, Y_train, _ = train_test_split(X, Y, test_size=0.2, random_state=42)

        # Train Final Optimized Model on the training split
        model = build_lstm_model(
            (1, cluster_data.shape[1]),
            tuned_params["lstm_units"],
            tuned_params["dropout_rate"],
            tuned_params["learning_rate"],
        )
        model.fit(
            X_train,
            Y_train,
            epochs=tuned_params["epochs"],
            batch_size=tuned_params["batch_size"],
            verbose=1,
        )
        trained_models[cluster] = model
    return trained_models


# Train LSTM per K-Means Cluster
optimized_kmeans_models = _train_optimized_cluster_models('Cluster')

# Train LSTM per Deep K-Means Cluster
optimized_deep_kmeans_models = _train_optimized_cluster_models('Deep_Cluster', label_prefix="Deep-")
