# Model Experimentation

This notebook explores different model architectures and performs hyperparameter tuning.

## Contents
1. Setup and Data Preparation
2. Baseline Model
3. Architecture Exploration
4. Hyperparameter Tuning
5. Model Comparison
6. Final Model Selection


In [None]:
# Import required libraries
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
# The callback classes live in the `callbacks` submodule; they are not
# exported from the top-level `tensorflow.keras` namespace.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Import our custom modules
from src.training.model import CryptoPredictor
from src.preprocessing.pipeline import Pipeline


## 1. Setup and Data Preparation


In [None]:
# Read the engineered feature table produced by the previous notebook.
# The 'timestamp' column becomes the index and is parsed as dates.
processed_data = pd.read_csv(
    'data/processed/features_engineered.csv',
    index_col='timestamp',
    parse_dates=True,
)

# Configure the project preprocessing pipeline
pipeline = Pipeline(sequence_length=60, prediction_length=1)

# Run it; the returned mapping is indexed below (and in later cells) by
# split keys such as 'X_train', 'X_val' and 'X_test'.
result = pipeline.run(processed_data)

# Report the shape of each split's input array
print("Training data shape:", result['X_train'].shape)
print("Validation data shape:", result['X_val'].shape)
print("Test data shape:", result['X_test'].shape)


## 2. Baseline Model


In [None]:
def create_baseline_model(input_shape):
    """Build and compile the single-layer LSTM baseline.

    Args:
        input_shape: Value forwarded to the LSTM layer's ``input_shape``
            argument. The caller in this notebook passes
            ``(X_train.shape[1], X_train.shape[2])``.

    Returns:
        A compiled ``Sequential`` model: a 64-unit LSTM followed by a
        1-unit Dense layer, trained with Adam (learning rate 0.001) on
        MSE loss and reporting MAE as a metric.
    """
    model = Sequential()
    model.add(LSTM(64, input_shape=input_shape))
    model.add(Dense(1))

    optimizer = Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    return model

# Fit the baseline on the training split while tracking the validation split
baseline_model = create_baseline_model(
    (result['X_train'].shape[1], result['X_train'].shape[2])
)
baseline_history = baseline_model.fit(
    x=result['X_train'],
    y=result['y_train'],
    validation_data=(result['X_val'], result['y_val']),
    epochs=50,
    batch_size=32,
    callbacks=[EarlyStopping(patience=5)],
)

# Draw the per-epoch training and validation loss curves.
# The traces are built in a comprehension so no helper names are left
# behind in the notebook's global namespace.
fig = go.Figure(data=[
    go.Scatter(y=baseline_history.history[metric_key], name=trace_name)
    for metric_key, trace_name in (
        ('loss', 'Training Loss'),
        ('val_loss', 'Validation Loss'),
    )
])
fig.update_layout(title='Baseline Model Training History')
fig.show()


## 3. Architecture Exploration


In [None]:
def experiment_architectures():
    """Train each candidate architecture and collect its training history.

    Three ``CryptoPredictor`` configurations are compared: a single LSTM
    layer, two stacked LSTM layers, and two stacked LSTM layers with
    ``use_batch_norm=True``. Every candidate is trained on the
    module-level ``result`` mapping (keys ``'X_train'``, ``'y_train'``,
    ``'X_val'``, ``'y_val'``) for up to 50 epochs with early stopping
    (patience 5).

    Returns:
        dict: Maps architecture name to a dict with two entries,
        ``'history'`` (the ``history`` attribute of the object returned
        by ``fit``) and ``'model'`` (the trained ``CryptoPredictor``).
    """
    # Take the input shape from the data instead of hard-coding the window
    # length, matching how the baseline model's shape is derived.
    input_shape = (result['X_train'].shape[1], result['X_train'].shape[2])

    architectures = {
        'single_lstm': CryptoPredictor(
            input_shape=input_shape,
            lstm_units=[64],
            dropout_rate=0.2
        ),
        'stacked_lstm': CryptoPredictor(
            input_shape=input_shape,
            lstm_units=[64, 32],
            dropout_rate=0.2
        ),
        'lstm_with_bn': CryptoPredictor(
            input_shape=input_shape,
            lstm_units=[64, 32],
            dropout_rate=0.2,
            use_batch_norm=True
        )
    }

    results = {}
    for name, model in architectures.items():
        print(f"\nTraining {name}...")
        model.build()
        model.compile()

        history = model.fit(
            result['X_train'],
            result['y_train'],
            validation_data=(result['X_val'], result['y_val']),
            epochs=50,
            batch_size=32,
            # A fresh callback per model so early-stopping state is not shared
            callbacks=[EarlyStopping(patience=5)]
        )

        results[name] = {
            'history': history.history,
            'model': model
        }

    return results

architecture_results = experiment_architectures()

# Plot comparison
fig = go.Figure()
# The loop variable must not be named `result`: at cell (module) scope that
# would overwrite the pipeline output that later cells index with
# result['X_train'], result['y_train'], ... and make them raise KeyError.
for name, arch_result in architecture_results.items():
    fig.add_trace(go.Scatter(y=arch_result['history']['val_loss'], name=f'{name} Val Loss'))
fig.update_layout(title='Architecture Comparison')
fig.show()


## 4. Hyperparameter Tuning


In [None]:
from sklearn.model_selection import ParameterGrid

def tune_hyperparameters():
    """Grid-search ``CryptoPredictor`` hyperparameters on the validation split.

    Every combination of the 5 ``lstm_units`` layouts, 3 ``dropout_rate``
    values and 3 ``learning_rate`` values (45 runs) is trained on the
    module-level ``result`` mapping for up to 50 epochs with early
    stopping (patience 5).

    Returns:
        pandas.DataFrame: One row per combination with columns ``'params'``
        (the parameter dict) and ``'best_val_loss'`` (the minimum
        validation loss seen during that run), sorted ascending by
        ``'best_val_loss'`` so the best combination is the first row.
    """
    param_grid = {
        'lstm_units': [[32], [64], [128], [64, 32], [128, 64]],
        'dropout_rate': [0.1, 0.2, 0.3],
        'learning_rate': [0.001, 0.0005, 0.0001]
    }

    # Take the input shape from the data instead of hard-coding the window
    # length; it is the same for every run, so compute it once.
    input_shape = (result['X_train'].shape[1], result['X_train'].shape[2])

    results = []
    for params in ParameterGrid(param_grid):
        print(f"\nTesting parameters: {params}")

        model = CryptoPredictor(
            input_shape=input_shape,
            lstm_units=params['lstm_units'],
            dropout_rate=params['dropout_rate'],
            learning_rate=params['learning_rate']
        )

        model.build()
        model.compile()

        history = model.fit(
            result['X_train'],
            result['y_train'],
            validation_data=(result['X_val'], result['y_val']),
            epochs=50,
            batch_size=32,
            callbacks=[EarlyStopping(patience=5)]
        )

        # Score each run by its best epoch, not its final one
        best_val_loss = min(history.history['val_loss'])
        results.append({
            'params': params,
            'best_val_loss': best_val_loss
        })

    return pd.DataFrame(results).sort_values('best_val_loss')

# Run the grid search; rows come back ordered by ascending best validation loss
tuning_results = tune_hyperparameters()

print("\nBest hyperparameters:")
# Last expression of the cell, so the notebook renders the first five rows
tuning_results.head(n=5)


## 5. Model Comparison


In [None]:
from matplotlib import pyplot as plt


def compare_models(baseline_history, architecture_results, best_tuned_model):
    """Tabulate the best validation loss of every model variant.

    Args:
        baseline_history: Object whose ``history['val_loss']`` holds the
            baseline's per-epoch validation losses.
        architecture_results: Mapping of architecture name to a dict whose
            ``['history']['val_loss']`` holds per-epoch validation losses.
        best_tuned_model: Mapping (or Series) with a ``'best_val_loss'``
            entry for the best tuned configuration.

    Returns:
        pandas.DataFrame: A single ``'val_loss'`` column indexed by
        ``'baseline'``, each architecture name, then ``'tuned'``.
    """
    # Gather (row label, best loss) pairs in the order they should appear
    rows = [('baseline', min(baseline_history.history['val_loss']))]
    rows.extend(
        (arch_name, min(arch_entry['history']['val_loss']))
        for arch_name, arch_entry in architecture_results.items()
    )
    rows.append(('tuned', best_tuned_model['best_val_loss']))

    # Write the rows one at a time via label-based enlargement
    comparison_metrics = pd.DataFrame()
    for row_label, loss in rows:
        comparison_metrics.loc[row_label, 'val_loss'] = loss

    return comparison_metrics

# Row 0 of the tuning table is the best run (the table is sorted ascending)
comparison = compare_models(
    baseline_history=baseline_history,
    architecture_results=architecture_results,
    best_tuned_model=tuning_results.iloc[0],
)

# Bar chart of the best validation loss per model variant
plt.figure(figsize=(10, 6))
val_loss_by_model = comparison['val_loss']
val_loss_by_model.plot(kind='bar')
plt.title('Model Comparison')
plt.ylabel('Validation Loss')
plt.show()


## 6. Final Model Selection


In [None]:
# Create final model with best parameters
import json
import os

# Row 0 of the tuning table holds the lowest validation loss; its 'params'
# dict is passed straight through as CryptoPredictor keyword arguments.
best_params = tuning_results.iloc[0]['params']
final_model = CryptoPredictor(
    # Take the input shape from the data instead of hard-coding the window
    # length, matching how the baseline model's shape is derived.
    input_shape=(result['X_train'].shape[1], result['X_train'].shape[2]),
    **best_params
)

final_model.build()
final_model.compile()

# Both the checkpoint and the config file are written under models/;
# create the directory up front so neither write fails on a fresh checkout.
os.makedirs('models', exist_ok=True)

# Train final model
final_history = final_model.fit(
    result['X_train'],
    result['y_train'],
    validation_data=(result['X_val'], result['y_val']),
    epochs=100,
    batch_size=32,
    callbacks=[
        EarlyStopping(patience=10),
        ModelCheckpoint('models/best_model.h5', save_best_only=True)
    ]
)

# Save model architecture and hyperparameters
model_config = {
    'architecture': best_params,
    'training_params': {
        'batch_size': 32,
        'initial_epochs': 100,
        'early_stopping_patience': 10
    },
    'performance': {
        # Cast to a plain float so json.dump cannot fail on a NumPy scalar
        'best_val_loss': float(min(final_history.history['val_loss']))
    }
}

with open('models/model_config.json', 'w') as f:
    json.dump(model_config, f, indent=4)

print("Final model saved with configuration")
