# Model Evaluation

This notebook provides a detailed evaluation of our trained cryptocurrency prediction model.

## Contents
1. Load Model and Data
2. Performance Metrics
3. Prediction Analysis
4. Error Analysis
5. Trading Performance
6. Model Robustness Tests


In [None]:
# Import required libraries
import tensorflow as tf
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Import our custom modules
from src.training.model import CryptoPredictor
from src.preprocessing.pipeline import Pipeline
from src.visualization.visualizer import CryptoVisualizer


## 1. Load Model and Data


In [None]:
# Load the best model from previous experiments.
# NOTE(review): CryptoPredictor.load is project code not visible here; the
# '.h5' suffix suggests a Keras HDF5 checkpoint — confirm.
model = CryptoPredictor.load('models/best_model.h5')

# Load test data, using the 'timestamp' column as the index and asking pandas
# to parse that index as dates.
# NOTE(review): `pipeline` is a freshly constructed Pipeline. If
# inverse_transform (used below) relies on scaler state fitted during training,
# that state has to be restored or fitted somewhere (possibly inside
# prepare_sequences) — confirm, and check it is not re-fitted on test data.
pipeline = Pipeline()
test_data = pd.read_csv('data/processed/test_data.csv', index_col='timestamp', parse_dates=True)
test_sequences = pipeline.prepare_sequences(test_data)

# Generate predictions from the 'X_test' entry of the prepared sequences.
predictions = model.predict(test_sequences['X_test'])

# Inverse transform predictions and actual values so both are on the same scale.
# NOTE(review): the later cells call np.diff and pd.Series on these two arrays,
# so they presumably need to be 1-D with equal length — verify the shapes
# returned by inverse_transform.
predictions_original = pipeline.inverse_transform(predictions)
actual_original = pipeline.inverse_transform(test_sequences['y_test'])


## 2. Performance Metrics


In [None]:
def calculate_metrics(y_true, y_pred):
    """Calculate regression and directional performance metrics.

    Args:
        y_true: Actual values (array-like), ordered in time along the first
            axis.
        y_pred: Predicted values with the same number of elements as
            ``y_true``.

    Returns:
        dict: Keys, in order: 'MSE', 'RMSE', 'MAE', 'R2', 'MAPE' (percent) and
        'Directional Accuracy' (percent). 'MAPE' is averaged over the samples
        whose actual value is non-zero and is NaN when there are none.
        'Directional Accuracy' is NaN when fewer than two samples are given.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # An (n,) vs (n, 1) pair would silently broadcast to (n, n) in the
    # element-wise arithmetic below, so bring both to one shape first.
    # Inputs whose sizes differ are left untouched and handed to the
    # scikit-learn calls, whose input validation is expected to reject them.
    if y_pred.shape != y_true.shape and y_pred.size == y_true.size:
        y_pred = y_pred.reshape(y_true.shape)

    mse = mean_squared_error(y_true, y_pred)
    metrics = {
        'MSE': mse,
        'RMSE': np.sqrt(mse),
        'MAE': mean_absolute_error(y_true, y_pred),
        'R2': r2_score(y_true, y_pred),
    }

    # MAPE is undefined where the actual value is zero; skip those samples
    # instead of letting a single zero turn the whole metric into inf/NaN.
    nonzero = y_true != 0
    if nonzero.any():
        abs_pct_error = np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])
        metrics['MAPE'] = np.mean(abs_pct_error) * 100
    else:
        metrics['MAPE'] = np.nan

    # Directional accuracy: share of consecutive steps where predicted and
    # actual values move in the same direction. Differencing runs along the
    # sample axis (axis 0) so column-shaped (n, 1) inputs work as well; the
    # default last-axis diff would be empty for them.
    if y_true.shape[0] < 2:
        metrics['Directional Accuracy'] = np.nan
    else:
        direction_true = np.sign(np.diff(y_true, axis=0))
        direction_pred = np.sign(np.diff(y_pred, axis=0))
        metrics['Directional Accuracy'] = np.mean(direction_true == direction_pred) * 100

    return metrics

# Score the whole test set using the inverse-transformed actuals and predictions.
metrics = calculate_metrics(actual_original, predictions_original)
# Trailing expression (rendered as the cell output in a notebook): after the
# transpose there is one row per metric and a single 'Value' column.
pd.DataFrame(metrics, index=['Value']).T


## 3. Prediction Analysis


In [None]:
def analyze_predictions():
    """Build a two-row Plotly figure of predictions versus actual values.

    Reads the notebook-level ``actual_original`` and ``predictions_original``.
    Row 1 overlays the actual and predicted series; row 2 shows the error
    (prediction minus actual).

    Returns:
        The figure created by ``make_subplots``; the caller decides when to
        show it.
    """
    residuals = predictions_original - actual_original

    figure = make_subplots(
        rows=2,
        cols=1,
        subplot_titles=('Price Predictions', 'Prediction Error'),
    )

    # (series, legend name, line colour, subplot row) in drawing order.
    trace_specs = [
        (actual_original, 'Actual', 'blue', 1),
        (predictions_original, 'Predicted', 'red', 1),
        (residuals, 'Error', 'green', 2),
    ]
    for series, label, colour, row_idx in trace_specs:
        figure.add_trace(
            go.Scatter(y=series, name=label, line=dict(color=colour)),
            row=row_idx, col=1
        )

    figure.update_layout(height=800, title='Prediction Analysis')
    return figure

# Build the prediction-analysis figure, keep a handle to it, and render it.
fig = analyze_predictions()
fig.show()


## 4. Error Analysis


In [None]:
def analyze_errors():
    """Plot the prediction errors and summarise their distribution.

    Reads the notebook-level ``predictions_original`` and ``actual_original``;
    the error is prediction minus actual. Draws a histogram (with KDE) of the
    errors next to a scatter of errors against actual values, then shows the
    figure.

    Returns:
        pd.Series: 'Mean Error', 'Std Error', 'Max Error', 'Min Error',
        'Skewness' and 'Kurtosis' of the errors.
    """
    residuals = predictions_original - actual_original

    plt.figure(figsize=(12, 4))

    # Left panel: error distribution.
    plt.subplot(1, 2, 1)
    sns.histplot(residuals, kde=True)
    plt.title('Error Distribution')

    # Right panel: errors against the actual values, with a zero-error guide.
    plt.subplot(1, 2, 2)
    sns.scatterplot(x=actual_original, y=residuals)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.title('Errors vs Actual Values')

    plt.tight_layout()
    plt.show()

    # pandas supplies the skewness and kurtosis estimators.
    residual_series = pd.Series(residuals)
    summary = {
        'Mean Error': np.mean(residuals),
        'Std Error': np.std(residuals),
        'Max Error': np.max(residuals),
        'Min Error': np.min(residuals),
        'Skewness': residual_series.skew(),
        'Kurtosis': residual_series.kurtosis(),
    }
    return pd.Series(summary)

# Draw the error plots and print the summary statistics returned alongside them.
error_stats = analyze_errors()
print("\nError Statistics:")
print(error_stats)


## 5. Trading Performance


In [None]:
def evaluate_trading_performance(actual=None, predicted=None, periods_per_year=252, plot=True):
    """Evaluate a sign-following strategy driven by the model's predictions.

    The position for each period is the sign of the predicted return
    (1: long, -1: short, 0: flat). The signal derived from the predicted move
    between steps ``i`` and ``i + 1`` is applied to the realised return
    between steps ``i + 1`` and ``i + 2``, i.e. with a one-step lag.

    Args:
        actual: Actual price series (1-D array-like). Defaults to the
            notebook-level ``actual_original``.
        predicted: Predicted price series with the same length. Defaults to
            the notebook-level ``predictions_original``.
        periods_per_year: Number of return periods per year used to annualise
            the mean return and the Sharpe ratio. The default of 252 is kept
            for backward compatibility; pick the value that matches the data
            frequency (e.g. 365 for daily bars on a market that trades every
            day).
        plot: When True, plot cumulative strategy returns against buy & hold.

    Returns:
        pd.Series: 'Total Return' (sum of per-period strategy returns, not
        compounded), 'Annualized Return', 'Sharpe Ratio' (NaN when the
        strategy returns have zero variance), 'Win Rate' (percent of periods
        with a positive strategy return) and 'Max Drawdown' (largest
        peak-to-trough decline of the compounded equity curve, as a
        non-positive fraction; -0.25 means a 25% drop).

    Raises:
        ValueError: If the two series differ in length or contain fewer than
            three points (at least one lagged strategy return is required).
    """
    if actual is None:
        actual = actual_original
    if predicted is None:
        predicted = predictions_original

    # Flatten so that column-shaped (n, 1) inputs behave like 1-D series.
    actual = np.asarray(actual, dtype=float).ravel()
    predicted = np.asarray(predicted, dtype=float).ravel()
    if actual.shape != predicted.shape:
        raise ValueError("actual and predicted must have the same length")
    if actual.size < 3:
        raise ValueError("at least three price points are required")

    # Calculate returns
    actual_returns = np.diff(actual) / actual[:-1]
    pred_returns = np.diff(predicted) / predicted[:-1]

    # Trading signals (1: buy, -1: sell, 0: hold)
    signals = np.sign(pred_returns)

    # Calculate strategy returns (signal lagged by one period, see docstring)
    strategy_returns = signals[:-1] * actual_returns[1:]

    mean_return = np.mean(strategy_returns)
    return_std = np.std(strategy_returns)
    # A constant return stream has no defined Sharpe ratio.
    if return_std > 0:
        sharpe_ratio = mean_return / return_std * np.sqrt(periods_per_year)
    else:
        sharpe_ratio = np.nan

    # Drawdown is measured on the compounded equity curve, starting from an
    # initial capital of 1 so that a loss in the very first period counts.
    # (Taking it on raw per-period returns always yields 0.)
    equity = np.concatenate(([1.0], np.cumprod(1 + strategy_returns)))
    running_peak = np.maximum.accumulate(equity)
    max_drawdown = np.min(equity / running_peak - 1)

    # Calculate metrics
    trading_metrics = {
        'Total Return': np.sum(strategy_returns),
        'Annualized Return': mean_return * periods_per_year,
        'Sharpe Ratio': sharpe_ratio,
        'Win Rate': np.mean(strategy_returns > 0) * 100,
        'Max Drawdown': max_drawdown
    }

    if plot:
        # Plot cumulative returns
        plt.figure(figsize=(12, 6))
        plt.plot(np.cumprod(1 + strategy_returns) - 1, label='Strategy')
        plt.plot(np.cumprod(1 + actual_returns[1:]) - 1, label='Buy & Hold')
        plt.title('Cumulative Returns')
        plt.legend()
        plt.show()

    return pd.Series(trading_metrics)

# Run the trading evaluation and print the resulting metrics.
trading_metrics = evaluate_trading_performance()
print("\nTrading Performance Metrics:")
print(trading_metrics)


## 6. Model Robustness Tests


In [None]:
def test_model_robustness(actual=None, predicted=None, window=20):
    """Compare model metrics across volatility and trend regimes.

    Samples are split into high/low volatility (rolling standard deviation of
    actual returns above / not above its median) and up/down trend (sign of
    the change in the rolling mean of actual prices). ``calculate_metrics`` is
    evaluated on each subset.

    Args:
        actual: Actual price series (1-D array-like). Defaults to the
            notebook-level ``actual_original``.
        predicted: Predicted price series with the same length. Defaults to
            the notebook-level ``predictions_original``.
        window: Rolling window length used for both the volatility and the
            trend estimate.

    Returns:
        pd.DataFrame: One column per regime ('High Volatility',
        'Low Volatility', 'Up Trend', 'Down Trend') and one row per metric.
        A regime that contains no samples yields a column of NaN.

    Raises:
        ValueError: If the two series differ in length.
    """
    if actual is None:
        actual = actual_original
    if predicted is None:
        predicted = predictions_original

    # Flatten so that column-shaped (n, 1) inputs behave like 1-D series.
    actual = np.asarray(actual, dtype=float).ravel()
    predicted = np.asarray(predicted, dtype=float).ravel()
    if actual.shape != predicted.shape:
        raise ValueError("actual and predicted must have the same length")

    # Test with different market conditions
    returns = np.diff(actual) / actual[:-1]
    volatility = pd.Series(returns).rolling(window).std()
    median_volatility = volatility.median()

    def _align_to_prices(return_mask):
        # returns[i] describes the move into price i + 1, so the return-based
        # mask is one element shorter than the price arrays. Prepend False
        # (the first price has no return) to make the lengths match.
        return np.concatenate(([False], return_mask.to_numpy()))

    # Up trend vs Down trend: change of the rolling mean of actual prices.
    trend = pd.Series(actual).rolling(window).mean().diff()

    # Warm-up positions are NaN and compare False on both sides of each
    # split, so they are excluded from every regime.
    regime_masks = {
        'High Volatility': _align_to_prices(volatility > median_volatility),
        'Low Volatility': _align_to_prices(volatility <= median_volatility),
        'Up Trend': (trend > 0).to_numpy(),
        'Down Trend': (trend <= 0).to_numpy(),
    }

    # NOTE(review): the selected samples are generally not contiguous in time,
    # so any metric that differences consecutive samples (e.g. directional
    # accuracy) compares across gaps — interpret those rows with care.
    robustness_tests = {}
    for regime, mask in regime_masks.items():
        if mask.any():
            robustness_tests[regime] = calculate_metrics(actual[mask], predicted[mask])
        else:
            # No samples in this regime (e.g. series shorter than the window).
            robustness_tests[regime] = {}

    return pd.DataFrame(robustness_tests)

# Run the regime-based robustness checks and print the resulting table.
robustness_results = test_model_robustness()
print("\nRobustness Test Results:")
print(robustness_results)
