In [None]:
# Cell 0: Imports and Setup
import os
import warnings

# TensorFlow's native logger reads TF_CPP_MIN_LOG_LEVEL when the library is
# first loaded, so the variable has to be set BEFORE `import tensorflow`.
# Setting it after the import does not silence the start-up log lines.
# '2' hides INFO and WARNING messages and keeps errors.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from datetime import datetime, timedelta
from sklearn.metrics import mean_squared_error, mean_absolute_error
from IPython.display import display

# Silence every Python-level warning from this point on (this includes
# warnings raised while importing the project utilities below).
warnings.filterwarnings('ignore')

# Configure matplotlib for notebook environment
plt.style.use('default')

# Import our integrated utilities
from src import (
    BinanceDataOrganizer, DataConfig, GroupedScaler,
    create_lstm_model, evaluate_model,
    plot_candlestick_chart,
    plot_prediction_accuracy_distribution,
    plot_model_performance_summary,
    plot_test_performance_metrics,
    plot_prediction_candlestick,
    production_config, test_config,
)

print("✅ Imports successful")


## 1. Configuration Selection

Choose between production and test configurations:

- **Production Config**: Full-scale deployment with larger models and more data
- **Test Config**: Fast execution for development and testing


In [None]:
# Cell 1: Configuration Selection
# Valid values: 'production' (full-scale run) or 'test' (fast development run).
CONFIG_MODE = 'production'

# Select the configuration explicitly; an unrecognised value is rejected rather
# than silently falling back to the test configuration, so a typo cannot make a
# "production" run train with the small test settings.
if CONFIG_MODE == 'production':
    config = production_config
    print("🚀 PRODUCTION mode")
elif CONFIG_MODE == 'test':
    config = test_config
    print("⚡ TEST mode")
else:
    raise ValueError(
        f"Unknown CONFIG_MODE {CONFIG_MODE!r}; expected 'production' or 'test'"
    )

# Echo the key settings so the run is self-describing in the notebook output.
print(f"Config: {config.symbol} {config.timeframe} | {config.start_date} to {config.end_date}")
print(f"Model: {config.lstm_units} units, {config.epochs} epochs, {config.sequence_length}→{config.prediction_length}")

# Create data config from selected configuration
data_config = config.get_data_config()


## 2. Data Loading and Processing

Load cryptocurrency data and create sequences for training.


In [None]:
# Cell 2: Data Loading
# Build the data organizer from the data config created in Cell 1.
# NOTE(review): no explicit load/fetch call is visible in this notebook —
# presumably the constructor (or the later get_scaled_data() call) loads and
# processes the data; confirm. The cell labels also jump from 2 to 4, so a
# "Cell 3" step may have been removed.
organizer = BinanceDataOrganizer(data_config)

In [None]:
# Cell 4: Data Scaling
# Ask the organizer for the scaled arrays and bind each one to a notebook-level
# name; the keys are read in the same order as the names on the left.
scaled_data = organizer.get_scaled_data()
X_train_scaled, y_train_scaled, X_test_scaled, y_test_scaled = (
    scaled_data[key]
    for key in ('X_train_scaled', 'y_train_scaled', 'X_test_scaled', 'y_test_scaled')
)

# Quick sanity read-out: shape and value range of the scaled training inputs.
print(
    f"✅ Scaled: {X_train_scaled.shape} | "
    f"Range: [{X_train_scaled.min():.3f}, {X_train_scaled.max():.3f}]"
)


## 3. Model Creation and Training

Create and train the LSTM model using the selected configuration.


In [None]:
# Cell 5: Model Creation
# Gather the builder arguments first: the input shape comes from axes 1 and 2
# of the scaled training inputs, everything else from the selected config.
model_kwargs = dict(
    input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2]),
    lstm_units=config.lstm_units,
    dropout_rate=config.dropout_rate,
    learning_rate=config.learning_rate,
    prediction_length=config.prediction_length,
)
model = create_lstm_model(**model_kwargs)

# Report the model size so different configurations are easy to compare.
print(f"✅ Model: {model.count_params():,} parameters | {config.lstm_units} LSTM units")


In [None]:
# Cell 6: Model Training
# Fraction of the training sequences held out for validation. Keras takes this
# slice from the END of the arrays (before shuffling) and never trains on it.
VALIDATION_SPLIT = 0.1

print(f"🚀 Training {config.epochs} epochs...")

# Early stopping (with best-weight restoration) and the LR schedule both react
# to val_loss. They must therefore monitor data that is NOT the test set:
# validating on the test set would let it drive model selection and make the
# test metrics reported in the evaluation cell optimistically biased.
history = model.fit(
    X_train_scaled, y_train_scaled,
    validation_split=VALIDATION_SPLIT,
    epochs=config.epochs,
    batch_size=config.batch_size,
    verbose=1,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=config.patience,
            restore_best_weights=True,
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            patience=config.lr_patience,
            factor=config.lr_factor,
            min_lr=config.min_lr,
        ),
    ]
)

print("✅ Training completed")

# Display training history charts
print("📊 Displaying training history...")
plot_model_performance_summary(history, title=f"{config.symbol} Training History")


In [None]:
# Cell 7: Model Evaluation and Analysis
# Last-epoch values from the Keras history (index -1 is the final epoch that
# ran, which is not necessarily the epoch with the best validation loss).
epoch_metrics = history.history
final_loss, final_mae = epoch_metrics['loss'][-1], epoch_metrics['mae'][-1]
final_val_loss, final_val_mae = epoch_metrics['val_loss'][-1], epoch_metrics['val_mae'][-1]

print(f"📊 Final Training Loss: {final_loss:.4f} | MAE: {final_mae:.4f}")
print(f"📊 Final Validation Loss: {final_val_loss:.4f} | MAE: {final_val_mae:.4f}")

# Score the model on the test arrays; the target scaler is handed over as well.
evaluation_results = evaluate_model(model, X_test_scaled, y_test_scaled, organizer.scaler_y)

print(
    f"📊 Test Performance: MAE={evaluation_results['test_mae']:.2f} | "
    f"MAPE={evaluation_results['test_mape']:.1f}% | "
    f"RMSE={evaluation_results['rmse']:.2f}"
)

# Charts for the test metrics and the trade-off analysis.
print("📊 Displaying test performance metrics...")
plot_test_performance_metrics(evaluation_results, title=f"{config.symbol} Test Performance Analysis")

# Predictions and matching targets as returned by evaluate_model.
y_pred = evaluation_results['predictions']
y_true = evaluation_results['y_true_original']


def _sample_rmse(predicted, actual):
    """Return the root-mean-square error between one prediction and its target."""
    return np.sqrt(np.mean((predicted - actual) ** 2))


# One RMSE per test sample, then the positions of the smallest and largest.
prediction_errors = [_sample_rmse(y_pred[i], y_true[i]) for i in range(len(y_pred))]
best_idx = np.argmin(prediction_errors)
worst_idx = np.argmax(prediction_errors)

print(f"📊 Best: RMSE={prediction_errors[best_idx]:.2f} | Worst: RMSE={prediction_errors[worst_idx]:.2f}")


In [None]:
# Cell 8: Best Prediction Candlestick Chart
print("📈 Best Prediction Candlestick Chart:")
# Scaled target for the test sample with the lowest per-sample RMSE.
actual_data_scaled = y_test_scaled[best_idx]
# Re-run the model on that single sample to get the prediction in scaled form
# (the predictions stored by evaluate_model are not used for this chart).
pred_data_scaled = model.predict(X_test_scaled[best_idx:best_idx+1], verbose=0)[0]
# Test-set input sequence that produced this prediction.
input_data = X_test_scaled[best_idx]
plot_prediction_candlestick(
    pred_data_scaled, actual_data_scaled, input_data,
    f"Best Prediction (Index {best_idx})",
    prediction_errors[best_idx],
)


In [None]:
# Cell 9: Worst Prediction Candlestick Chart
print("📈 Worst Prediction Candlestick Chart:")
# Scaled target for the test sample with the highest per-sample RMSE.
actual_data_scaled = y_test_scaled[worst_idx]
# Re-run the model on that single sample to get the prediction in scaled form
# (the predictions stored by evaluate_model are not used for this chart).
pred_data_scaled = model.predict(X_test_scaled[worst_idx:worst_idx+1], verbose=0)[0]
# Test-set input sequence that produced this prediction.
input_data = X_test_scaled[worst_idx]
plot_prediction_candlestick(
    pred_data_scaled, actual_data_scaled, input_data,
    f"Worst Prediction (Index {worst_idx})",
    prediction_errors[worst_idx],
)


In [None]:
# Cell 10: Random Prediction Candlestick Chart
print("📈 Random Prediction Candlestick Chart:")
# Draw the sample index from a seeded generator so Restart & Run All shows the
# same chart every time; change RANDOM_SEED to look at a different sample.
# NumPy (imported in Cell 0) is used instead of a mid-notebook `import random`.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)
# `integers` excludes the upper bound, so this covers 0 .. len(y_pred) - 1.
random_idx = int(rng.integers(0, len(y_pred)))
print(f"Random Index: {random_idx}")

# Test-set input sequence that produced this prediction.
input_data = X_test_scaled[random_idx]
# Re-run the model on that single sample to get the prediction in scaled form
# (same approach as the best/worst charts).
pred_data_scaled = model.predict(X_test_scaled[random_idx:random_idx+1], verbose=0)[0]
# Scaled target for the same sample.
actual_data_scaled = y_test_scaled[random_idx]
plot_prediction_candlestick(pred_data_scaled, actual_data_scaled, input_data,
                           f"Random Prediction (Index {random_idx})",
                           prediction_errors[random_idx])


In [None]:
# Cell 11: Summary
# The sequence counts are taken from the arrays created in the scaling cell.
# The earlier version read `data_summary` and `feature_info`, which no cell in
# this notebook defines, so it raised NameError on a fresh kernel.
n_train_sequences = len(X_train_scaled)
n_test_sequences = len(X_test_scaled)

print(f"✅ Complete: {CONFIG_MODE.upper()} mode | {config.symbol} {config.timeframe}")
print(
    f"📊 Data: {n_train_sequences + n_test_sequences} sequences "
    f"({n_train_sequences} train / {n_test_sequences} test)"
)
print(f"🤖 Model: {model.count_params():,} params")
