# 4.Price Prediction

In [1]:
import sys
import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.callbacks import EarlyStopping
sys.path.append('../config')  
from model_config import ModelConfig

2024-11-24 22:52:09.651148: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load scalers
scaler_X = joblib.load('../models/scaler_X.pkl')
scaler_y = joblib.load('../models/scaler_y.pkl')

# Load normalized data
normalized_data = joblib.load('../models/normalized_data.pkl')
X_normalized = normalized_data['X_normalized']
y_normalized = normalized_data['y_normalized']

In [3]:
# Step 1: Create sequences for time series prediction
def create_sequences(X, y, time_steps= ModelConfig.TIME_STEPS):
    """
    Create sequences for time series prediction
    Args:
        X: Input features DataFrame
        y: Target values
        time_steps: Number of time steps to look back
    Returns:
        Arrays of sequences for X and y
    """
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X.iloc[i:(i + time_steps)].values)
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)

In [4]:
# Create sequences
X_seq, y_seq = create_sequences(X_normalized, y_normalized, ModelConfig.TIME_STEPS)
print("Shape of the input sequences:", X_seq.shape)
print("Shape of the output sequences:", y_seq.shape)

Shape of the input sequences: (4630, 30, 4)
Shape of the output sequences: (4630, 1)


In [5]:
# Split data
train_size = int(len(X_seq) * ModelConfig.TRAIN_SPLIT)
X_train, X_test = X_seq[:train_size], X_seq[train_size:]
y_train, y_test = y_seq[:train_size], y_seq[train_size:]

In [6]:
# Print the shapes of the resulting datasets
print("Training Data Shape:", X_train.shape)
print("Testing Data Shape:", X_test.shape)
print("Training Labels Shape:", y_train.shape)
print("Testing Labels Shape:", y_test.shape)

Training Data Shape: (3704, 30, 4)
Testing Data Shape: (926, 30, 4)
Training Labels Shape: (3704, 1)
Testing Labels Shape: (926, 1)


# 4.1 LSTM Model Construction

In [7]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error

#Create LSTM model
def create_lstm_model(input_shape, units=[64, 32], dropout=0.5):
    """
    Create LSTM model with original configuration
    Args:
        input_shape: Shape of input data (TIME_STEPS, features)
        units: List of units for LSTM layers [default: [64, 32]]
        dropout: Dropout rate [default: 0.5]
    """
    model = Sequential()
    
    # First LSTM layer with return sequences
    model.add(LSTM(
        units[0],
        return_sequences=True,
        activation=ModelConfig.MODEL_PARAMS['activation'],
        recurrent_activation=ModelConfig.MODEL_PARAMS['recurrent_activation'],
        input_shape=input_shape
    ))
    model.add(Dropout(dropout))
    
    # Second LSTM layer
    model.add(LSTM(units[1]))
    model.add(Dropout(dropout))
    
    # Output layer
    model.add(Dense(1))
    
    return model


In [8]:
# Create model
model = create_lstm_model(
    input_shape=(ModelConfig.TIME_STEPS, X_train.shape[2]),
    units=ModelConfig.MODEL_PARAMS['units'],
    dropout=ModelConfig.MODEL_PARAMS['dropout']
)

model.compile(
    optimizer=Adam(learning_rate=ModelConfig.TRAINING_PARAMS['learning_rate']),
    loss='mse',
    metrics=['mae']
)

  super().__init__(**kwargs)


## 4.2 Model Training

In [9]:
import time

# Create a dictionary to store timing metrics
timing_metrics = {
    'training_time': 0,
    'prediction_time': 0,
    'total_time': 0
}


In [10]:
# Training with timing
print("Starting model training...")
training_start = time.time()

# Create early stopping callback
early_stopping = EarlyStopping(
    monitor='val_loss',          # Monitor validation loss for improvement
    patience=5,                  # Number of epochs to wait before stopping
    restore_best_weights=True,   # Restore model weights from the epoch with the best value
    min_delta=0.0001            # Minimum change to qualify as an improvement
)

# Train model with early stopping
history = model.fit(
    X_train, y_train,
    epochs=ModelConfig.TRAINING_PARAMS['epochs'],
    batch_size=ModelConfig.TRAINING_PARAMS['batch_size'],
    validation_split=ModelConfig.TRAINING_PARAMS['validation_split'],
    callbacks=[early_stopping],  # Add early stopping callback
    verbose=1
)


Starting model training...
Epoch 1/100
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 40ms/step - loss: 0.0046 - mae: 0.0387 - val_loss: 0.0069 - val_mae: 0.0610
Epoch 2/100
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - loss: 1.8893e-04 - mae: 0.0093 - val_loss: 0.0119 - val_mae: 0.0835
Epoch 3/100
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 42ms/step - loss: 1.5181e-04 - mae: 0.0082 - val_loss: 0.0075 - val_mae: 0.0642
Epoch 4/100
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 48ms/step - loss: 1.2847e-04 - mae: 0.0076 - val_loss: 0.0024 - val_mae: 0.0348
Epoch 5/100
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - loss: 1.5902e-04 - mae: 0.0086 - val_loss: 0.0080 - val_mae: 0.0653
Epoch 6/100
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 52ms/step - loss: 9.9932e-05 - mae: 0.0070 - val_loss: 0.0026 - val_mae: 0.0374
Epoch 7/100
[1m93/93[0m [32m━━━━━━━━━━

In [13]:
training_end = time.time()
timing_metrics['training_time'] = training_end - training_start

#Update model metrics dictionary to include early stopping information
model_metrics = {
    # Runtime metrics
    'training_time': training_end - training_start,
    
    # Training history
    'history': history.history,  # including loss, mae
    
    # Final metrics
    'final_metrics': {
        'mae': history.history['mae'][-1],          # Mean Absolute Error
        'val_mae': history.history['val_mae'][-1],  # Validation MAE
        'mape': history.history['mape'][-1] if 'mape' in history.history else None,  # MAPE if available
        'stopped_epoch': early_stopping.stopped_epoch + 1 if early_stopping.stopped_epoch > 0 else None  # Add actual training epochs
    }
}

In [14]:
# save model and trainning time
model.save('../models/lstm_model.keras')
joblib.dump(history.history, '../models/training_history.pkl')
joblib.dump(timing_metrics, '../models/training_metrics.pkl')

# Print training summary
print(f"\nTraining completed in {timing_metrics['training_time']:.2f} seconds")
if model_metrics['final_metrics']['stopped_epoch']:
    print(f"Early stopping occurred at epoch {model_metrics['final_metrics']['stopped_epoch']}")


Training completed in 162.87 seconds
Early stopping occurred at epoch 9
