<a href="https://colab.research.google.com/github/rushen-rai/LSTM_Based_Time_Series/blob/main/LSTM_Based_Time_Series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# LSTM Time Series Forecasting - International Airline Passengers
# For Google Colab

# Install required packages (if needed)
# !pip install tensorflow numpy pandas matplotlib scikit-learn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Seed the TensorFlow and NumPy random generators with a fixed value so that
# repeated runs start from the same random state.
tf.random.set_seed(42)
np.random.seed(42)

# Report which TensorFlow release this run is using.
print(f"TensorFlow version: {tf.__version__}")

# International Airline Passengers dataset: monthly values for 1949-1960.
# The literal is laid out one year per row (twelve months each) and then
# flattened into a single 1-D series in chronological order.
data = np.array([
    [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118],  # 1949
    [115, 126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140],  # 1950
    [145, 150, 178, 163, 172, 178, 199, 199, 184, 162, 146, 166],  # 1951
    [171, 180, 193, 181, 183, 218, 230, 242, 209, 191, 172, 194],  # 1952
    [196, 196, 236, 235, 229, 243, 264, 272, 237, 211, 180, 201],  # 1953
    [204, 188, 235, 227, 234, 264, 302, 293, 259, 229, 203, 229],  # 1954
    [242, 233, 267, 269, 270, 315, 364, 347, 312, 274, 237, 278],  # 1955
    [284, 277, 317, 313, 318, 374, 413, 405, 355, 306, 271, 306],  # 1956
    [315, 301, 356, 348, 355, 422, 465, 467, 404, 347, 305, 336],  # 1957
    [340, 318, 362, 348, 363, 435, 491, 505, 404, 359, 310, 337],  # 1958
    [360, 342, 406, 396, 420, 472, 548, 559, 463, 407, 362, 405],  # 1959
    [417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390, 432],  # 1960
]).flatten()

# Quick sanity output: the series should be one-dimensional.
print(f"Dataset shape: {data.shape}")
print(f"Total data points: {data.shape[0]}")

# Plot the untouched series to show its overall shape before preprocessing.
raw_fig, raw_ax = plt.subplots(figsize=(14, 5))
raw_ax.plot(data, linewidth=2)
raw_ax.set_title('International Airline Passengers (1949-1960)', fontsize=16, fontweight='bold')
raw_ax.set_xlabel('Month', fontsize=12)
raw_ax.set_ylabel('Number of Passengers', fontsize=12)
raw_ax.grid(True, alpha=0.3)
raw_fig.tight_layout()
plt.show()

# Rescale the passenger counts into the [0, 1] range.
# The 1-D series is reshaped into a single column because the scaler works on
# 2-D (samples, features) input.
# NOTE(review): the scaler is fitted on the complete series, which includes the
# months that later form the test split.
scaler = MinMaxScaler(feature_range=(0, 1))
passenger_column = data.reshape(-1, 1)
scaled_data = scaler.fit(passenger_column).transform(passenger_column)

print(f"Scaled data shape: {scaled_data.shape}")

# Create sequences for LSTM
def create_sequences(data, lookback=12):
    """Slice a series into overlapping input windows and next-step targets.

    Args:
        data: Array-like series. Either 1-D, or 2-D (or higher) with samples
            along the first axis, in which case only index 0 of the second
            axis is used.
        lookback: Number of consecutive values in each input window. Must be
            a positive integer.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. Row ``k`` of ``X`` holds the
        values at positions ``k .. k + lookback - 1`` and ``y[k]`` is the
        value at position ``k + lookback``. For a 1-D or single-column input
        ``X`` has shape ``(len(data) - lookback, lookback)`` and ``y`` has
        shape ``(len(data) - lookback,)``. When the series has ``lookback``
        or fewer values, ``X`` is empty with shape ``(0, lookback)`` and
        ``y`` is empty with shape ``(0,)``.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError(f"lookback must be >= 1, got {lookback}")

    values = np.asarray(data)
    # Accept a plain 1-D series as well as the column layout the scaler emits.
    series = values if values.ndim == 1 else values[:, 0]

    n_windows = len(series) - lookback
    if n_windows <= 0:
        # Keep the window axis so callers can still read X.shape[1].
        return (
            np.empty((0, lookback), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    X = np.array([series[start:start + lookback] for start in range(n_windows)])
    # Copy so the targets do not alias the caller's array.
    y = np.array(series[lookback:])
    return X, y

# Window length: each sample feeds the previous 12 observations to the model.
lookback = 12
X, y = create_sequences(scaled_data, lookback=lookback)

print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# The LSTM input layout is [samples, time steps, features]; the series is
# univariate, so a trailing feature axis of size 1 is added.
n_samples, n_steps = X.shape
X = X.reshape(n_samples, n_steps, 1)
print(f"Reshaped X: {X.shape}")

# Chronological 80/20 split: the earliest 80% of the windows are used for
# training and the remainder for testing (no shuffling).
split_idx = int(0.8 * len(X))
X_train, y_train = X[:split_idx], y[:split_idx]
X_test, y_test = X[split_idx:], y[split_idx:]

print(f"\nTraining set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")

# Assemble the network layer by layer: two stacked 50-unit LSTM layers, each
# followed by 20% dropout, and a single-unit dense output.
model = Sequential()
# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(50, return_sequences=True, input_shape=(lookback, 1)))
model.add(Dropout(0.2))
# The second LSTM emits only its final output, which feeds the dense layer.
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))

# Train with Adam on mean squared error and track mean absolute error as well.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# Print the layer-by-layer architecture summary.
model.summary()

# Banner announcing the training phase.
separator = "=" * 50
print(f"\n{separator}")
print("TRAINING MODEL")
print(separator)

# Fit for 50 epochs in mini-batches of 16. The test split doubles as the
# validation set, so the per-epoch validation curves are measured on it.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=16,
    epochs=50,
    verbose=1,
)

# Draw the training curves side by side: loss on the left, MAE on the right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))

# Each entry: (axes, history key, display name used in labels, y-axis label).
panels = (
    (ax1, 'loss', 'Loss', 'Loss (MSE)'),
    (ax2, 'mae', 'MAE', 'MAE'),
)
for axis, key, display_name, y_label in panels:
    # Training and validation curves for the same metric share one panel.
    axis.plot(history.history[key], label=f'Training {display_name}', linewidth=2)
    axis.plot(history.history[f'val_{key}'], label=f'Validation {display_name}', linewidth=2)
    axis.set_title(f'Model {display_name}', fontsize=14, fontweight='bold')
    axis.set_xlabel('Epoch', fontsize=12)
    axis.set_ylabel(y_label, fontsize=12)
    axis.legend()
    axis.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Predict over every window (training and test portions alike) and map the
# scaled network outputs back to passenger counts.
predictions = model.predict(X)
predictions = scaler.inverse_transform(predictions)

# Ground truth aligned with the predictions: the first `lookback` months have
# no complete input window, so they are excluded.
actual = data[lookback:]

# Flatten the (n, 1) prediction column so element-wise arithmetic against the
# 1-D `actual` array does not broadcast into an (n, n) matrix.
predicted_flat = predictions.ravel()

# Error metrics over all windows.
mse = mean_squared_error(actual, predicted_flat)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, predicted_flat)
# True MAPE: the mean of the per-point absolute percentage errors. (The ratio
# MAE / mean(actual) is a different quantity.) All passenger counts in this
# series are strictly positive, so the division is safe.
mape = np.mean(np.abs((actual - predicted_flat) / actual)) * 100

# The same metrics restricted to the held-out windows (index `split_idx`
# onwards), which the model was not trained on.
test_actual = actual[split_idx:]
test_predicted = predicted_flat[split_idx:]
test_rmse = np.sqrt(mean_squared_error(test_actual, test_predicted))
test_mae = mean_absolute_error(test_actual, test_predicted)
test_mape = np.mean(np.abs((test_actual - test_predicted) / test_actual)) * 100

print("\n" + "="*50)
print("MODEL PERFORMANCE")
print("="*50)
print(f"RMSE: {rmse:.2f}")
print(f"MAE: {mae:.2f}")
print(f"Mean Absolute Percentage Error: {mape:.2f}%")
print(f"Test-set RMSE: {test_rmse:.2f}")
print(f"Test-set MAE: {test_mae:.2f}")
print(f"Test-set Mean Absolute Percentage Error: {test_mape:.2f}%")

# Overlay the model output on the observed series. Both curves start at month
# index `lookback`, because earlier months have no prediction.
month_index = range(lookback, len(data))
cmp_fig, cmp_ax = plt.subplots(figsize=(14, 6))
cmp_ax.plot(month_index, actual, label='Actual', linewidth=2, color='blue')
cmp_ax.plot(month_index, predictions, label='Predicted', linewidth=2, linestyle='--', color='red')
cmp_ax.set_title('LSTM Predictions vs Actual Values', fontsize=16, fontweight='bold')
cmp_ax.set_xlabel('Month', fontsize=12)
cmp_ax.set_ylabel('Number of Passengers', fontsize=12)
cmp_ax.legend(fontsize=12)
cmp_ax.grid(True, alpha=0.3)
cmp_fig.tight_layout()
plt.show()

# Banner announcing the forecasting phase.
separator = "=" * 50
print(f"\n{separator}")
print("FORECASTING NEXT 12 MONTHS")
print(separator)

# Start from the most recent `lookback` scaled observations.
window = scaled_data[-lookback:, 0]
scaled_forecast = []

# Roll forward one month at a time: predict the next value, then slide the
# window by dropping its oldest entry and appending the new prediction.
for _ in range(12):
    step = model.predict(window.reshape(1, lookback, 1), verbose=0)
    scaled_forecast.append(step[0, 0])
    window = np.concatenate((window[1:], step.ravel()))

# Convert the scaled forecasts back to passenger counts (one column, 12 rows).
future_predictions = scaler.inverse_transform(
    np.array(scaled_forecast).reshape(-1, 1)
)

print("\nForecast for next 12 months:")
for offset, row in enumerate(future_predictions, start=1):
    month_number = len(data) + offset
    print(f"Month {month_number}: {row[0]:.0f} passengers")

# Combine history, fitted values and the 12-month forecast in one chart.
n_observed = len(data)
forecast_fig, forecast_ax = plt.subplots(figsize=(14, 6))
forecast_ax.plot(range(n_observed), data, label='Historical Data', linewidth=2, color='blue')
forecast_ax.plot(range(lookback, n_observed), predictions, label='LSTM Predictions', linewidth=2, linestyle='--', color='red')
forecast_ax.plot(range(n_observed, n_observed + 12), future_predictions, label='12-Month Forecast', linewidth=2, linestyle='--', color='green', marker='o')
# The vertical marker sits on the last observed month.
forecast_ax.axvline(x=n_observed - 1, color='gray', linestyle=':', linewidth=2, label='Forecast Start')
forecast_ax.set_title('LSTM Time Series Forecasting with Future Predictions', fontsize=16, fontweight='bold')
forecast_ax.set_xlabel('Month', fontsize=12)
forecast_ax.set_ylabel('Number of Passengers', fontsize=12)
forecast_ax.legend(fontsize=11)
forecast_ax.grid(True, alpha=0.3)
forecast_fig.tight_layout()
plt.show()

# Print headline figures comparing the forecast with the observed history.
print("\n" + "="*50)
print("SUMMARY STATISTICS")
print("="*50)
print(f"Historical Data Range: {data.min()} - {data.max()}")
print(f"Forecast Range: {future_predictions.min():.0f} - {future_predictions.max():.0f}")

# The mean of the forecast is a level, not a growth figure, so it is reported
# under its own label. Growth is measured against the mean of the most recent
# observed window of the same length as the forecast.
forecast_horizon = len(future_predictions)
forecast_mean = future_predictions.mean()
recent_mean = data[-forecast_horizon:].mean()
print(f"Average Forecast: {forecast_mean:.0f} passengers")
print(
    f"Average Growth (Forecast vs. last {forecast_horizon} months): "
    f"{forecast_mean - recent_mean:+.0f} passengers"
)

# Difference between the final forecast month and the last observed month.
print(f"Total Forecast Increase: {future_predictions[-1, 0] - data[-1]:.0f} passengers")

KeyboardInterrupt: 