## All Necessary Imports

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
import time
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
import itertools

### Change input path to training data accordingly 

In [2]:
input_path = r'C:\Users\Parthasarathy.Harini\Downloads\NLP\DeepLearning\DeepLearning_Group12\traffic.csv'

In [3]:
df = pd.read_csv(input_path)

print(df.head())
print(df.info())

              DateTime  Junction  Vehicles           ID
0  2015-11-01 00:00:00         1        15  20151101001
1  2015-11-01 01:00:00         1        13  20151101011
2  2015-11-01 02:00:00         1        10  20151101021
3  2015-11-01 03:00:00         1         7  20151101031
4  2015-11-01 04:00:00         1         9  20151101041
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48120 entries, 0 to 48119
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   DateTime  48120 non-null  object
 1   Junction  48120 non-null  int64 
 2   Vehicles  48120 non-null  int64 
 3   ID        48120 non-null  int64 
dtypes: int64(3), object(1)
memory usage: 1.5+ MB
None


## Data Pre Processing 

In [4]:
# Convert datetime to proper format
df['DateTime'] = pd.to_datetime(df['DateTime'])
df = df.sort_values('DateTime')

# Extract time-based features
df['hour'] = df['DateTime'].dt.hour
df['day_of_week'] = df['DateTime'].dt.dayofweek
df['month'] = df['DateTime'].dt.month
df['year'] = df['DateTime'].dt.year

# Check data distribution by junction
junction_counts = df.groupby('Junction').size()
print("Data points per junction:")
print(junction_counts)

# Split data by time (2015-2016 for training, 2017 for validation)
train_data = df[df['DateTime'].dt.year < 2017].copy()
val_data = df[df['DateTime'].dt.year == 2017].copy()

print(f"Training data shape: {train_data.shape}")
print(f"Validation data shape: {val_data.shape}")

# Check if we have validation data for each junction
val_junction_counts = val_data.groupby('Junction').size()
print("Validation data points per junction:")
print(val_junction_counts)

# Normalize the data
scaler = MinMaxScaler()
df['Vehicles_scaled'] = scaler.fit_transform(df[['Vehicles']])
train_data['Vehicles_scaled'] = scaler.transform(train_data[['Vehicles']])
val_data['Vehicles_scaled'] = scaler.transform(val_data[['Vehicles']])


Data points per junction:
Junction
1    14592
2    14592
3    14592
4     4344
dtype: int64
Training data shape: (30744, 8)
Validation data shape: (17376, 8)
Validation data points per junction:
Junction
1    4344
2    4344
3    4344
4    4344
dtype: int64


## Sequencing data for training

In [5]:
def create_sequences(data, junction_id, seq_length=24):
   
    junction_data = data[data['Junction'] == junction_id].copy()
    junction_data = junction_data.sort_values('DateTime')

    if len(junction_data) <= seq_length:
        print(f"Warning: Junction {junction_id} has only {len(junction_data)} data points, which is not enough to create sequences of length {seq_length}")
        return np.array([]), np.array([])

    X, y = [], []
    for i in range(len(junction_data) - seq_length):
        X.append(junction_data['Vehicles_scaled'].iloc[i:i+seq_length].values)
        y.append(junction_data['Vehicles_scaled'].iloc[i+seq_length])

    return np.array(X), np.array(y)

junctions = df['Junction'].unique()
X_train_all = []
y_train_all = []
X_val_all = []
y_val_all = []

for junction in junctions:
    X_train, y_train = create_sequences(train_data, junction)
    X_val, y_val = create_sequences(val_data, junction)

    if X_train.size > 0 and y_train.size > 0:
        X_train_all.append(X_train)
        y_train_all.append(y_train)
        print(f"Junction {junction} - Training sequences: {len(X_train)}")
    else:
        print(f"Junction {junction} - No training sequences created")

    if X_val.size > 0 and y_val.size > 0:
        X_val_all.append(X_val)
        y_val_all.append(y_val)
        print(f"Junction {junction} - Validation sequences: {len(X_val)}")
    else:
        print(f"Junction {junction} - No validation sequences created")

if not X_train_all or not y_train_all or not X_val_all or not y_val_all:
    raise ValueError("No sequences were created. Please check your data.")
X_train = np.vstack(X_train_all)
y_train = np.concatenate(y_train_all)
X_val = np.vstack(X_val_all)
y_val = np.concatenate(y_val_all)

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}")
print(f"y_val shape: {y_val.shape}")

Junction 1 - Training sequences: 10224
Junction 1 - Validation sequences: 4320
Junction 2 - Training sequences: 10224
Junction 2 - Validation sequences: 4320
Junction 3 - Training sequences: 10224
Junction 3 - Validation sequences: 4320
Junction 4 - No training sequences created
Junction 4 - Validation sequences: 4320
X_train shape: (30672, 24)
y_train shape: (30672,)
X_val shape: (17280, 24)
y_val shape: (17280,)


## Load Data

In [6]:
class TrafficDataset(Dataset):
    def __init__(self, X, y):
        X_reshaped = X.reshape(X.shape[0], X.shape[1], 1)
        self.X = torch.tensor(X_reshaped, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32).view(-1, 1)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = TrafficDataset(X_train, y_train)
val_dataset = TrafficDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


### CNN+LSTM with grid Search to optimize

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [8]:
class TrafficCNNLSTM(nn.Module):
    def __init__(self, seq_length, lstm_hidden_size=64, lstm_layers=2, cnn_filters=(64, 128, 128), 
                dropout_rate=0.2, kernel_size=3):
        super(TrafficCNNLSTM, self).__init__()
        
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=cnn_filters[0], kernel_size=kernel_size, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        
        self.conv2 = nn.Conv1d(in_channels=cnn_filters[0], out_channels=cnn_filters[1], kernel_size=kernel_size, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool1d(kernel_size=2)
        
        self.conv3 = nn.Conv1d(in_channels=cnn_filters[1], out_channels=cnn_filters[2], kernel_size=kernel_size, padding=1)
        self.relu3 = nn.ReLU()
        
        self.cnn_output_size = cnn_filters[2]
        self.reduced_seq_length = seq_length // 4
        
        self.lstm = nn.LSTM(
            input_size=self.cnn_output_size,
            hidden_size=lstm_hidden_size,
            num_layers=lstm_layers,
            batch_first=True,
            dropout=dropout_rate if lstm_layers > 1 else 0,
            bidirectional=False
        )
        
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(lstm_hidden_size, 32)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(32, 1)
    
    def forward(self, x):
        batch_size = x.size(0)
        
        if x.dim() == 3 and x.size(2) == 1:
            x = x.permute(0, 2, 1)
        elif x.dim() == 2:
            x = x.unsqueeze(1)
        
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.relu3(self.conv3(x))
        
        x = x.permute(0, 2, 1)
        
        lstm_out, _ = self.lstm(x)
        
        lstm_out = lstm_out[:, -1, :]
        
        x = self.dropout(lstm_out)
        x = self.relu4(self.fc1(x))
        x = self.fc2(x)
        
        return x

In [9]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=50, patience=10):
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None
    
    for epoch in range(num_epochs):
        start_time = time.time()
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * inputs.size(0)
        
        train_loss /= len(train_loader.dataset)
        train_losses.append(train_loss)
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item() * inputs.size(0)
        
        val_loss /= len(val_loader.dataset)
        val_losses.append(val_loss)
        epoch_time = time.time() - start_time
        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.6f} | Val Loss: {val_loss:.6f} | Time: {epoch_time:.2f}s")
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict().copy()
            patience_counter = 0
        else:
            patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break
    
    if best_model_state:
        model.load_state_dict(best_model_state)
    
    return model, train_losses, val_losses, best_val_loss  


In [10]:
def evaluate_model(model, val_loader, scaler):
    model.eval()
    predictions = []
    actual_values = []

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            predictions.append(outputs.cpu().numpy())
            actual_values.append(targets.cpu().numpy())

    y_pred_scaled = np.vstack(predictions)
    y_true_scaled = np.vstack(actual_values)

    y_pred = scaler.inverse_transform(y_pred_scaled)
    y_true = scaler.inverse_transform(y_true_scaled)

    mse = mean_squared_error(y_true, y_pred)
    rmse = math.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    return mse, rmse, r2, y_pred, y_true



In [11]:
def grid_search(train_loader, val_loader, scaler, param_grid, num_epochs=50, patience=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    keys = param_grid.keys()
    values = param_grid.values()
    hyperparameter_combinations = list(itertools.product(*values))
    
    results = []
    
    best_val_loss = float('inf')
    best_model = None
    best_params = None
    best_train_losses = None
    best_val_losses = None
    
    total_combinations = len(hyperparameter_combinations)
    for i, combination in enumerate(hyperparameter_combinations):
        params = dict(zip(keys, combination))
        print(f"\nTraining combination {i+1}/{total_combinations}:")
        for k, v in params.items():
            print(f"  {k}: {v}")
        
        sample_input, _ = next(iter(train_loader))
        seq_length = sample_input.shape[1]
        
        model = TrafficCNNLSTM(
            seq_length=seq_length,
            lstm_hidden_size=params['lstm_hidden_size'],
            lstm_layers=params['lstm_layers'],
            cnn_filters=params['cnn_filters'],
            dropout_rate=params['dropout_rate'],
            kernel_size=params['kernel_size']
        ).to(device)
        
        optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])
        criterion = nn.MSELoss()
        
        start_time = time.time()
        trained_model, train_losses, val_losses, current_val_loss = train_model(
            model, train_loader, val_loader, criterion, optimizer, 
            num_epochs=num_epochs, patience=patience
        )
        train_time = time.time() - start_time
        
        mse, rmse, r2, _, _ = evaluate_model(trained_model, val_loader, scaler)
        
        result = {
            **params,
            'val_loss': current_val_loss,
            'mse': mse,
            'rmse': rmse,
            'r2': r2,
            'train_time': train_time
        }
        results.append(result)
        
        if current_val_loss < best_val_loss:
            best_val_loss = current_val_loss
            best_model = trained_model
            best_params = params
            best_train_losses = train_losses
            best_val_losses = val_losses
            
        print(f"Results for combination {i+1}/{total_combinations}:")
        print(f"  Validation Loss: {current_val_loss:.6f}")
        print(f"  RMSE: {rmse:.4f}")
        print(f"  R²: {r2:.4f}")
        print(f"  Training Time: {train_time:.2f}s")
        print("-" * 50)
    
    results_df = pd.DataFrame(results)
    
    results_df = results_df.sort_values('val_loss')
    
    print("\nGrid Search Complete!")
    print(f"Best hyperparameters:")
    for k, v in best_params.items():
        print(f"  {k}: {v}")
    print(f"Best validation loss: {best_val_loss:.6f}")
    print(f"Best RMSE: {results_df.iloc[0]['rmse']:.4f}")
    print(f"Best R²: {results_df.iloc[0]['r2']:.4f}")
    
    plt.figure(figsize=(12, 6))
    plt.plot(best_train_losses, label='Training Loss')
    plt.plot(best_val_losses, label='Validation Loss')
    plt.title('Best Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.show()
    
    if len(results_df) > 3:
        fig, axes = plt.subplots(3, 2, figsize=(15, 12))
        axes = axes.flatten()
        
        for i, param in enumerate(param_grid.keys()):
            if i < len(axes):
                if param == 'cnn_filters':
                    results_df['cnn_filters_str'] = results_df['cnn_filters'].apply(str)
                    df_plot = results_df.groupby('cnn_filters_str')['rmse'].mean().reset_index()
                    axes[i].bar(df_plot['cnn_filters_str'], df_plot['rmse'])
                    axes[i].set_xticklabels(df_plot['cnn_filters_str'], rotation=90)
                else:
                    df_plot = results_df.groupby(param)['rmse'].mean().reset_index()
                    axes[i].bar(df_plot[param].astype(str), df_plot['rmse'])
                
                axes[i].set_title(f'Effect of {param} on RMSE')
                axes[i].set_ylabel('RMSE')
                axes[i].grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.show()
    
    return results_df, best_model, best_params, best_train_losses, best_val_losses

In [12]:
# Define parameter grid
param_grid = {
    'lstm_hidden_size': [32,64,128],
    'lstm_layers': [1,2],
    'cnn_filters': [(32,64,64),(64, 128, 128)],
    'kernel_size': [3,5],
    'dropout_rate': [0.2,0.3],
    'learning_rate': [0.001, 0.0005]
}

results_df, best_model, best_params, best_train_losses, best_val_losses = grid_search(
    train_loader, val_loader, scaler, param_grid, num_epochs=50, patience=10
)
mse, rmse, r2, y_pred, y_true = evaluate_model(best_model, val_loader, scaler)

print(f"Best Model Performance:")
print(f"Mean Squared Error: {mse:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"R² Score: {r2:.4f}")
plt.figure(figsize=(15, 6))
plt.plot(y_true[:200], label='Actual')
plt.plot(y_pred[:200], label='Predicted')
plt.title('Traffic Prediction: Actual vs Predicted (Best Model)')
plt.xlabel('Time Steps')
plt.ylabel('Number of Vehicles')
plt.legend()
plt.grid(True)
plt.show()


Using device: cuda

Training combination 1/96:
  lstm_hidden_size: 32
  lstm_layers: 1
  cnn_filters: (32, 64, 64)
  kernel_size: 3
  dropout_rate: 0.2
  learning_rate: 0.001
Epoch 1/50 | Train Loss: 0.001931 | Val Loss: 0.001059 | Time: 5.51s


KeyboardInterrupt: 

### Visualisations and saving the final Model

In [None]:
junction_val_indices = {}
current_idx = 0

for i, junction in enumerate(junctions):
        junction_length = len(X_val_all[i])
        junction_val_indices[junction] = (current_idx, current_idx + junction_length)
        current_idx += junction_length
if 'junction_val_indices' in locals():
    for junction, (start_idx, end_idx) in junction_val_indices.items():
        if end_idx > start_idx:
            plt.figure(figsize=(15, 6))
            plt.plot(y_true[start_idx:start_idx+200], label='Actual')
            plt.plot(y_pred[start_idx:start_idx+200], label='Predicted')
            plt.title(f'Traffic Prediction for Junction {junction}: Actual vs Predicted (Best Model)')
            plt.xlabel('Time Steps')
            plt.ylabel('Number of Vehicles')
            plt.legend()
            plt.grid(True)
            plt.show()

torch.save(best_model.state_dict(), 'best_traffic_cnn_lstm_model.pth')
print("Best model saved successfully.")
