In [188]:
import pandas as pd
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from torch.utils.data import DataLoader, TensorDataset

In [189]:
# Load data and sort them by time
df = pd.read_csv("./Data/traffic.csv")
df['DateTime'] = pd.to_datetime(df['DateTime'])
df.sort_values(by=['DateTime'], inplace=True)

In [190]:
# Encode Junction as a numerical feature
junction_encoder = LabelEncoder()
df['Junction_encoded'] = junction_encoder.fit_transform(df['Junction'])

In [191]:
# Extract time-based features for better pattern recognization
df['Hour'] = df['DateTime'].dt.hour
df['DayOfWeek'] = df['DateTime'].dt.dayofweek
df['Month'] = df['DateTime'].dt.month
df['IsWeekend'] = (df['DayOfWeek'] >= 5).astype(int)

In [192]:
# Normalize data

scaler_dict = {}
numerical_features = ['Vehicles', 'Hour', 'DayOfWeek', 'Month', 'Junction_encoded']
for feature in numerical_features:
    scaler = MinMaxScaler()
    df[f'{feature}_normalized'] = scaler.fit_transform(df[[feature]])
    scaler_dict[feature] = scaler

In [193]:
# For each junction (can possibly be 1), create a RNN/LSTM-like training sequence

def create_sequences_by_junction(data, seq_length):
    feature_cols = [col for col in data.columns if '_normalized' in col]
    sequences = []
    labels = []
    
    # Group by junction and create sequences for each junction
    for junction in data['Junction'].unique():
        junction_data = data[data['Junction'] == junction].copy()
        
        for i in range(len(junction_data) - seq_length):
            seq = junction_data[feature_cols].iloc[i:i+seq_length].values
            label = junction_data['Vehicles_normalized'].iloc[i+seq_length]
            sequences.append(seq)
            labels.append(label)
    
    return np.array(sequences), np.array(labels)

seq_length = 10  # Lookback window, can be modified

# Create data
#X, y = create_sequences_by_junction(df, seq_length)
#X = torch.tensor(X, dtype=torch.float32)
#y = torch.tensor(y, dtype=torch.float32)

In [194]:
# Train test split

train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=32, shuffle=False)

In [None]:
# Basic LSTM model
class JunctionTrafficLSTM(nn.Module):
    def __init__(self, input_size=5, hidden_size=32, num_layers=5, output_size=1):
        super(JunctionTrafficLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)        
        self.fc5 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        out = self.dropout(lstm_out[:, -1, :])
        out = self.relu(self.fc1(out))
        out = self.relu(self.fc2(out))
        out = self.relu(self.fc3(out))
        out = self.relu(self.fc4(out))
        return self.fc5(out)

In [196]:
# Model training set up

# Use GPU for better performance if applicable
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = JunctionTrafficLSTM().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [197]:
def train_model(model, train_loader, val_loader, num_epochs=50, patience=10):
    best_val_loss = float('inf')
    patience_counter = 0
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs.squeeze(), batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                val_loss += criterion(outputs.squeeze(), batch_y).item()
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        
        # Learning rate scheduling
        scheduler.step(val_loss)
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
        
        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered")
                break

In [198]:
def accuracy(model, test_loader, threshold):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)
            difference = torch.abs(outputs.squeeze() - batch_y)
            correct += (difference <= threshold).sum().item()
            total += batch_y.size(0)
    
    accuracy_percentage = (correct / total) * 100
    print(f"Test Accuracy: {accuracy_percentage:.2f}%")
    return accuracy_percentage

In [199]:
def predict_traffic(model, input_seq, junction, scalers):
    model.eval()
    input_seq = torch.tensor(input_seq, dtype=torch.float32).unsqueeze(0).to(device)
    
    with torch.no_grad():
        prediction = model(input_seq).cpu().numpy()
    
    # Inverse transform the prediction
    return scalers['Vehicles'].inverse_transform(prediction.reshape(-1, 1))

In [200]:
val_size = int(0.1 * len(X_train))
X_val = X_train[-val_size:]
y_val = y_train[-val_size:]
X_train = X_train[:-val_size]
y_train = y_train[:-val_size]

val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=32, shuffle=False)

In [201]:
patience = 10
train_model(model, train_loader=DataLoader(TensorDataset(X_train, y_train), 
            batch_size=32, shuffle=True),
            val_loader=val_loader,patience=patience)

Epoch 1, Train Loss: 0.0035, Val Loss: 0.0027
Epoch 2, Train Loss: 0.0010, Val Loss: 0.0017
Epoch 3, Train Loss: 0.0007, Val Loss: 0.0011
Epoch 4, Train Loss: 0.0006, Val Loss: 0.0010
Epoch 5, Train Loss: 0.0005, Val Loss: 0.0011
Epoch 6, Train Loss: 0.0005, Val Loss: 0.0010
Epoch 7, Train Loss: 0.0005, Val Loss: 0.0010
Epoch 8, Train Loss: 0.0005, Val Loss: 0.0010
Epoch 9, Train Loss: 0.0005, Val Loss: 0.0010
Epoch 10, Train Loss: 0.0004, Val Loss: 0.0009
Epoch 11, Train Loss: 0.0004, Val Loss: 0.0012
Epoch 12, Train Loss: 0.0004, Val Loss: 0.0010
Epoch 13, Train Loss: 0.0004, Val Loss: 0.0010
Epoch 14, Train Loss: 0.0004, Val Loss: 0.0011
Epoch 15, Train Loss: 0.0004, Val Loss: 0.0010
Epoch 16, Train Loss: 0.0004, Val Loss: 0.0009
Epoch 17, Train Loss: 0.0004, Val Loss: 0.0009
Epoch 18, Train Loss: 0.0004, Val Loss: 0.0009
Epoch 19, Train Loss: 0.0004, Val Loss: 0.0009
Epoch 20, Train Loss: 0.0004, Val Loss: 0.0009
Epoch 21, Train Loss: 0.0004, Val Loss: 0.0010
Epoch 22, Train Loss: 

In [220]:
threshold = 0.02
uncertainty = (scaler_dict['Vehicles'].inverse_transform([[threshold]]) - 
                scaler_dict['Vehicles'].inverse_transform([[0]]))[0][0]
print(f"\nBasic Accuracy Evaluation:")
print(f"Threshold is {threshold}, which corresponds to {int(uncertainty)} car(s) uncertainty")
print("Predicted values within this uncertainty are considered correct in the calculation of accuracy")
accuracy(model, test_loader, threshold)


Basic Accuracy Evaluation:
Threshold is 0.02, which corresponds to 3 car(s) uncertainty
Predicted values within this uncertainty are considered correct in the calculation of accuracy
Test Accuracy: 68.30%


68.30282861896838

In [247]:
import numpy as np
import torch

def predict_future_traffic(model, latest_time, num_hours, scalers, seq_length=10):
    """
    Predict traffic for the future time period (in hours) starting from the latest_time.
    The model is used to predict at hourly intervals.

    :param model: Trained LSTM model
    :param latest_time: The latest time in the dataset to start predictions (in datetime format)
    :param num_hours: Number of hours to predict into the future
    :param scalers: Dictionary of scalers for normalizing and inverse transforming data
    :param seq_length: Length of input sequence for the model (default is 10)
    :return: List of predicted traffic values (in number of vehicles) for each hour in the future
    """
    
    model.eval()
    closest_time_idx = (df['DateTime'] - latest_time).abs().argmin()

    print((df['DateTime'] - latest_time).abs().min())

    latest_data = df.iloc[closest_time_idx]

    input_seq = []
    for i in range(seq_length):
        # Ensure the values are converted to a numeric type (np.float32)
        values = latest_data[['Vehicles_normalized', 
                              'Hour_normalized', 'DayOfWeek_normalized', 
                              'Month_normalized', 'Junction_encoded_normalized']].values.astype(np.float32)
        input_seq.append(values)
        
        if closest_time_idx + i + 1 < len(df):
            latest_data = df.iloc[closest_time_idx + i + 1]
        else:
            break
            
    input_seq = np.array(input_seq, dtype=np.float32)
    
    predictions = []

    for _ in range(num_hours):
        input_seq_tensor = torch.tensor(input_seq, dtype=torch.float32).unsqueeze(0).to(device)
        
        with torch.no_grad():
            prediction = model(input_seq_tensor).cpu().numpy()
        
        # Inverse transform the prediction
        predicted_vehicles = scalers['Vehicles'].inverse_transform(prediction.reshape(-1, 1))
        predictions.append(predicted_vehicles[0][0])  # Append only the predicted value (first value of the prediction)
        
        # Update the input sequence for the next prediction
        input_seq = np.roll(input_seq, shift=-1, axis=0)
        
        # Create a new input array with predicted values, keeping the same feature count
        new_input = np.array([[ 
            predicted_vehicles[0][0],  # Predicted vehicle count
            input_seq[-1][1],           # Hour
            input_seq[-1][2],           # Day of the week
            input_seq[-1][3],           # Month
            input_seq[-1][4]            # Junction encoding
        ]], dtype=np.float32)
        
        # Add the new prediction into the input sequence
        input_seq[-1] = new_input[0]  # Replace the last entry with new input  
    
    return predictions


In [None]:
latest_time = pd.to_datetime("2015-11-03 09:00:00")  # Example, replace with actual latest time

num_hours = 24  # Predict the next 24 hours

model.load_state_dict(torch.load('best_model.pth'))

predictions = predict_future_traffic(model, latest_time, num_hours, scaler_dict)

# Create a DataFrame to store the predictions
predicted_times = [latest_time + timedelta(hours=i) for i in range(num_hours)]
predicted_traffic_df = pd.DataFrame({
    'Predicted Time': predicted_times,
    'Predicted Traffic (Vehicles)': predictions
})

# Print the DataFrame
print(predicted_traffic_df)

# Optionally, save to a CSV file
predicted_traffic_df.to_csv('predicted_traffic.csv', index=False)

301 days 15:00:00
        Predicted Time  Predicted Traffic (Vehicles)
0  2015-01-03 09:00:00                     11.357082
1  2015-01-03 10:00:00                     92.941544
2  2015-01-03 11:00:00                    108.838860
3  2015-01-03 12:00:00                    149.109604
4  2015-01-03 13:00:00                    167.725433
5  2015-01-03 14:00:00                    167.499908
6  2015-01-03 15:00:00                    166.327026
7  2015-01-03 16:00:00                    165.413773
8  2015-01-03 17:00:00                    163.312180
9  2015-01-03 18:00:00                    160.947250
10 2015-01-03 19:00:00                    158.240295
11 2015-01-03 20:00:00                    156.708755
12 2015-01-03 21:00:00                    156.672165
13 2015-01-03 22:00:00                    156.668472
14 2015-01-03 23:00:00                    156.675430
15 2015-01-04 00:00:00                    156.677017
16 2015-01-04 01:00:00                    156.674347
17 2015-01-04 02:00:00      