In [3]:
pip install tensorflow[and-cuda]

Collecting tensorflow[and-cuda]
  Using cached tensorflow-2.19.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow[and-cuda])
  Using cached absl_py-2.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting astunparse>=1.6.0 (from tensorflow[and-cuda])
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow[and-cuda])
  Using cached flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow[and-cuda])
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow[and-cuda])
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow[and-cuda])
  Using cached libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow[and-cuda])
  Using 

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Load cleaned data
df = pd.read_csv('dublin_connolly_clean_with_history.csv')

prev_stations_num = 10
prev_stations = [f'prev_station_{i}' for i in range(1, prev_stations_num+1)]

# Encode categorical features
categorical_cols = ['TrainOrigin', 'TrainDestination'] + prev_stations
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded = encoder.fit_transform(df[categorical_cols])

encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(categorical_cols))
df_final = pd.concat([df.drop(columns=categorical_cols), encoded_df], axis=1)

# Define features and target
X = df_final.drop(columns=['delay_minutes'])
y = df_final['delay_minutes']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# Random Forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Evaluation
def evaluate(y_true, y_pred, model_name):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = root_mean_squared_error(y_true, y_pred)
    print(f"📊 {model_name} Results:")
    print(f"   MAE : {mae:.2f} minutes")
    print(f"   RMSE: {rmse:.2f} minutes\n")

evaluate(y_test, y_pred_lr, "Linear Regression")
evaluate(y_test, y_pred_rf, "Random Forest Regressor")


📊 Linear Regression Results:
   MAE : 1.31 minutes
   RMSE: 2.69 minutes

📊 Random Forest Regressor Results:
   MAE : 1.21 minutes
   RMSE: 2.26 minutes



In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

# Load the cleaned dataset
df = pd.read_csv('dublin_connolly_clean_with_history.csv')

prev_stations_num = 10
prev_stations = [f'prev_station_{i}' for i in range(1, prev_stations_num+1)]

# Encode categorical features
categorical_cols = ['TrainOrigin', 'TrainDestination'] + prev_stations
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded = encoder.fit_transform(df[categorical_cols])

encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(categorical_cols))
df_final = pd.concat([df.drop(columns=categorical_cols), encoded_df], axis=1)

# Define features and target
X = df_final.drop(columns=['delay_minutes'])
y = df_final['delay_minutes']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Reshape input for RNN: [samples, time steps, features]
# Here we assume each sample is one timestep (you could adapt for sequences if needed)
X_train_rnn = np.expand_dims(X_train, axis=1)  # Shape: (samples, 1, features)
X_test_rnn = np.expand_dims(X_test, axis=1)

# Build RNN model
model = Sequential([
    SimpleRNN(64, activation='tanh', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

model.compile(optimizer='adam', loss='mse')

# Train the model
history = model.fit(X_train_rnn, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=1)

# Predict on the test set
y_pred_rnn = model.predict(X_test_rnn).flatten()

# Evaluation function
def evaluate(y_true, y_pred, model_name):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = root_mean_squared_error(y_true, y_pred)
    print(f"📊 {model_name} Results:")
    print(f"   MAE : {mae:.2f} minutes")
    print(f"   RMSE: {rmse:.2f} minutes\n")

# Evaluate RNN
evaluate(y_test, y_pred_rnn, "Recurrent Neural Network (RNN)")


Epoch 1/50


  super().__init__(**kwargs)


[1m335/335[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 28.5283 - val_loss: 7.7613
Epoch 2/50
[1m335/335[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 13.6126 - val_loss: 6.9904
Epoch 3/50
[1m335/335[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 11.4899 - val_loss: 6.6641
Epoch 4/50
[1m335/335[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 8.0157 - val_loss: 6.7676
Epoch 5/50
[1m335/335[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 13.0364 - val_loss: 6.3905
Epoch 6/50
[1m335/335[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 21.1070 - val_loss: 6.2995
Epoch 7/50
[1m335/335[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 8.7754 - val_loss: 6.1705
Epoch 8/50
[1m335/335[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 23.7528 - val_loss: 5.9020
Epoch 9/50
[1m335/335[0m [32m━━━━━━━━━━━━━

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load the cleaned dataset
df = pd.read_csv('dublin_connolly_clean_with_history.csv')

prev_stations_num = 10
prev_stations = [f'prev_station_{i}' for i in range(1, prev_stations_num+1)]

# Encode categorical features
categorical_cols = ['TrainOrigin', 'TrainDestination'] + prev_stations
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded = encoder.fit_transform(df[categorical_cols])

encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(categorical_cols))
df_final = pd.concat([df.drop(columns=categorical_cols), encoded_df], axis=1)

# Define features and target
X = df_final.drop(columns=['delay_minutes'])
y = df_final['delay_minutes']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Reshape input for LSTM: [samples, time steps, features]
X_train_lstm = np.expand_dims(X_train, axis=1)  # One time step per sample
X_test_lstm = np.expand_dims(X_test, axis=1)

# Build a deeper LSTM model
model = Sequential([
    LSTM(128, activation='tanh', return_sequences=True, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])),
    Dropout(0.3),  # Dropout after first LSTM layer
    
    LSTM(64, activation='tanh', return_sequences=False),
    Dropout(0.3),  # Dropout after second LSTM layer
    
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])
model.compile(optimizer='adam', loss='mse')

# Train the model
history = model.fit(X_train_lstm, y_train, epochs=120, batch_size=16, validation_split=0.1, verbose=1)

# Predict on the test set
y_pred_lstm = model.predict(X_test_lstm).flatten()

# Evaluation function
def evaluate(y_true, y_pred, model_name):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = root_mean_squared_error(y_true, y_pred)
    print(f"📊 {model_name} Results:")
    print(f"   MAE : {mae:.2f} minutes")
    print(f"   RMSE: {rmse:.2f} minutes\n")

# Evaluate LSTM
evaluate(y_test, y_pred_lstm, "Long Short-Term Memory (LSTM)")


Epoch 1/120


  super().__init__(**kwargs)


[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 14.0132 - val_loss: 8.1762
Epoch 2/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 12.7731 - val_loss: 6.1163
Epoch 3/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 12.9847 - val_loss: 5.7894
Epoch 4/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 34.6055 - val_loss: 6.3866
Epoch 5/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 11.9841 - val_loss: 5.0941
Epoch 6/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 7.6018 - val_loss: 5.5463
Epoch 7/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 9.2500 - val_loss: 6.1508
Epoch 8/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 7.0446 - val_loss: 6.0604
Epoch 9/120
[1m670/670[0m [32m━━━━━━

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models

# Load the cleaned dataset
df = pd.read_csv('dublin_connolly_clean_with_history.csv')

prev_stations_num = 10
prev_stations = [f'prev_station_{i}' for i in range(1, prev_stations_num+1)]

# Encode categorical features
categorical_cols = ['TrainOrigin', 'TrainDestination'] + prev_stations
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded = encoder.fit_transform(df[categorical_cols])

encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(categorical_cols))
df_final = pd.concat([df.drop(columns=categorical_cols), encoded_df], axis=1)

# Define features and target
X = df_final.drop(columns=['delay_minutes'])
y = df_final['delay_minutes']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Transformer expects 3D input [batch, sequence, feature]
# Here, we treat each sample as a sequence of length 1
X_train_tf = np.expand_dims(X_train, axis=1)
X_test_tf = np.expand_dims(X_test, axis=1)

class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = models.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):  # <--- Default None ✅
        attn_output = self.att(inputs, inputs, training=training)  # Pass training
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1, training=training)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)


# Model building
embed_dim = X_train_tf.shape[2]  # Feature dimension
num_heads = 4
ff_dim = 128

inputs = layers.Input(shape=(X_train_tf.shape[1], X_train_tf.shape[2]))
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(inputs)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(64, activation="relu")(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(32, activation="relu")(x)
outputs = layers.Dense(1)(x)  # Regression output

model = models.Model(inputs=inputs, outputs=outputs)

model.compile(optimizer='adam', loss='mse')

# Train the model
history = model.fit(X_train_tf, y_train, epochs=120, batch_size=16, validation_split=0.1, verbose=1)

# Predict on the test set
y_pred_tf = model.predict(X_test_tf).flatten()

# Evaluation function
def evaluate(y_true, y_pred, model_name):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = root_mean_squared_error(y_true, y_pred)
    print(f"📊 {model_name} Results:")
    print(f"   MAE : {mae:.2f} minutes")
    print(f"   RMSE: {rmse:.2f} minutes\n")

# Evaluate Transformer
evaluate(y_test, y_pred_tf, "Transformer")


Epoch 1/120


I0000 00:00:1745680952.389870   45044 service.cc:152] XLA service 0x77905a10f530 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745680952.389883   45044 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 2060, Compute Capability 7.5
2025-04-26 16:22:32.434497: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.


[1m108/670[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 1ms/step - loss: 20.1997

I0000 00:00:1745680954.475465   45044 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 24.8748 - val_loss: 7.4134
Epoch 2/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 10.6768 - val_loss: 11.5685
Epoch 3/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 14.8514 - val_loss: 5.5127
Epoch 4/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 7.6129 - val_loss: 12.4905
Epoch 5/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 34.9461 - val_loss: 5.8259
Epoch 6/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 15.2105 - val_loss: 5.8224
Epoch 7/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 32.0931 - val_loss: 9.3615
Epoch 8/120
[1m670/670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 15.2066 - val_loss: 18.5201
Epoch 9/120
[1m670/670[0m [32m━

In [15]:
df

Unnamed: 0,TrainOrigin,TrainDestination,scheduled_hour,day_of_week,month_of_year,prev_station_1,prev_delay_1,prev_station_2,prev_delay_2,prev_station_3,...,prev_delay_6,prev_station_7,prev_delay_7,prev_station_8,prev_delay_8,prev_station_9,prev_delay_9,prev_station_10,prev_delay_10,delay_minutes
0,Dublin Connolly,Belfast,8,1,4,,0.0,,0.0,,...,0.0,,0.0,,0.0,,0.0,,0.0,5.2
1,Dublin Connolly,Belfast,8,5,2,,0.0,,0.0,,...,0.0,,0.0,,0.0,,0.0,,0.0,1.1
2,Dublin Connolly,Belfast,8,5,3,,0.0,,0.0,,...,0.0,,0.0,,0.0,,0.0,,0.0,0.3
3,Dublin Connolly,Belfast,8,4,11,,0.0,,0.0,,...,0.0,,0.0,,0.0,,0.0,,0.0,4.5
4,Dublin Connolly,Portadown,8,1,10,,0.0,,0.0,,...,0.0,,0.0,,0.0,,0.0,,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14878,Greystones,Howth,9,1,5,Tara Street,1.8,Dublin Pearse,1.9,Grand Canal Dock,...,2.8,Booterstown,2.8,Blackrock,2.2,Seapoint,2.0,Salthill and Monkstown,2.0,2.6
14879,Bray,Howth,9,2,5,Tara Street,-0.4,Dublin Pearse,-0.1,Grand Canal Dock,...,0.5,Booterstown,0.5,Blackrock,0.3,Seapoint,0.3,Salthill and Monkstown,0.4,2.1
14880,Bray,Howth,9,4,5,Tara Street,0.1,Dublin Pearse,0.5,Grand Canal Dock,...,0.8,Booterstown,1.5,Blackrock,0.8,Seapoint,0.9,Salthill and Monkstown,0.9,0.5
14881,Bray,Howth,9,1,10,Tara Street,-0.1,Dublin Pearse,0.0,Grand Canal Dock,...,0.6,Booterstown,0.9,Blackrock,0.4,Seapoint,0.5,Salthill and Monkstown,0.6,0.7
