In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Load dataset
# The path is relative to the working directory; note the space in the file name.
file_path = "./Traffic Dept.csv"
# Raw records as read from disk. Later cells read the "Department", "Year" and
# "Accident_Count" columns from this frame and reassign `df` as they transform it.
df = pd.read_csv(file_path)

In [35]:
# Prepare the model inputs: encode departments, scale accident counts, and cut
# each department's history into (SEQ_LENGTH past values -> next value) samples.

# Start from the raw file every time so this cell is idempotent. The steps below
# overwrite `df`; re-running them on an already-transformed frame would refit
# `label_encoder` on integer codes and `scaler` on already-scaled values, which
# silently breaks both inverse transforms used later in the notebook.
df = pd.read_csv(file_path)

# Encode categorical variables
label_encoder = LabelEncoder()
df["Department"] = label_encoder.fit_transform(df["Department"])

# Normalize numerical features
# NOTE(review): the scaler is fitted on every row, including rows that later end
# up in the test split, so held-out values influence the scaling range.
scaler = MinMaxScaler()
df[["Accident_Count"]] = scaler.fit_transform(df[["Accident_Count"]])

# Sort by department and year
df = df.sort_values(by=["Department", "Year"]).reset_index(drop=True)

# Define sequence length
SEQ_LENGTH = 3
X, y = [], []

departments = df["Department"].unique()
for dept in departments:
    # Pull the department's scaled counts out once as a NumPy array instead of
    # doing a positional DataFrame lookup for every window.
    dept_counts = df.loc[df["Department"] == dept, "Accident_Count"].to_numpy()
    # A department with SEQ_LENGTH or fewer rows yields no samples (empty range).
    for start in range(len(dept_counts) - SEQ_LENGTH):
        stop = start + SEQ_LENGTH
        X.append(dept_counts[start:stop].reshape(-1, 1))  # one window, shape (SEQ_LENGTH, 1)
        y.append(dept_counts[stop])                       # the value right after the window

if not X:
    # Fail here with a clear message rather than later inside the split/fit calls.
    raise ValueError(
        f"No department has more than {SEQ_LENGTH} rows; cannot build any training sequences."
    )

X, y = np.array(X), np.array(y)

In [37]:
# Hold out the last 20% of the samples for evaluation. With shuffle=False the
# split keeps the existing sample order, so the test set is simply the tail of X.
# NOTE(review): X is assembled department by department earlier in the notebook,
# so that tail presumably comes from the last department(s) rather than from the
# most recent years of every department — confirm this is the intended split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
    random_state=42,
)

# Two stacked LSTM layers followed by a single linear output unit. The first
# LSTM returns the full sequence so the second one receives one vector per step.
model = Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(SEQ_LENGTH, 1)))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))

# Mean squared error on the scaled target, optimised with Adam.
model.compile(loss='mse', optimizer=Adam(learning_rate=0.01))

# Fit for 50 epochs; the held-out split is passed as validation data.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=16,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fddcd214df0>

In [38]:
# Predict
y_pred = model.predict(X_test)

# Convert predictions back to original scale
# `scaler` expects a 2-D column, hence the reshape of the 1-D target vector.
y_test_original = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_pred_original = scaler.inverse_transform(y_pred).flatten()

# Print sample predictions
# Slice instead of indexing with range(10) so a test split with fewer than ten
# samples prints what is available instead of raising IndexError.
for actual, predicted in zip(y_test_original[:10], y_pred_original[:10]):
    print(f"Actual: {actual:.2f}, Predicted: {predicted:.2f}")

Actual: 1463.00, Predicted: 1291.44
Actual: 1512.00, Predicted: 1304.88
Actual: 851.00, Predicted: 1304.87
Actual: 1105.00, Predicted: 1067.33
Actual: 1512.00, Predicted: 1189.04
Actual: 1236.00, Predicted: 1256.72
Actual: 1033.00, Predicted: 803.81
Actual: 1233.00, Predicted: 969.19
Actual: 1374.00, Predicted: 1170.96
Actual: 1398.00, Predicted: 1265.61


In [39]:
# Print sample predictions
# Slice instead of indexing with range(10) so a test split with fewer than ten
# samples prints what is available instead of raising IndexError.
for actual, predicted in zip(y_test_original[:10], y_pred_original[:10]):
    print(f"Actual: {actual:.2f}, Predicted: {predicted:.2f}")

# Forecast for 2024-2026
# Roll each department's most recent SEQ_LENGTH scaled counts forward one year at
# a time, feeding every prediction back in as the newest input step.
future_years = [2024, 2025, 2026]
forecast_results = {}  # encoded department -> {year: forecast in original units}

for dept in departments:
    dept_data = df[df["Department"] == dept].reset_index(drop=True)
    if len(dept_data) < SEQ_LENGTH:
        # Not enough history to form one input window; the reshape below would
        # otherwise fail with an unrelated-looking size error.
        print(f"Skipping department {dept}: fewer than {SEQ_LENGTH} rows of history")
        continue
    last_sequence = dept_data.iloc[-SEQ_LENGTH:][["Accident_Count"]].values.reshape(1, SEQ_LENGTH, 1)
    predictions = []

    for year in future_years:
        # verbose=0 keeps Keras from printing progress output for every call.
        pred = model.predict(last_sequence, verbose=0)[0, 0]
        predictions.append(pred)
        # Shift along the time axis only. Without `axis`, np.roll shifts the
        # flattened array, which matches only for a (1, SEQ_LENGTH, 1) input.
        last_sequence = np.roll(last_sequence, -1, axis=1)
        # The oldest step wrapped around to the end; overwrite it with the
        # newest prediction so it becomes the most recent observation.
        last_sequence[0, -1, 0] = pred

    # Convert predictions back to original scale
    predictions_original = scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
    forecast_results[dept] = dict(zip(future_years, predictions_original))

Actual: 1463.00, Predicted: 1291.44
Actual: 1512.00, Predicted: 1304.88
Actual: 851.00, Predicted: 1304.87
Actual: 1105.00, Predicted: 1067.33
Actual: 1512.00, Predicted: 1189.04
Actual: 1236.00, Predicted: 1256.72
Actual: 1033.00, Predicted: 803.81
Actual: 1233.00, Predicted: 969.19
Actual: 1374.00, Predicted: 1170.96
Actual: 1398.00, Predicted: 1265.61


In [42]:
# Report the forecasts, one department heading followed by one line per year.
for encoded_dept, yearly_forecast in forecast_results.items():
    # forecast_results is keyed by the encoded department; map it back to its name.
    dept_name = label_encoder.inverse_transform([encoded_dept])[0]
    print(f"Department {dept_name}:")
    for forecast_year in yearly_forecast:
        forecast_value = yearly_forecast[forecast_year]
        print(f"  Year {forecast_year}: Predicted Accidents {forecast_value:.2f}")

Department Al Maamora:
  Year 2024: Predicted Accidents 1307.50
  Year 2025: Predicted Accidents 1299.76
  Year 2026: Predicted Accidents 1291.93
Department Al Rayyan:
  Year 2024: Predicted Accidents 1287.72
  Year 2025: Predicted Accidents 1293.01
  Year 2026: Predicted Accidents 1284.09
Department Al Shammal:
  Year 2024: Predicted Accidents 1251.65
  Year 2025: Predicted Accidents 1268.27
  Year 2026: Predicted Accidents 1270.76
Department Al Thumama (Al Mattar):
  Year 2024: Predicted Accidents 1251.99
  Year 2025: Predicted Accidents 1272.14
  Year 2026: Predicted Accidents 1266.95
Department Dukhan:
  Year 2024: Predicted Accidents 174.78
  Year 2025: Predicted Accidents 174.56
  Year 2026: Predicted Accidents 162.56
Department Industerid area:
  Year 2024: Predicted Accidents 724.48
  Year 2025: Predicted Accidents 839.92
  Year 2026: Predicted Accidents 1015.96
Department Madinatt khalifah:
  Year 2024: Predicted Accidents 1272.97
  Year 2025: Predicted Accidents 1287.35
  Yea

In [43]:
# Flatten the nested forecast mapping into one record per (department, year).
results = []
for encoded_dept, yearly_forecast in forecast_results.items():
    # Translate the encoded department back to its original name once per department.
    dept_name = label_encoder.inverse_transform([encoded_dept])[0]
    results.extend(
        {
            "Department": dept_name,
            "Year": forecast_year,
            "Accident Count": forecast_value,
        }
        for forecast_year, forecast_value in yearly_forecast.items()
    )

# Build the table, order it by year, and renumber the rows from zero.
df_results = (
    pd.DataFrame(results)
    .sort_values(by="Year")
    .reset_index(drop=True)
)

# Show the table as plain text.
print(df_results)

                Department  Year  Accident Count
0               Al Maamora  2024     1307.500122
1        Madinatt khalifah  2024     1272.966919
2                   South   2024     1224.997070
3                Al Rayyan  2024     1287.718262
4                   Dukhan  2024      174.778030
5               Al Shammal  2024     1251.651123
6          Industerid area  2024      724.482117
7   Al Thumama (Al Mattar)  2024     1251.988647
8                   Dukhan  2025      174.555206
9                   South   2025     1255.662109
10       Madinatt khalifah  2025     1287.350952
11              Al Shammal  2025     1268.266724
12               Al Rayyan  2025     1293.012695
13              Al Maamora  2025     1299.764526
14  Al Thumama (Al Mattar)  2025     1272.135742
15         Industerid area  2025      839.915833
16       Madinatt khalifah  2026     1274.315674
17  Al Thumama (Al Mattar)  2026     1266.946045
18                  Dukhan  2026      162.555710
19              Al S

In [None]:
# Write the forecast table to a CSV file (path relative to the working directory).
# NOTE(review): `index` is left at its default, so the row index is written as an
# extra unnamed first column — pass index=False if consumers do not expect it.
df_results.to_csv("./Traffic Dept Predictions.csv")