In [54]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

In [55]:
df=pd.read_csv('Prayagraj_Traffic_Data.csv')

In [56]:
df.shape

(10080, 23)

In [57]:
df.isnull().mean()*100

record_id                0.000000
date                     0.000000
timestamp                0.000000
intersection_id          0.000000
intersection_name        0.000000
latitude                 0.000000
longitude                0.000000
vehicle_count            0.000000
average_speed            0.000000
signal_cycle_time        0.000000
green_time               0.000000
yellow_time              0.000000
red_time                 0.000000
weather_condition        0.000000
day_of_week              0.000000
is_weekend               0.000000
is_holiday               0.000000
event                    0.000000
pollution_level          0.000000
car_count                0.000000
bus_count                0.000000
truck_count              0.000000
motorcycle_count         0.992063
dtype: float64

In [58]:
df.head()

Unnamed: 0,record_id,date,timestamp,intersection_id,intersection_name,latitude,longitude,vehicle_count,average_speed,signal_cycle_time,...,weather_condition,day_of_week,is_weekend,is_holiday,event,pollution_level,car_count,bus_count,truck_count,motorcycle_count
0,9926,07-01-2023,21:20:00,6,Allahabad Junction,25.44,81.83,75,39.72,120,...,Clear,Saturday,weekend,0,,1.21,48,4,6,17.0
1,8595,06-01-2023,23:10:00,5,Naini Bridge,25.41,81.86,139,30.33,120,...,Clear,Friday,workday,0,,2.86,89,13,17,20.0
2,673,01-01-2023,11:10:00,3,Chowk,25.43,81.84,20,53.28,120,...,Clear,Sunday,weekend,1,,0.34,15,1,2,2.0
3,8744,07-01-2023,01:40:00,4,Tagore Town,25.47,81.89,21,36.06,120,...,Heavy Rain,Saturday,weekend,0,,0.54,12,4,2,3.0
4,316,01-01-2023,05:10:00,6,Allahabad Junction,25.44,81.83,41,47.21,120,...,Clear,Sunday,weekend,1,,0.86,26,6,2,7.0


In [59]:
df.columns = df.columns.str.strip()
df.drop(['date', 'day_of_week','latitude', 'longitude', 'record_id'], axis=1, inplace=True)

In [60]:
df['event'] = df['event'].apply(lambda x: 0 if pd.isna(x) else 1)

In [61]:
df['is_weekend'] = df['is_weekend'].map({'weekend': 1, 'workday': 0})

In [62]:
df = df.drop(columns=['intersection_id'])

In [63]:
df

Unnamed: 0,timestamp,intersection_name,vehicle_count,average_speed,signal_cycle_time,green_time,yellow_time,red_time,weather_condition,is_weekend,is_holiday,event,pollution_level,car_count,bus_count,truck_count,motorcycle_count
0,21:20:00,Allahabad Junction,75,39.72,120,63,4,53,Clear,,0,1,1.21,48,4,6,17.0
1,23:10:00,Naini Bridge,139,30.33,120,51,4,65,Clear,,0,1,2.86,89,13,17,20.0
2,11:10:00,Chowk,20,53.28,120,69,5,46,Clear,,1,1,0.34,15,1,2,2.0
3,01:40:00,Tagore Town,21,36.06,120,59,4,57,Heavy Rain,,0,1,0.54,12,4,2,3.0
4,05:10:00,Allahabad Junction,41,47.21,120,,5,54,Clear,,1,1,0.86,26,6,2,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10075,06:00:00,Tagore Town,23,52.53,120,50,5,65,Clear,,0,1,0.43,14,2,2,5.0
10076,15:00:00,Katra,34,53.72,120,63,5,52,Clear,,1,1,0.57,20,4,0,10.0
10077,03:40:00,Naini Bridge,22,40.91,120,60,5,55,Fog,,0,1,0.41,13,0,5,4.0
10078,19:20:00,MNNIT Gate,273,21.58,120,82,4,34,Light Rain,,0,1,5.58,167,26,32,48.0


In [64]:
df.isnull().mean()*100

timestamp              0.000000
intersection_name      0.000000
vehicle_count          0.000000
average_speed          0.000000
signal_cycle_time      0.000000
green_time             0.000000
yellow_time            0.000000
red_time               0.000000
weather_condition      0.000000
is_weekend           100.000000
is_holiday             0.000000
event                  0.000000
pollution_level        0.000000
car_count              0.000000
bus_count              0.000000
truck_count            0.000000
motorcycle_count       0.992063
dtype: float64

In [65]:
X = df.drop(columns=['event'])
y = df['event']

In [66]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [67]:
X_train.shape, X_test.shape

((8064, 16), (2016, 16))

In [68]:
X_train.isnull().mean()*100

timestamp              0.000000
intersection_name      0.000000
vehicle_count          0.000000
average_speed          0.000000
signal_cycle_time      0.000000
green_time             0.000000
yellow_time            0.000000
red_time               0.000000
weather_condition      0.000000
is_weekend           100.000000
is_holiday             0.000000
pollution_level        0.000000
car_count              0.000000
bus_count              0.000000
truck_count            0.000000
motorcycle_count       0.992063
dtype: float64

In [69]:
X_train['vehicle_count'] = pd.to_numeric(X_train['vehicle_count'], errors='coerce')
X_train['average_speed'] = pd.to_numeric(X_train['average_speed'], errors='coerce')
X_train['green_time'] = pd.to_numeric(X_train['green_time'], errors='coerce')
X_train['yellow_time'] = pd.to_numeric(X_train['yellow_time'], errors='coerce')
X_train['red_time'] = pd.to_numeric(X_train['red_time'], errors='coerce')
X_train['pollution_level'] = pd.to_numeric(X_train['pollution_level'], errors='coerce')
X_train['car_count'] = pd.to_numeric(X_train['car_count'], errors='coerce')
X_train['bus_count'] = pd.to_numeric(X_train['bus_count'], errors='coerce')
X_train['truck_count'] = pd.to_numeric(X_train['truck_count'], errors='coerce')
X_train['motorcycle_count'] = pd.to_numeric(X_train['motorcycle_count'], errors='coerce')

mean_vehicle_count = X_train['vehicle_count'].mean()
mean_average_speed = X_train['average_speed'].mean()
mean_green_time = X_train['green_time'].mean()
mean_yellow_time = X_train['yellow_time'].mean()
mean_red_time = X_train['red_time'].mean()
mean_pollution_level = X_train['pollution_level'].mean()
mean_car_count = X_train['car_count'].mean()
mean_bus_count = X_train['bus_count'].mean()
mean_truck_count = X_train['truck_count'].mean()
mean_motorcycle_count = X_train['motorcycle_count'].mean()

In [70]:
X_train['vehicle_count'] = X_train['vehicle_count'].fillna(mean_vehicle_count)

X_train['average_speed'] = X_train['average_speed'].fillna(mean_average_speed)

X_train['green_time'] = X_train['green_time'].fillna(mean_green_time)

X_train['yellow_time'] = X_train['yellow_time'].fillna(mean_yellow_time)

X_train['red_time'] = X_train['red_time'].fillna(mean_red_time)

X_train['pollution_level'] = X_train['pollution_level'].fillna(mean_pollution_level)

X_train['car_count'] = X_train['car_count'].fillna(mean_car_count)

X_train['bus_count'] = X_train['bus_count'].fillna(mean_bus_count)

X_train['truck_count'] = X_train['truck_count'].fillna(mean_truck_count)

X_train['motorcycle_count'] = X_train['motorcycle_count'].fillna(mean_motorcycle_count)


In [71]:
X_train['signal_cycle_time'] = 120.0
X_test['signal_cycle_time'] = 120.0

In [72]:
X_train

Unnamed: 0,timestamp,intersection_name,vehicle_count,average_speed,signal_cycle_time,green_time,yellow_time,red_time,weather_condition,is_weekend,is_holiday,pollution_level,car_count,bus_count,truck_count,motorcycle_count
8689,23:20:00,Minto Park,62.0,40.01,120.0,61.0,6.0,53.0,Clear,,1,1.29,32.000000,6.0,8.0,16.000000
831,09:40:00,Phaphamau,192.0,28.96,120.0,87.0,5.0,28.0,Clear,,0,4.43,117.000000,23.0,27.0,25.000000
2568,05:10:00,Tagore Town,126.0,29.58,120.0,62.0,2.0,56.0,Cold Wave,,0,2.10,75.000000,10.0,7.0,34.000000
5445,07:50:00,Chowk,132.0,33.32,120.0,58.0,4.0,58.0,Clear,,0,2.71,79.000000,15.0,12.0,26.000000
1365,03:20:00,Chowk,63.0,31.48,120.0,56.0,5.0,59.0,Fog,,1,1.56,36.000000,11.0,6.0,10.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,06:30:00,MNNIT Gate,71.0,26.08,120.0,53.0,3.0,64.0,Heavy Rain,,0,1.62,39.000000,9.0,9.0,14.000000
2514,15:10:00,Chowk,113.0,33.63,120.0,60.0,4.0,56.0,Clear,,0,2.18,67.000000,11.0,10.0,25.000000
6637,13:40:00,Tagore Town,110.0,36.09,120.0,50.0,5.0,65.0,Clear,,,2.06,62.000000,6.0,16.0,26.000000
2575,18:30:00,Allahabad Junction,153.0,26.49,120.0,78.0,4.0,38.0,Light Rain,,0,3.00,66.676099,17.0,11.0,30.000000
