In [28]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [35]:
# Read the traffic CSV and parse its 'DateTime' column into pandas timestamps
traffic_df = pd.read_csv("Traffic_data.csv").assign(
    DateTime=lambda frame: pd.to_datetime(frame['DateTime'])
)

In [36]:
# Read the events CSV; its 'DateTime' column holds day-month-year strings,
# which are parsed and reduced to plain dates in a new 'Date' column.
events_df = pd.read_csv("Events Data.csv").assign(
    Date=lambda frame: pd.to_datetime(frame['DateTime'], format="%d-%m-%Y").dt.date
)

In [37]:
# Read the Open-Meteo weather export, skipping the three metadata lines
# that precede the actual table.
weather_df = pd.read_csv(
    filepath_or_buffer="open-meteo-28.44N77.01E228m.csv",
    skiprows=3,
)

In [38]:
# Drop the empty column that pandas labels 'Unnamed: 5', then give the
# remaining five columns short, unit-suffixed names.
# errors='ignore' keeps this cell re-runnable: on a second run the column is
# already gone and a plain drop() would raise KeyError.
weather_df = weather_df.drop(columns=['Unnamed: 5'], errors='ignore')

# Names are assigned by position; pandas raises ValueError if the frame does
# not have exactly five columns at this point.
# NOTE(review): assumes the CSV columns arrive in this order - confirm
# against the file header.
weather_df.columns = [
    'DateTime', 'Temperature_C', 'Precipitation_mm',
    'WindSpeed_kmh', 'Humidity_percent'
]

# Parse the timestamp column so it can be joined with the traffic data.
weather_df['DateTime'] = pd.to_datetime(weather_df['DateTime'])

In [39]:
# Left-join weather readings onto the traffic rows by identical 'DateTime';
# traffic rows without a matching weather row keep NaN weather values.
merged_df = traffic_df.merge(weather_df, how='left', on='DateTime')

In [40]:
# Derive a calendar-date key from the timestamp for the join with the event data
merged_df = merged_df.assign(Date=merged_df['DateTime'].dt.date)

In [41]:
# Left-join the event columns onto the merged frame by 'Date'.
# The events' own 'DateTime' column is dropped first so it does not collide
# with the traffic 'DateTime' column.
events_without_timestamp = events_df.drop(columns=['DateTime'])
final_df = merged_df.merge(events_without_timestamp, how='left', on='Date')

In [42]:
# The 'Date' join key is no longer needed once the event columns are attached
final_df = final_df.drop(columns=['Date'])

In [46]:
# Discard every row that has at least one missing value, then renumber the
# index from 0 without keeping the old index as a column.
final_df = final_df.dropna().reset_index(drop=True)

In [47]:
# Print a heading, then show the first ten rows as the cell output
print("Integrated Dataset (first 10 rows):\n")
final_df.iloc[:10]

Integrated Dataset (first 10 rows):



Unnamed: 0,DateTime,Junction,Vehicles,Hour,Day,Weekday,Month,Year,IsWeekend,Lag_1hr,Lag_24hr,Temperature_C,Precipitation_mm,WindSpeed_kmh,Humidity_percent,is_public_holiday,is_sports_event,is_concert,is_public_demonstration
0,2015-11-11 00:00:00,1,28,0,11,2,11,2015,0,22.0,20.0,19.8,0.0,7.1,68.0,1.0,0.0,0.0,0.0
1,2015-11-11 01:00:00,1,25,1,11,2,11,2015,0,28.0,14.0,19.2,0.0,7.0,67.0,1.0,0.0,0.0,0.0
2,2015-11-11 02:00:00,1,19,2,11,2,11,2015,0,25.0,14.0,18.6,0.0,7.1,67.0,1.0,0.0,0.0,0.0
3,2015-11-11 03:00:00,1,21,3,11,2,11,2015,0,19.0,17.0,18.1,0.0,7.2,67.0,1.0,0.0,0.0,0.0
4,2015-11-11 04:00:00,1,20,4,11,2,11,2015,0,21.0,11.0,17.7,0.0,7.9,68.0,1.0,0.0,0.0,0.0
5,2015-11-11 05:00:00,1,15,5,11,2,11,2015,0,20.0,12.0,17.4,0.0,8.1,69.0,1.0,0.0,0.0,0.0
6,2015-11-11 06:00:00,1,17,6,11,2,11,2015,0,15.0,12.0,17.0,0.0,8.7,72.0,1.0,0.0,0.0,0.0
7,2015-11-11 07:00:00,1,14,7,11,2,11,2015,0,17.0,14.0,17.4,0.0,10.7,73.0,1.0,0.0,0.0,0.0
8,2015-11-11 08:00:00,1,19,8,11,2,11,2015,0,14.0,14.0,20.4,0.0,10.5,63.0,1.0,0.0,0.0,0.0
9,2015-11-11 09:00:00,1,22,9,11,2,11,2015,0,19.0,19.0,23.1,0.0,11.4,55.0,1.0,0.0,0.0,0.0


In [48]:
# Count of missing values in each column
final_df.isnull().sum()

DateTime                   0
Junction                   0
Vehicles                   0
Hour                       0
Day                        0
Weekday                    0
Month                      0
Year                       0
IsWeekend                  0
Lag_1hr                    0
Lag_24hr                   0
Temperature_C              0
Precipitation_mm           0
WindSpeed_kmh              0
Humidity_percent           0
is_public_holiday          0
is_sports_event            0
is_concert                 0
is_public_demonstration    0
dtype: int64

In [49]:
# Per-row flag: True when the row repeats an earlier row exactly
final_df.duplicated(keep='first')

0        False
1        False
2        False
3        False
4        False
         ...  
43195    False
43196    False
43197    False
43198    False
43199    False
Length: 43200, dtype: bool

In [50]:
# (number of rows, number of columns)
(len(final_df.index), len(final_df.columns))

(43200, 19)

In [51]:
# Cast the 'Junction' and 'Vehicles' columns to integers in one call
final_df = final_df.astype({'Junction': int, 'Vehicles': int})

In [52]:
# Preview the first five rows after the integer casts
final_df.head(n=5)

Unnamed: 0,DateTime,Junction,Vehicles,Hour,Day,Weekday,Month,Year,IsWeekend,Lag_1hr,Lag_24hr,Temperature_C,Precipitation_mm,WindSpeed_kmh,Humidity_percent,is_public_holiday,is_sports_event,is_concert,is_public_demonstration
0,2015-11-11 00:00:00,1,28,0,11,2,11,2015,0,22.0,20.0,19.8,0.0,7.1,68.0,1.0,0.0,0.0,0.0
1,2015-11-11 01:00:00,1,25,1,11,2,11,2015,0,28.0,14.0,19.2,0.0,7.0,67.0,1.0,0.0,0.0,0.0
2,2015-11-11 02:00:00,1,19,2,11,2,11,2015,0,25.0,14.0,18.6,0.0,7.1,67.0,1.0,0.0,0.0,0.0
3,2015-11-11 03:00:00,1,21,3,11,2,11,2015,0,19.0,17.0,18.1,0.0,7.2,67.0,1.0,0.0,0.0,0.0
4,2015-11-11 04:00:00,1,20,4,11,2,11,2015,0,21.0,11.0,17.7,0.0,7.9,68.0,1.0,0.0,0.0,0.0


In [53]:
# Write the integrated frame to disk without the row index
final_df.to_csv(path_or_buf="Integrated_Dataset.csv", index=False)

In [54]:
# Columns to standardize; 'DateTime' and the 'Vehicles' column are left as they are.
# NOTE(review): 'Junction' looks like an identifier and the is_* columns look
# like binary flags - confirm that standardizing them is intended.
cols_to_normalize = [
    'Hour', 'Day', 'Weekday', 'Month', 'Year', 'Junction', 'IsWeekend',
    'Lag_1hr', 'Lag_24hr',
    'Temperature_C', 'Precipitation_mm', 'WindSpeed_kmh', 'Humidity_percent',
    'is_public_holiday', 'is_sports_event', 'is_concert',
    'is_public_demonstration',
]

# Fit the scaler on the selected columns, then overwrite those columns with
# their standardized values.
scaler = StandardScaler()
scaler.fit(final_df[cols_to_normalize])
final_df[cols_to_normalize] = scaler.transform(final_df[cols_to_normalize])

In [55]:
# Preview the first five rows after standardization
final_df.head(n=5)

Unnamed: 0,DateTime,Junction,Vehicles,Hour,Day,Weekday,Month,Year,IsWeekend,Lag_1hr,Lag_24hr,Temperature_C,Precipitation_mm,WindSpeed_kmh,Humidity_percent,is_public_holiday,is_sports_event,is_concert,is_public_demonstration
0,2015-11-11 00:00:00,-1.217528,28,-1.661325,-0.627605,-0.495629,1.448992,-2.195716,-0.628768,-0.040999,-0.13639,-0.586602,-0.136252,-0.469148,0.451604,4.971346,-0.160128,-0.132378,-0.057831
1,2015-11-11 01:00:00,-1.217528,25,-1.516862,-0.627605,-0.495629,1.448992,-2.195716,-0.628768,0.252335,-0.430276,-0.660022,-0.136252,-0.493512,0.407193,4.971346,-0.160128,-0.132378,-0.057831
2,2015-11-11 02:00:00,-1.217528,19,-1.372399,-0.627605,-0.495629,1.448992,-2.195716,-0.628768,0.105668,-0.430276,-0.733442,-0.136252,-0.469148,0.407193,4.971346,-0.160128,-0.132378,-0.057831
3,2015-11-11 03:00:00,-1.217528,21,-1.227936,-0.627605,-0.495629,1.448992,-2.195716,-0.628768,-0.187666,-0.283333,-0.794625,-0.136252,-0.444783,0.407193,4.971346,-0.160128,-0.132378,-0.057831
4,2015-11-11 04:00:00,-1.217528,20,-1.083473,-0.627605,-0.495629,1.448992,-2.195716,-0.628768,-0.089888,-0.577219,-0.843572,-0.136252,-0.274233,0.451604,4.971346,-0.160128,-0.132378,-0.057831


In [56]:
# Take an independent copy rather than a second name for the same object, so
# a later in-place change to either frame cannot silently alter the other.
finale_df = final_df.copy()

In [57]:
# Write the frame to its submission CSV without the row index
finale_df.to_csv(
    path_or_buf='Integrated_Dataset_Gade_Pavan_Kumar_Reddy.csv',
    index=False,
)

In [58]:
# Preview the first five rows of the exported frame
finale_df.head(n=5)

Unnamed: 0,DateTime,Junction,Vehicles,Hour,Day,Weekday,Month,Year,IsWeekend,Lag_1hr,Lag_24hr,Temperature_C,Precipitation_mm,WindSpeed_kmh,Humidity_percent,is_public_holiday,is_sports_event,is_concert,is_public_demonstration
0,2015-11-11 00:00:00,-1.217528,28,-1.661325,-0.627605,-0.495629,1.448992,-2.195716,-0.628768,-0.040999,-0.13639,-0.586602,-0.136252,-0.469148,0.451604,4.971346,-0.160128,-0.132378,-0.057831
1,2015-11-11 01:00:00,-1.217528,25,-1.516862,-0.627605,-0.495629,1.448992,-2.195716,-0.628768,0.252335,-0.430276,-0.660022,-0.136252,-0.493512,0.407193,4.971346,-0.160128,-0.132378,-0.057831
2,2015-11-11 02:00:00,-1.217528,19,-1.372399,-0.627605,-0.495629,1.448992,-2.195716,-0.628768,0.105668,-0.430276,-0.733442,-0.136252,-0.469148,0.407193,4.971346,-0.160128,-0.132378,-0.057831
3,2015-11-11 03:00:00,-1.217528,21,-1.227936,-0.627605,-0.495629,1.448992,-2.195716,-0.628768,-0.187666,-0.283333,-0.794625,-0.136252,-0.444783,0.407193,4.971346,-0.160128,-0.132378,-0.057831
4,2015-11-11 04:00:00,-1.217528,20,-1.083473,-0.627605,-0.495629,1.448992,-2.195716,-0.628768,-0.089888,-0.577219,-0.843572,-0.136252,-0.274233,0.451604,4.971346,-0.160128,-0.132378,-0.057831
