# Traffic Pridiction

In [17]:
# Import Libraries
import numpy as np # For Math
import pandas as pd # For Data Visualization
import matplotlib.pyplot as plt # For Graph
import seaborn as sns # For Advence Graph

from sklearn.model_selection import train_test_split # Import function to split dataset into training and testing sets
from sklearn.ensemble import RandomForestRegressor # Import the Random Forest Regressor model
from sklearn.metrics import mean_absolute_error, mean_squared_error # Regression evaluation metrics


import warnings # Handeling Error
warnings.filterwarnings('ignore')

In [18]:
df = pd.read_csv('Traffic-Prediction-using-Machine-Learning.csv')
df.head()

Unnamed: 0,Day,Date,CodedDay,Zone,Weather,Temperature,Traffic
0,Wednesday,01-06-18,3,2,35,17,2
1,Wednesday,01-06-18,3,3,36,16,3
2,Wednesday,01-06-18,3,4,27,25,5
3,Wednesday,01-06-18,3,5,23,23,3
4,Wednesday,01-06-18,3,6,18,42,2


In [19]:
df.describe()

Unnamed: 0,CodedDay,Zone,Weather,Temperature,Traffic
count,1439.0,1439.0,1439.0,1439.0,1439.0
mean,4.000695,72.549687,23.886032,25.011119,3.031967
std,1.733054,41.55433,13.576787,11.325376,1.429018
min,1.0,1.0,0.0,6.0,1.0
25%,3.0,37.0,12.0,15.0,2.0
50%,4.0,73.0,24.0,25.0,3.0
75%,5.0,108.5,35.0,35.0,4.0
max,7.0,144.0,47.0,45.0,5.0


In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1439 entries, 0 to 1438
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Day          1439 non-null   object
 1   Date         1439 non-null   object
 2   CodedDay     1439 non-null   int64 
 3   Zone         1439 non-null   int64 
 4   Weather      1439 non-null   int64 
 5   Temperature  1439 non-null   int64 
 6   Traffic      1439 non-null   int64 
dtypes: int64(5), object(2)
memory usage: 78.8+ KB


# Converting the data and time

In [21]:
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%y')

In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1439 entries, 0 to 1438
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Day          1439 non-null   object        
 1   Date         1439 non-null   datetime64[ns]
 2   CodedDay     1439 non-null   int64         
 3   Zone         1439 non-null   int64         
 4   Weather      1439 non-null   int64         
 5   Temperature  1439 non-null   int64         
 6   Traffic      1439 non-null   int64         
dtypes: datetime64[ns](1), int64(5), object(1)
memory usage: 78.8+ KB


# Missing value handel if the dataframe has null value

In [7]:
df.isnull().sum()

Day            0
Date           0
CodedDay       0
Zone           0
Weather        0
Temperature    0
Traffic        0
dtype: int64

# Feature Selection

In [8]:
x = df[['CodedDay', 'Zone', 'Weather', 'Temperature']]
y = df[['Traffic']]

In [9]:
x.head()

Unnamed: 0,CodedDay,Zone,Weather,Temperature
0,3,2,35,17
1,3,3,36,16
2,3,4,27,25
3,3,5,23,23
4,3,6,18,42


In [10]:
y.head()

Unnamed: 0,Traffic
0,2
1,3
2,5
3,3
4,2


# Choosing Algorithm

# I choose Random Forest Regression because it offers a great balance between performance, robustness, and simplicity. Here are the main reasons.

In [11]:
# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=42)

In [12]:
# Selection Model
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(x_train, y_train)

# Predict The Test Data

In [13]:
y_pred = rf.predict(x_test)
y_pred

array([2.79, 3.48, 2.83, 2.47, 3.4 , 3.59, 3.19, 3.14, 3.53, 2.52, 2.87,
       3.18, 2.97, 3.04, 2.65, 4.02, 2.57, 3.06, 2.75, 2.59, 2.32, 2.42,
       3.31, 3.3 , 2.96, 2.89, 3.35, 2.75, 2.97, 2.71, 3.33, 2.91, 2.93,
       2.89, 3.11, 3.71, 3.14, 2.42, 3.29, 3.75, 3.26, 4.02, 2.32, 2.91,
       3.01, 2.57, 2.98, 3.11, 3.61, 2.24, 3.81, 3.26, 3.25, 3.48, 2.89,
       3.4 , 2.71, 2.87, 2.68, 2.87, 3.07, 2.64, 3.13, 2.55, 2.72, 3.56,
       3.11, 3.32, 2.3 , 2.82, 3.76, 1.24, 2.68, 3.69, 3.14, 3.29, 3.27,
       2.35, 2.75, 2.62, 2.74, 3.75, 2.65, 2.94, 3.29, 3.15, 2.77, 3.08,
       2.9 , 2.94, 2.6 , 2.38, 3.34, 3.  , 1.92, 2.3 , 2.66, 2.98, 3.43,
       2.74, 2.71, 2.71, 3.11, 2.82, 3.07, 2.82, 2.75, 2.67, 2.77, 2.84,
       2.88, 2.84, 3.14, 2.75, 2.51, 1.64, 2.95, 2.41, 3.79, 3.1 , 2.26,
       2.73, 3.55, 3.67, 2.89, 2.13, 3.66, 3.04, 3.49, 2.8 , 3.12, 2.91,
       3.08, 2.95, 3.48, 3.33, 2.57, 3.14, 2.42, 3.37, 2.06, 2.67, 3.17,
       2.31, 3.65, 2.2 , 2.17, 2.54, 3.67, 3.06, 3.

In [14]:
# Score Checking
rf.score(x_test, y_test)

-0.04004800244910456

In [15]:
# Converting Array
y_test_array = np.array(y_test)

# Evaluate model performance
mae = mean_absolute_error(y_test_array, y_pred)
mse = mean_squared_error(y_test_array, y_pred)
error_rate = (np.abs(y_pred - y_test_array) / y_test_array).mean() * 100

In [16]:
# Print The Result
print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')
print(f'Error Rate (%): {error_rate}')

Mean Absolute Error: 1.2941319444444446
Mean Squared Error: 2.2117947916666667
Error Rate (%): 65.24579836998457
