<a href="https://colab.research.google.com/github/naolkuma/online_shoes_shopping/blob/main/onlineone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import tensorflow
from statsmodels.tsa.stattools import adfuller
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from keras import callbacks
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, LSTM, Dropout, GRU, Bidirectional
from tensorflow.keras.optimizers import SGD
import math
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings("ignore")


In [None]:
## Load the traffic dataset from CSV and preview the first rows

df = pd.read_csv(filepath_or_buffer="traffic.csv")
df.head()

In [None]:
# Print a summary of the frame: column names, dtypes and non-null counts.
df.info()

In [6]:
## Convert the DateTime column to pandas datetime values

parsed_datetime = pd.to_datetime(df['DateTime'])
df['DateTime'] = parsed_datetime

In [7]:
## Remove the ID column from the working frame

df = df.drop(columns=['ID'])

In [None]:

# Explore on an independent copy (df_) so the columns added for plotting
# never end up in df.
df_ = df.copy(deep=True)
df_.head()

In [None]:
## Vehicle counts over time, one line per junction

# Colour palette reused by the plotting cells that follow.
palette = [ "#FFD4DB","#BBE7FE","#D3B5E5","#dfe2b6"]
plt.figure(figsize=(20, 20), dpi=150)
# x, y and hue are all column names resolved against the same frame (df_).
series = sns.lineplot(data=df_, x="DateTime", y="Vehicles", hue="Junction", palette=palette)
series.set_title("Traffic based Junction", fontsize=18)
series.set_ylabel("Number of Vehicles")
series.set_xlabel('Date')

In [None]:
## Derive calendar features (year, month, day of month, hour, weekday name)
## from the DateTime column

dt_accessor = df_['DateTime'].dt
df_ = df_.assign(
    Year=dt_accessor.year,
    Month=dt_accessor.month,
    Date_no=dt_accessor.day,
    Hour=dt_accessor.hour,
    Day=dt_accessor.strftime("%A"),
)
df_.head()

In [None]:
## Plot vehicle counts against each derived time feature, one figure per feature

created_feature = ['Year', 'Month', 'Date_no', 'Hour', 'Day']

for feature in created_feature:
    plt.figure(figsize=(10, 2), dpi=100)
    # df_ holds the derived feature columns as well as Vehicles and Junction,
    # so x, y and hue are all read from that single frame.
    ax = sns.lineplot(data=df_, x=feature, y='Vehicles', hue='Junction', palette=palette)
    # Place the legend outside the axes, to the right of the plot area.
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

In [None]:
## Number of rows recorded per year, split by junction

plt.figure(figsize=(12, 5), dpi=100)
count = sns.countplot(data=df_, x='Year', hue='Junction', palette=palette)
count.set_title('Count Of traffic On junction by years')
# countplot draws how many rows fall in each Year/Junction bin, not the sum
# of the Vehicles column, so the axis is labelled as a record count.
count.set_ylabel("Number of Records")
count.set_xlabel('Year')

In [None]:
## Correlation matrix of the numeric columns

# df_ also contains the DateTime column and the string-valued 'Day' column.
# pandas >= 2.0 no longer drops non-numeric columns silently in corr(), so
# restrict the computation to numeric columns explicitly.
corrmat = df_.corr(numeric_only=True)
plt.subplots(figsize=(10,10))
sns.heatmap(corrmat,cmap= "Pastel2",annot=True,square=True)

In [None]:
## Pairwise relationships between the columns of df_, coloured by junction

sns.pairplot(
    data=df_,
    palette=palette,
    hue="Junction",
)

In [None]:
## Reshape to wide form: rows indexed by DateTime, columns split by Junction

df_junction = df.pivot(index="DateTime", columns='Junction')
df_junction.describe()

In [None]:
#Creating new sets: one single-column frame of vehicle counts per junction

df_1, df_2, df_3, df_4 = (
    df_junction[[('Vehicles', junction)]] for junction in (1, 2, 3, 4)
)
# Only the junction 4 frame has its rows with missing values removed here.
df_4 = df_4.dropna()

list_dfs = [df_1, df_2, df_3, df_4]
for frame in list_dfs:
    # Drop the junction level so each frame's only column is named "Vehicles".
    frame.columns = frame.columns.droplevel(level=1)

#Function to plot comparative plots of dataframes
def Sub_Plots4(df_1, df_2,df_3,df_4,title):
    """Draw four vertically stacked line plots that share a y-axis.

    Args:
        df_1, df_2, df_3, df_4: data handed to ``sns.lineplot`` for the
            first to fourth panel respectively.
        title: figure-level title placed above the panels.

    Each panel is drawn in the matching colour from the module-level
    ``palette`` and its y-axis is labelled "Junction 1" .. "Junction 4".
    Returns None; the figure is drawn as a side effect.
    """
    fig, axes = plt.subplots(4, 1, figsize=(15, 8),sharey=True)
    fig.suptitle(title)
    frames = (df_1, df_2, df_3, df_4)
    for index, (axis, frame) in enumerate(zip(axes, frames)):
        sns.lineplot(ax=axis, data=frame, color=palette[index])
        axis.set(ylabel=f"Junction {index + 1}")


#Plot the raw vehicle counts of each junction to check for stationarity
Sub_Plots4(
    df_1["Vehicles"],
    df_2["Vehicles"],
    df_3["Vehicles"],
    df_4["Vehicles"],
    "Dataframes Before Transformation",
)

In [18]:
# Normalization Function

def Normalize(df, col):
    """Standardise one column of `df` to zero mean and unit standard deviation.

    Args:
        df: DataFrame containing the column.
        col: label of the column to standardise.

    Returns:
        A tuple ``(df_normalized, average, stdev)`` where ``df_normalized`` is
        a one-column DataFrame holding ``(df[col] - average) / stdev`` and
        ``average`` / ``stdev`` are the column's mean and standard deviation
        (as computed by ``Series.mean`` and ``Series.std``). `df` itself is
        left unchanged.
    """
    column = df[col]
    average, stdev = column.mean(), column.std()
    scaled = (column - average) / stdev
    return scaled.to_frame(), average, stdev

In [19]:
## Differencing Function

def Difference(df, col, interval):
    """Return the lag-`interval` differences of column `col` as a list.

    Element ``k`` of the result is
    ``df[col].iloc[k + interval] - df[col].iloc[k]``, so the list has
    ``max(len(df) - interval, 0)`` entries. `df` is not modified.

    Args:
        df: DataFrame containing the column.
        col: label of the column to difference.
        interval: non-negative integer number of rows between the two
            values being subtracted.

    Returns:
        list of differences, in row order.

    Raises:
        ValueError: if `interval` is negative.
    """
    if interval < 0:
        raise ValueError("interval must be non-negative")
    # Series.diff subtracts by row position, so the result does not depend on
    # the index type. Its first `interval` entries have no earlier row to
    # subtract and come back as NaN, hence the slice.
    return df[col].diff(periods=interval).iloc[interval:].tolist()

In [20]:
#Normalizing and Differencing to make the series stationary

# Differencing lags, expressed in rows. The week/day/hour wording follows the
# original comments and assumes one row per hour -- TODO confirm.
ROWS_PER_DAY = 24
ROWS_PER_WEEK = 24 * 7


def _normalize_and_difference(junction_df, interval):
    """Normalize the "Vehicles" column of one junction and difference it.

    Args:
        junction_df: frame with a "Vehicles" column.
        interval: differencing lag in rows.

    Returns:
        ``(frame, diff, average, stdev)`` where ``frame`` has the normalized
        values from row `interval` onwards in column "Norm" and the output of
        ``Difference`` in column "Diff"; ``diff`` is that same output as
        returned by ``Difference``; ``average`` and ``stdev`` are the values
        returned by ``Normalize``.
    """
    normalized, average, stdev = Normalize(junction_df, "Vehicles")
    diff = Difference(normalized, col="Vehicles", interval=interval)
    # Drop the first `interval` rows so the frame lines up with `diff`.
    # .copy() makes the result an independent frame, so adding "Diff" below
    # is not an assignment onto a slice of `normalized`.
    trimmed = normalized[interval:].copy()
    trimmed.columns = ["Norm"]
    trimmed["Diff"] = diff
    return trimmed, diff, average, stdev


df_N1, Diff_1, av_J1, std_J1 = _normalize_and_difference(df_1, ROWS_PER_WEEK) #taking a week's difference
df_N2, Diff_2, av_J2, std_J2 = _normalize_and_difference(df_2, ROWS_PER_DAY) #taking a day's difference
df_N3, Diff_3, av_J3, std_J3 = _normalize_and_difference(df_3, 1) #taking an hour's difference
df_N4, Diff_4, av_J4, std_J4 = _normalize_and_difference(df_4, 1) #taking an hour's difference

In [None]:
# Plot the "Diff" column of each junction's transformed frame
Sub_Plots4(
    df_N1["Diff"],
    df_N2["Diff"],
    df_N3["Diff"],
    df_N4["Diff"],
    "Dataframes After Transformation",
)

In [24]:
# Build the feature matrix (X) and target vector (y) and split them 70/30
# into training and testing sets.

from sklearn.model_selection import train_test_split

# `data` is not defined by any earlier cell, and the feature list needs the
# 'ID' column that was dropped from `df`, so load the raw file here.
# NOTE(review): assumes `data` was meant to be the unmodified traffic.csv
# frame -- confirm against the cell that originally created it.
data = pd.read_csv("traffic.csv")

features = ['DateTime', 'Junction', 'ID']
target_variable = 'Vehicles'

# Create feature matrix (X) and target variable (y)
X = data[features]
y = data[target_variable]

# Split the data into training and testing sets.
# NOTE(review): train_test_split shuffles rows by default, so training and
# testing rows are interleaved in time -- confirm this is intended for a
# time-series forecasting task.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Explore the split data
print("Training Set (70%):")
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

print("\nTesting Set (30%):")
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)


Training Set (70%):
X_train shape: (33684, 3)
y_train shape: (33684,)

Testing Set (30%):
X_test shape: (14436, 3)
y_test shape: (14436,)
