In [43]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf

In [44]:
#  Import and read the charity_data.csv.
Traffic_analysis_df = pd.read_csv("Metro_Interstate_Traffic_Volume.csv")
Traffic_analysis_df.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time,traffic_volume
0,,288.28,0.0,0.0,40,Clouds,scattered clouds,10/2/2012 9:00,5545
1,,289.36,0.0,0.0,75,Clouds,broken clouds,10/2/2012 10:00,4516
2,,289.58,0.0,0.0,90,Clouds,overcast clouds,10/2/2012 11:00,4767
3,,290.13,0.0,0.0,90,Clouds,overcast clouds,10/2/2012 12:00,5026
4,,291.14,0.0,0.0,75,Clouds,broken clouds,10/2/2012 13:00,4918


In [45]:
# Drop the non-beneficial ID columns, 'weather_main' and 'weather_description'.
Traffic_analysis_df = Traffic_analysis_df.drop(['weather_main','weather_description'], axis=1)
Traffic_analysis_df.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,date_time,traffic_volume
0,,288.28,0.0,0.0,40,10/2/2012 9:00,5545
1,,289.36,0.0,0.0,75,10/2/2012 10:00,4516
2,,289.58,0.0,0.0,90,10/2/2012 11:00,4767
3,,290.13,0.0,0.0,90,10/2/2012 12:00,5026
4,,291.14,0.0,0.0,75,10/2/2012 13:00,4918


In [46]:
# Determine the number of unique values in each column.
Traffic_analysis_df.nunique()

holiday              12
temp               5843
rain_1h             372
snow_1h              12
clouds_all           60
date_time         40575
traffic_volume     6704
dtype: int64

In [47]:
Traffic_analysis_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48204 entries, 0 to 48203
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   holiday         48204 non-null  object 
 1   temp            48204 non-null  float64
 2   rain_1h         48204 non-null  float64
 3   snow_1h         48204 non-null  float64
 4   clouds_all      48204 non-null  int64  
 5   date_time       48204 non-null  object 
 6   traffic_volume  48204 non-null  int64  
dtypes: float64(3), int64(2), object(2)
memory usage: 2.6+ MB


In [48]:
import datetime as dt
import calendar 

# convert the 'date_time' column to datetime format
Traffic_analysis_df['date_time']= pd.to_datetime(Traffic_analysis_df['date_time'])
 
# Check the format of 'date_time' column
Traffic_analysis_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48204 entries, 0 to 48203
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   holiday         48204 non-null  object        
 1   temp            48204 non-null  float64       
 2   rain_1h         48204 non-null  float64       
 3   snow_1h         48204 non-null  float64       
 4   clouds_all      48204 non-null  int64         
 5   date_time       48204 non-null  datetime64[ns]
 6   traffic_volume  48204 non-null  int64         
dtypes: datetime64[ns](1), float64(3), int64(2), object(1)
memory usage: 2.6+ MB


In [50]:
Traffic_analysis_df.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,date_time,traffic_volume
0,,288.28,0.0,0.0,40,2012-10-02 09:00:00,5545
1,,289.36,0.0,0.0,75,2012-10-02 10:00:00,4516
2,,289.58,0.0,0.0,90,2012-10-02 11:00:00,4767
3,,290.13,0.0,0.0,90,2012-10-02 12:00:00,5026
4,,291.14,0.0,0.0,75,2012-10-02 13:00:00,4918


In [51]:
# convert just columns "temp","rain_1h","snow_1h"
Traffic_analysis_df[["temp","rain_1h","snow_1h"]] = Traffic_analysis_df[["temp","rain_1h","snow_1h"]].apply(pd.to_numeric)
Traffic_analysis_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48204 entries, 0 to 48203
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   holiday         48204 non-null  object        
 1   temp            48204 non-null  float64       
 2   rain_1h         48204 non-null  float64       
 3   snow_1h         48204 non-null  float64       
 4   clouds_all      48204 non-null  int64         
 5   date_time       48204 non-null  datetime64[ns]
 6   traffic_volume  48204 non-null  int64         
dtypes: datetime64[ns](1), float64(3), int64(2), object(1)
memory usage: 2.6+ MB


In [52]:
date = pd.date_range('2012-10-02', '2018-9-30',
                     freq='D').to_series()
date.dt.dayofweek

2012-10-02    1
2012-10-03    2
2012-10-04    3
2012-10-05    4
2012-10-06    5
             ..
2018-09-26    2
2018-09-27    3
2018-09-28    4
2018-09-29    5
2018-09-30    6
Freq: D, Length: 2190, dtype: int64

In [53]:
Traffic_analysis_df['Day of the Week']= date

Traffic_analysis_df.head()


Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,date_time,traffic_volume,Day of the Week
0,,288.28,0.0,0.0,40,2012-10-02 09:00:00,5545,NaT
1,,289.36,0.0,0.0,75,2012-10-02 10:00:00,4516,NaT
2,,289.58,0.0,0.0,90,2012-10-02 11:00:00,4767,NaT
3,,290.13,0.0,0.0,90,2012-10-02 12:00:00,5026,NaT
4,,291.14,0.0,0.0,75,2012-10-02 13:00:00,4918,NaT


In [54]:
# Generate our categorical variable lists
application_cat = Traffic_analysis_df.dtypes[Traffic_analysis_df.dtypes=='object'].index.tolist()
application_cat

['holiday']

In [55]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(Traffic_analysis_df[application_cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(application_cat)
encode_df.head()

Unnamed: 0,holiday_Christmas Day,holiday_Columbus Day,holiday_Independence Day,holiday_Labor Day,holiday_Martin Luther King Jr Day,holiday_Memorial Day,holiday_New Years Day,holiday_None,holiday_State Fair,holiday_Thanksgiving Day,holiday_Veterans Day,holiday_Washingtons Birthday
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [56]:
# Merge one-hot encoded features and drop the originals
Traffic_analysis_df = Traffic_analysis_df.merge(encode_df,left_index=True, right_index=True)
Traffic_analysis_df = Traffic_analysis_df.drop(application_cat,1)
Traffic_analysis_df.head()

Unnamed: 0,temp,rain_1h,snow_1h,clouds_all,date_time,traffic_volume,Day of the Week,holiday_Christmas Day,holiday_Columbus Day,holiday_Independence Day,holiday_Labor Day,holiday_Martin Luther King Jr Day,holiday_Memorial Day,holiday_New Years Day,holiday_None,holiday_State Fair,holiday_Thanksgiving Day,holiday_Veterans Day,holiday_Washingtons Birthday
0,288.28,0.0,0.0,40,2012-10-02 09:00:00,5545,NaT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,289.36,0.0,0.0,75,2012-10-02 10:00:00,4516,NaT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,289.58,0.0,0.0,90,2012-10-02 11:00:00,4767,NaT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,290.13,0.0,0.0,90,2012-10-02 12:00:00,5026,NaT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,291.14,0.0,0.0,75,2012-10-02 13:00:00,4918,NaT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [57]:
# Split our preprocessed data into our features and target arrays
y = Traffic_analysis_df["traffic_volume"].values
X = Traffic_analysis_df.drop(["traffic_volume"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=50)

In [59]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

NameError: name 'date_time' is not defined

In [None]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  ?
# hidden_nodes_layer2 = ?

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
# nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()