Imports

In [25]:
# Imports

import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder


Prepare the data

In [26]:
# Read the weather and pricing csvs and join dataframes

df = pd.read_csv("data/neural_network_df.csv")


# Review

df.head(15)


Unnamed: 0,state,temperature,precip,humidity,visibility,pressure,cloudcover,heatindex,dewpoint,windchill,windgust,feelslike,mintemp,maxtemp,avgtemp,totalsnow,sunhour,blmn
0,Florida,18.5311,0.124402,60.258373,9.966507,1018.114833,28.937799,17.062201,8.641148,16.239234,17.559809,16.37799,14.363636,18.5311,16.593301,0.0,7.513397,18.34
1,Arizona,17.733333,0.0,31.533333,10.0,1019.066667,8.266667,13.133333,-3.933333,12.666667,7.133333,12.666667,8.733333,17.733333,13.066667,0.0,8.7,18.34
2,South Carolina,12.875,0.0,67.291667,10.0,1017.041667,21.708333,9.666667,3.75,8.583333,12.916667,8.583333,7.083333,12.875,10.166667,0.0,8.2875,18.34
3,Georgia,12.032258,0.0,65.935484,10.0,1019.129032,9.225806,7.354839,1.096774,5.322581,15.322581,5.322581,3.741935,12.032258,7.870968,0.0,8.7,18.34
4,Nevada,11.625,2.2375,46.25,9.5,1018.0,29.375,7.25,-5.0,6.25,11.375,6.25,2.375,11.625,8.0,1.225,7.4,18.34
5,Florida,18.732057,0.0,55.483254,10.0,1018.382775,0.669856,15.473684,6.282297,15.07177,18.449761,15.07177,12.444976,18.732057,15.866029,0.0,8.7,19.05
6,South Carolina,13.333333,0.008333,66.541667,9.958333,1015.791667,23.125,9.333333,3.0,7.75,17.208333,7.75,6.291667,13.333333,9.833333,0.0,8.3,19.05
7,Nevada,12.75,0.5,47.625,10.0,1021.125,9.875,7.625,-4.625,6.5,12.5,6.5,2.125,12.75,8.125,0.0,8.7,19.05
8,Georgia,11.806452,0.070968,68.709677,9.645161,1018.032258,28.516129,8.677419,2.645161,6.774194,16.741935,6.774194,6.193548,11.806452,9.064516,0.0,7.732258,19.05
9,Arizona,20.133333,0.0,31.933333,10.0,1017.933333,12.4,15.066667,-2.2,14.866667,7.2,14.866667,10.933333,20.133333,15.8,0.0,8.7,19.05


In [27]:
# Drop the irrelevant columns from the df 

df = df.drop(columns=([
    #"precip",
    "humidity", 
    "visibility",
    "pressure",
    "cloudcover",
    "heatindex",
    "dewpoint",
    "windchill",
    "windgust",
    "feelslike",
    "mintemp",
    "maxtemp",
    "avgtemp",
    "totalsnow",
    "sunhour" 
   ]))


    

# review

df.tail(15)


Unnamed: 0,state,temperature,precip,blmn
383,Arizona,11.2,1.553333,21.46
384,South Carolina,22.166667,0.129167,21.46
385,Georgia,21.032258,0.654839,21.46
386,Florida,27.75,0.087903,21.46
387,Nevada,3.875,0.825,21.46
388,Nevada,4.625,2.475,21.2
389,Florida,28.741935,0.123387,21.2
390,Arizona,13.666667,0.753333,21.2
391,South Carolina,22.958333,0.504167,21.2
392,Georgia,22.419355,1.870968,21.2


One Hot Encoder

In [28]:
# Create a list of categorical variables 

categorical_variables = list(df.dtypes[df.dtypes == "object"].index)


# Review

categorical_variables

['state']

In [29]:
# Create a OneHotEncoder instance

enc = OneHotEncoder(sparse=False)


In [30]:
# Encode the categorcal variables using OneHotEncoder

encoded_data = enc.fit_transform(df[categorical_variables])


In [31]:
# Create a DataFrame with the encoded variables

encoded_df = pd.DataFrame(
    encoded_data,
    columns = enc.get_feature_names(categorical_variables)
)


# Review the DataFrame

encoded_df.head(15)


Unnamed: 0,state_Arizona,state_Florida,state_Georgia,state_Nevada,state_South Carolina
0,0.0,1.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0
5,0.0,1.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,1.0
7,0.0,0.0,0.0,1.0,0.0
8,0.0,0.0,1.0,0.0,0.0
9,1.0,0.0,0.0,0.0,0.0


In [32]:
# Add the numerical variables from the original DataFrame to the one-hot encoding DataFrame

concat_encoded_df = pd.concat(
    [
        df.drop(columns= categorical_variables),
        encoded_df
    ],
    axis=1
)



# Review the Dataframe

concat_encoded_df.head()


Unnamed: 0,temperature,precip,blmn,state_Arizona,state_Florida,state_Georgia,state_Nevada,state_South Carolina
0,18.5311,0.124402,18.34,0.0,1.0,0.0,0.0,0.0
1,17.733333,0.0,18.34,1.0,0.0,0.0,0.0,0.0
2,12.875,0.0,18.34,0.0,0.0,0.0,0.0,1.0
3,12.032258,0.0,18.34,0.0,0.0,1.0,0.0,0.0
4,11.625,2.2375,18.34,0.0,0.0,0.0,1.0,0.0


Features (x) & Target (y) datasets

In [33]:
# Define the target set y

y = concat_encoded_df["blmn"]


# Display a sample of y

y[:5]


0    18.34
1    18.34
2    18.34
3    18.34
4    18.34
Name: blmn, dtype: float64

In [34]:
# Define features set X by selecting all columns but y

X = concat_encoded_df.drop(columns="blmn")


# Review

X.head()


Unnamed: 0,temperature,precip,state_Arizona,state_Florida,state_Georgia,state_Nevada,state_South Carolina
0,18.5311,0.124402,0.0,1.0,0.0,0.0,0.0
1,17.733333,0.0,1.0,0.0,0.0,0.0,0.0
2,12.875,0.0,0.0,0.0,0.0,0.0,1.0
3,12.032258,0.0,0.0,0.0,1.0,0.0,0.0
4,11.625,2.2375,0.0,0.0,0.0,1.0,0.0


Split Train/Test datasets

In [35]:
# Split the preprocessed data into a training and testing dataset
# Assign the function a random_state equal to 1

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)


Standard Scaler

In [36]:
# Create a StandardScaler instance

scaler = StandardScaler()


# Fit the scaler to the features training dataset

X_scaler = scaler.fit(X_train)


# Fit the scaler to the features training dataset

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


Neural Network

In [37]:
# Define the the number of inputs (features) to the model

number_input_features = len(X_train.iloc[0])


# Review

number_input_features


7

In [38]:
# Define the number of neurons in the output layer

number_output_neurons = 1


In [39]:
# Define the number of hidden nodes for the first hidden layer

hidden_nodes_layer1 =  (number_input_features + 1) // 2 


# Review the number hidden nodes in the first layer

hidden_nodes_layer1


4

In [40]:
# Define the number of hidden nodes for the second hidden layer

hidden_nodes_layer2 =  (hidden_nodes_layer1 + 1) // 2


# Review the number hidden nodes in the second layer

hidden_nodes_layer2


2

In [41]:
# Create the Sequential model instance

nn = Sequential()


In [42]:
# Add the first hidden layer

nn.add(Dense(
    units=hidden_nodes_layer1, 
    input_dim=number_input_features, 
    activation="relu")
)


In [43]:
# Add the second hidden layer

nn.add(Dense(
    units=hidden_nodes_layer2, 
    activation="relu")
)


In [44]:
# Add the output layer to the model specifying the number of output neurons and activation function

nn.add(Dense(
    units=number_output_neurons, 
    activation="sigmoid")
)


In [45]:
# Display the Sequential model summary

nn.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 4)                 32        
                                                                 
 dense_4 (Dense)             (None, 2)                 10        
                                                                 
 dense_5 (Dense)             (None, 1)                 3         
                                                                 
Total params: 45
Trainable params: 45
Non-trainable params: 0
_________________________________________________________________


Compile & Fit model

In [46]:
# Compile the Sequential model

nn.compile(
    loss="binary_crossentropy", 
    optimizer="adam", 
    metrics=["accuracy"]
)


In [47]:
# Fit the model using 50 epochs and the training data

fit_model = nn.fit(X_train_scaled, y_train, epochs=50)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


Evaluate

In [48]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data

model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)


# Display the model loss and accuracy results

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


4/4 - 1s - loss: -1.5446e+02 - accuracy: 0.0000e+00 - 712ms/epoch - 178ms/step
Loss: -154.45962524414062, Accuracy: 0.0
