In [37]:
# Import the modules

import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

import warnings
warnings.filterwarnings('ignore')


In [38]:
# Read the CSV file from the Resources folder into a Pandas DataFrame

data = pd.read_csv("data/neural_network_df.csv")


# Review the DataFrame

data.head()


Unnamed: 0,state,temperature,precip,humidity,visibility,pressure,cloudcover,heatindex,dewpoint,windchill,windgust,feelslike,mintemp,maxtemp,avgtemp,totalsnow,sunhour,blmn
0,Florida,18.5311,0.124402,60.258373,9.966507,1018.114833,28.937799,17.062201,8.641148,16.239234,17.559809,16.37799,14.363636,18.5311,16.593301,0.0,7.513397,18.34
1,Arizona,17.733333,0.0,31.533333,10.0,1019.066667,8.266667,13.133333,-3.933333,12.666667,7.133333,12.666667,8.733333,17.733333,13.066667,0.0,8.7,18.34
2,South Carolina,12.875,0.0,67.291667,10.0,1017.041667,21.708333,9.666667,3.75,8.583333,12.916667,8.583333,7.083333,12.875,10.166667,0.0,8.2875,18.34
3,Georgia,12.032258,0.0,65.935484,10.0,1019.129032,9.225806,7.354839,1.096774,5.322581,15.322581,5.322581,3.741935,12.032258,7.870968,0.0,8.7,18.34
4,Nevada,11.625,2.2375,46.25,9.5,1018.0,29.375,7.25,-5.0,6.25,11.375,6.25,2.375,11.625,8.0,1.225,7.4,18.34


In [39]:
# Transform the Card Type column using get_dummies()

dummies = pd.get_dummies(data["state"])


# Display sample data

dummies.head()


Unnamed: 0,Arizona,Florida,Georgia,Nevada,South Carolina
0,0,1,0,0,0
1,1,0,0,0,0
2,0,0,0,0,1
3,0,0,1,0,0
4,0,0,0,1,0


In [40]:
# Concatenate the df_shopping_transformed and the card_dummies DataFrames

data = pd.concat([data, dummies], axis=1)


# drop the original catrgoricl column

data = data.drop(columns="state")


# Display sample data

data.head()


Unnamed: 0,temperature,precip,humidity,visibility,pressure,cloudcover,heatindex,dewpoint,windchill,windgust,...,maxtemp,avgtemp,totalsnow,sunhour,blmn,Arizona,Florida,Georgia,Nevada,South Carolina
0,18.5311,0.124402,60.258373,9.966507,1018.114833,28.937799,17.062201,8.641148,16.239234,17.559809,...,18.5311,16.593301,0.0,7.513397,18.34,0,1,0,0,0
1,17.733333,0.0,31.533333,10.0,1019.066667,8.266667,13.133333,-3.933333,12.666667,7.133333,...,17.733333,13.066667,0.0,8.7,18.34,1,0,0,0,0
2,12.875,0.0,67.291667,10.0,1017.041667,21.708333,9.666667,3.75,8.583333,12.916667,...,12.875,10.166667,0.0,8.2875,18.34,0,0,0,0,1
3,12.032258,0.0,65.935484,10.0,1019.129032,9.225806,7.354839,1.096774,5.322581,15.322581,...,12.032258,7.870968,0.0,8.7,18.34,0,0,1,0,0
4,11.625,2.2375,46.25,9.5,1018.0,29.375,7.25,-5.0,6.25,11.375,...,11.625,8.0,1.225,7.4,18.34,0,0,0,1,0


In [46]:
# Separate the data into labels and features

# Separate the y variable, the labels

y = data["blmn"]


y.head()


0    18.34
1    18.34
2    18.34
3    18.34
4    18.34
Name: blmn, dtype: float64

In [42]:
# Separate the X variable, the features

X = data.drop(columns="blmn")


X.head()


Unnamed: 0,temperature,precip,humidity,visibility,pressure,cloudcover,heatindex,dewpoint,windchill,windgust,...,mintemp,maxtemp,avgtemp,totalsnow,sunhour,Arizona,Florida,Georgia,Nevada,South Carolina
0,18.5311,0.124402,60.258373,9.966507,1018.114833,28.937799,17.062201,8.641148,16.239234,17.559809,...,14.363636,18.5311,16.593301,0.0,7.513397,0,1,0,0,0
1,17.733333,0.0,31.533333,10.0,1019.066667,8.266667,13.133333,-3.933333,12.666667,7.133333,...,8.733333,17.733333,13.066667,0.0,8.7,1,0,0,0,0
2,12.875,0.0,67.291667,10.0,1017.041667,21.708333,9.666667,3.75,8.583333,12.916667,...,7.083333,12.875,10.166667,0.0,8.2875,0,0,0,0,1
3,12.032258,0.0,65.935484,10.0,1019.129032,9.225806,7.354839,1.096774,5.322581,15.322581,...,3.741935,12.032258,7.870968,0.0,8.7,0,0,1,0,0
4,11.625,2.2375,46.25,9.5,1018.0,29.375,7.25,-5.0,6.25,11.375,...,2.375,11.625,8.0,1.225,7.4,0,0,0,1,0


In [47]:
# Check the balance of our target values

y.value_counts().sort_values()

19.99     3
21.33     5
27.00     5
20.96     5
19.17     5
         ..
21.07     5
23.41    10
26.10    10
24.84    10
19.35    10
Name: blmn, Length: 76, dtype: int64

In [44]:
# Import the train_test_learn module

from sklearn.model_selection import train_test_split


# Split the data using train_test_split
# Assign a random_state of 1 to the function

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)


In [45]:
# Import the LogisticRegression module from SKLearn

from sklearn.linear_model import LogisticRegression


# Instantiate the Logistic Regression model
# Assign a random_state parameter of 1 to the model

logistic_regression_model = LogisticRegression(random_state=1)


# Fit the model using training data

lr_model = logistic_regression_model.fit(X_train, y_train)


ValueError: Unknown label type: 'continuous'