# Neural Networks with Grid Searching


## Imports

In [1]:
%pip install scikeras

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.dummy import DummyClassifier

from scikeras.wrappers import KerasClassifier, KerasRegressor

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

Collecting scikeras
  Downloading scikeras-0.6.1-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.6.1
Note: you may need to restart the kernel to use updated packages.


## Import Data

In [2]:
classification_data = pd.read_csv('./demographics-data/classification_data_demographics.csv')
classification_data.head(2)

Unnamed: 0,MonthOccurrence,TotalNumberOfVictims,TotalNumberOfIndividualVictims,SuspectsRaceAsAGroup,TotalNumberOfSuspects,MostSeriousUcr,MostSeriousUcrType,MostSeriousLocation,MostSeriousBias,MostSeriousBiasType,...,pct_Asian,pct_AAPI,pct_Black,pct_Hispanic,pct_Multi-Racial/Ethnic,pct_Hawaiian/ Pacific Island,pct_White,pct_unemployed_2018,pct_u18_poverty_2017,median_hh_income_2017
0,2,1,1,White,2,Simple Assault,Violent Crimes,Bar/Night Club,Anti-Black or African American,Race/Ethnicity/Ancestry,...,0.265963,0.27413,0.115603,0.234718,0.041847,0.008167,0.337067,0.03,0.105,95550.0
1,2,2,2,White,1,Destruction/Damage/Vandalism,Property Crimes,Residence/Home/Driveway,Anti-Black or African American,Race/Ethnicity/Ancestry,...,0.265963,0.27413,0.115603,0.234718,0.041847,0.008167,0.337067,0.03,0.105,95550.0


## Transform and Scale Data

### Column Transformer

In [3]:
X = classification_data.drop(columns=['labels'])
ct = make_column_transformer(
    (OneHotEncoder(sparse=False, handle_unknown='ignore'), make_column_selector(dtype_include=object)),
    remainder='passthrough',
    verbose_feature_names_out=False
)
X_encoded = ct.fit_transform(X)
X_encoded
ct.get_feature_names_out()
X_encoded = pd.DataFrame(X_encoded, columns=ct.get_feature_names_out())

In [4]:
X_encoded.head(2)

Unnamed: 0,SuspectsRaceAsAGroup_American Indian or Alaska Native,SuspectsRaceAsAGroup_Asian,SuspectsRaceAsAGroup_Asian/Pacific Islander,SuspectsRaceAsAGroup_Black or African American,SuspectsRaceAsAGroup_East Indian,SuspectsRaceAsAGroup_East Indian/Asian Indian,SuspectsRaceAsAGroup_Group of Multiple Races,SuspectsRaceAsAGroup_Hispanic,SuspectsRaceAsAGroup_Native Hawaiian or Other Pacific Islander,SuspectsRaceAsAGroup_Unknown,...,pct_Asian,pct_AAPI,pct_Black,pct_Hispanic,pct_Multi-Racial/Ethnic,pct_Hawaiian/ Pacific Island,pct_White,pct_unemployed_2018,pct_u18_poverty_2017,median_hh_income_2017
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.265963,0.27413,0.115603,0.234718,0.041847,0.008167,0.337067,0.03,0.105,95550.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.265963,0.27413,0.115603,0.234718,0.041847,0.008167,0.337067,0.03,0.105,95550.0


### Scaling

In [5]:
X_encoded_scaled = StandardScaler().fit_transform(X_encoded)

## Target

In [6]:
y = classification_data['labels']

In [7]:
y.unique()

array([1, 0, 2])

In [8]:
y_categorical = to_categorical(y, 3)

## Baseline

In [9]:
y.value_counts(normalize=True)

1    0.728433
0    0.168726
2    0.102840
Name: labels, dtype: float64

## Test/Train Split

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded_scaled, y_categorical, stratify=y, random_state=13)

In [11]:
X_train.shape

(18327, 161)

In [12]:
y_train.shape

(18327, 3)

## Neural Network Classification with Grid Searching

**Only run this in Colab**

With help from Lab 7.01

In [13]:
# Got input shape error in grid search. Using this from lesson with Chuck given: "Model <keras.engine.sequential.Sequential object at 0x7fdf22f98410> cannot be saved because the input shapes have not been set. Usually, input shapes are automatically determined when calling `.fit()` or `.predict()`. To manually set the shapes, call `model.build(input_shape)"
n_input = X_train.shape[1]

In [14]:
# Create model function. Required by KerasClassifier
def create_model(dropout_rate, neurons):
  model = Sequential()
  model.add(Dense(neurons, input_dim=n_input, activation='relu'))
  model.add(Dropout(dropout_rate))
  model.add(Dense(3, activation='softmax'))
  # Model compile
  model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
  )
  return model


In [15]:
# Create/Instantiate? model.
model = KerasClassifier(model=create_model, verbose=1)


In [None]:
# Define grid search parameters. I'm using the same from my tutorial to start it off.
params= {
    'batch_size': [32],
    'epochs': [100],
    'callbacks': [EarlyStopping(monitor='loss', patience=3)],
    'model__dropout_rate': [.5, .75, .9],
    'model__neurons': [20, 30, 50]
}
# Grid Search
gs = GridSearchCV(
    estimator=model,
    param_grid=params,
    n_jobs=-1,
)
gs_result = gs.fit(X_train, y_train)

# Result summary
print(f"Best score: {gs_result.best_score_}. Used these parameters: {gs_result.best_params_}")

# This part copied from machine learning mastery prints out all results to check where improvements can be made
means = gs_result.cv_results_['mean_test_score']
stds = gs_result.cv_results_['std_test_score']
params = gs_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Best score: 0.9773559771626591. Used these parameters: {'batch_size': 32, 'callbacks': <keras.callbacks.EarlyStopping object at 0x7fa9fde58950>, 'epochs': 100, 'model__dropout_rate': 0.5, 'model__neurons': 50}
0.974683 (0.004067) with: {'batch_size': 32, 'callbacks': <keras.callbacks.EarlyStopping object at 0x7fa9fde58950>, 'epochs

### Evaluate Model

In [None]:
gs_result.best_estimator_.model_.evaluate(X_test, y_test)



[0.08910281211137772, 0.9819937944412231]