## Preprocessing

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from keras.models import load_model

#  Import and read the charity_data.csv.
import pandas as pd 
mushrooms_df = pd.read_csv("agaricus-lepiota.csv")
mushrooms_df

Unnamed: 0,edible,capShape,capSurface,capColor,bruises,odor,gillAttachment,gillSpacing,gillSize,gillColor,...,stalkSBR,stalkCAR,stalkCBR,veilType,veilColor,ringNumber,ringType,sporePrintColor,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,e,k,s,n,f,n,a,c,b,y,...,s,o,o,p,o,o,p,b,c,l
8120,e,x,s,n,f,n,a,c,b,y,...,s,o,o,p,n,o,p,b,v,l
8121,e,f,s,n,f,n,a,c,b,n,...,s,o,o,p,o,o,p,b,c,l
8122,p,k,y,n,f,y,f,c,n,b,...,k,w,w,p,w,o,e,w,v,l


In [2]:
#Drop Stalk Root since some values have '?'
mushrooms_df = mushrooms_df.drop(['stalkRoot'],1)
mushrooms_df

Unnamed: 0,edible,capShape,capSurface,capColor,bruises,odor,gillAttachment,gillSpacing,gillSize,gillColor,...,stalkSBR,stalkCAR,stalkCBR,veilType,veilColor,ringNumber,ringType,sporePrintColor,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,e,k,s,n,f,n,a,c,b,y,...,s,o,o,p,o,o,p,b,c,l
8120,e,x,s,n,f,n,a,c,b,y,...,s,o,o,p,n,o,p,b,v,l
8121,e,f,s,n,f,n,a,c,b,n,...,s,o,o,p,o,o,p,b,c,l
8122,p,k,y,n,f,y,f,c,n,b,...,k,w,w,p,w,o,e,w,v,l


In [3]:
#Replace two values in 'edible' column with 1 and 0. This will end up as our 'y' or target value.
edible_dict = {'e': 1, 'p': 0}
mushrooms_df2 = mushrooms_df.replace({'edible': edible_dict})
mushrooms_df2

Unnamed: 0,edible,capShape,capSurface,capColor,bruises,odor,gillAttachment,gillSpacing,gillSize,gillColor,...,stalkSBR,stalkCAR,stalkCBR,veilType,veilColor,ringNumber,ringType,sporePrintColor,population,habitat
0,0,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,1,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,1,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,0,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,1,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,1,k,s,n,f,n,a,c,b,y,...,s,o,o,p,o,o,p,b,c,l
8120,1,x,s,n,f,n,a,c,b,y,...,s,o,o,p,n,o,p,b,v,l
8121,1,f,s,n,f,n,a,c,b,n,...,s,o,o,p,o,o,p,b,c,l
8122,0,k,y,n,f,y,f,c,n,b,...,k,w,w,p,w,o,e,w,v,l


In [4]:
# Determine the number of unique values in each column.
columns = mushrooms_df2.columns
for column in columns:
    print(column, ':  ', mushrooms_df2[column].nunique())

edible :   2
capShape :   6
capSurface :   4
capColor :   10
bruises :   2
odor :   9
gillAttachment :   2
gillSpacing :   2
gillSize :   2
gillColor :   12
stalkShape :   2
stalkSAR :   4
stalkSBR :   4
stalkCAR :   9
stalkCBR :   9
veilType :   1
veilColor :   4
ringNumber :   3
ringType :   5
sporePrintColor :   9
population :   6
habitat :   7


In [5]:
#Find all non-numeric columns

columns_to_encode = []

list = mushrooms_df2.dtypes
list2 = mushrooms_df2.columns

for i in range(len(list)):
    if (list[i] == 'object'):
        columns_to_encode.append(list2[i])
        
print(columns_to_encode)

['capShape', 'capSurface', 'capColor', 'bruises', 'odor', 'gillAttachment', 'gillSpacing', 'gillSize', 'gillColor', 'stalkShape', 'stalkSAR', 'stalkSBR', 'stalkCAR', 'stalkCBR', 'veilType', 'veilColor', 'ringNumber', 'ringType', 'sporePrintColor', 'population', 'habitat']


In [6]:
# Convert categorical data to numeric with `pd.get_dummies`
mushrooms_df2 = pd.get_dummies(mushrooms_df2,columns=columns_to_encode,drop_first=True)
mushrooms_df2

Unnamed: 0,edible,capShape_c,capShape_f,capShape_k,capShape_s,capShape_x,capSurface_g,capSurface_s,capSurface_y,capColor_c,...,population_n,population_s,population_v,population_y,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w
0,0,0,0,0,0,1,0,1,0,0,...,0,1,0,0,0,0,0,0,1,0
1,1,0,0,0,0,1,0,1,0,0,...,1,0,0,0,1,0,0,0,0,0
2,1,0,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,1,0,0,0
3,0,0,0,0,0,1,0,0,1,0,...,0,1,0,0,0,0,0,0,1,0
4,1,0,0,0,0,1,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,1,0,0,1,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
8120,1,0,0,0,0,1,0,1,0,0,...,0,0,1,0,0,1,0,0,0,0
8121,1,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
8122,0,0,0,1,0,0,0,0,1,0,...,0,0,1,0,0,1,0,0,0,0


In [7]:
# Split our preprocessed data into our features and target arrays
y = mushrooms_df2["edible"].values
X = mushrooms_df2.drop(["edible"],1).values

# Split the preprocessed data into a training and testing dataset

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [8]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Compile, Train and Evaluate the Model

In [9]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  25
hidden_nodes_layer2 = 15


nn1 = tf.keras.models.Sequential()

# First hidden layer
nn1.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn1.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn1.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 25)                2300      
                                                                 
 dense_1 (Dense)             (None, 15)                390       
                                                                 
 dense_2 (Dense)             (None, 1)                 16        
                                                                 
Total params: 2,706
Trainable params: 2,706
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Compile the model
nn1.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [11]:
# Train the model
fit_model = nn1.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100


Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [12]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn1.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

64/64 - 0s - loss: 1.8145e-08 - accuracy: 1.0000 - 125ms/epoch - 2ms/step
Loss: 1.814515471210143e-08, Accuracy: 1.0


In [13]:
#Try with a reduced set of mushroom characteristics
mushrooms_df3 = mushrooms_df[{'edible','capSurface','bruises','odor','gillAttachment','gillColor','veilType',
                             'sporePrintColor','habitat'}]
mushrooms_df3

Unnamed: 0,capSurface,sporePrintColor,gillColor,edible,odor,habitat,bruises,gillAttachment,veilType
0,s,k,k,p,p,u,t,f,p
1,s,n,k,e,a,g,t,f,p
2,s,n,n,e,l,m,t,f,p
3,y,k,n,p,p,u,t,f,p
4,s,n,k,e,n,g,f,f,p
...,...,...,...,...,...,...,...,...,...
8119,s,b,y,e,n,l,f,a,p
8120,s,b,y,e,n,l,f,a,p
8121,s,b,n,e,n,l,f,a,p
8122,y,w,b,p,y,l,f,f,p


In [14]:
#Replace two values in 'edible' column with 1 and 0. This will end up as our 'y' or target value.
edible_dict = {'e': 1, 'p': 0}
mushrooms_df3 = mushrooms_df3.replace({'edible': edible_dict})
mushrooms_df3

Unnamed: 0,capSurface,sporePrintColor,gillColor,edible,odor,habitat,bruises,gillAttachment,veilType
0,s,k,k,0,p,u,t,f,p
1,s,n,k,1,a,g,t,f,p
2,s,n,n,1,l,m,t,f,p
3,y,k,n,0,p,u,t,f,p
4,s,n,k,1,n,g,f,f,p
...,...,...,...,...,...,...,...,...,...
8119,s,b,y,1,n,l,f,a,p
8120,s,b,y,1,n,l,f,a,p
8121,s,b,n,1,n,l,f,a,p
8122,y,w,b,0,y,l,f,f,p


In [15]:
# Determine the number of unique values in each column.
columns = mushrooms_df3.columns
for column in columns:
    print(column, ':  ', mushrooms_df3[column].nunique())

capSurface :   4
sporePrintColor :   9
gillColor :   12
edible :   2
odor :   9
habitat :   7
bruises :   2
gillAttachment :   2
veilType :   1


In [16]:
#Find all non-numeric columns

columns_to_encode = []

list = mushrooms_df3.dtypes
list2 = mushrooms_df3.columns

for i in range(len(list)):
    if (list[i] == 'object'):
        columns_to_encode.append(list2[i])
        
print(columns_to_encode)

['capSurface', 'sporePrintColor', 'gillColor', 'odor', 'habitat', 'bruises', 'gillAttachment', 'veilType']


In [17]:
# Convert categorical data to numeric with `pd.get_dummies`
mushrooms_df3 = pd.get_dummies(mushrooms_df3,columns=columns_to_encode,drop_first=True)
mushrooms_df3

Unnamed: 0,edible,capSurface_g,capSurface_s,capSurface_y,sporePrintColor_h,sporePrintColor_k,sporePrintColor_n,sporePrintColor_o,sporePrintColor_r,sporePrintColor_u,...,odor_s,odor_y,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w,bruises_t,gillAttachment_f
0,0,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,1,1
1,1,0,1,0,0,0,1,0,0,0,...,0,0,1,0,0,0,0,0,1,1
2,1,0,1,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,0,1,1
3,0,0,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,1,1
4,1,0,1,0,0,0,1,0,0,0,...,0,0,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,1,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
8120,1,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
8121,1,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
8122,0,0,0,1,0,0,0,0,0,0,...,0,1,0,1,0,0,0,0,0,1


In [18]:
# Split our preprocessed data into our features and target arrays
y = mushrooms_df3["edible"].values
X = mushrooms_df3.drop(["edible"],1).values

# Split the preprocessed data into a training and testing dataset

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [19]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [20]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  15
hidden_nodes_layer2 = 6


nn2 = tf.keras.models.Sequential()

# First hidden layer
nn2.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn2.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 15)                585       
                                                                 
 dense_4 (Dense)             (None, 6)                 96        
                                                                 
 dense_5 (Dense)             (None, 1)                 7         
                                                                 
Total params: 688
Trainable params: 688
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Compile the model
nn2.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [22]:
# Train the model
fit_model = nn2.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [23]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn2.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

64/64 - 0s - loss: 0.0018 - accuracy: 0.9995 - 112ms/epoch - 2ms/step
Loss: 0.0017553915968164802, Accuracy: 0.9995076060295105


In [24]:
#Try with a reduced set of mushroom characteristics --- Need a couple more features!
mushrooms_df4 = mushrooms_df[{'edible','capSurface','bruises','odor','gillAttachment','gillColor','veilType',
                             'sporePrintColor','habitat','ringNumber','ringType','population'}]
mushrooms_df4

Unnamed: 0,capSurface,sporePrintColor,population,gillColor,edible,odor,habitat,ringNumber,bruises,gillAttachment,veilType,ringType
0,s,k,s,k,p,p,u,o,t,f,p,p
1,s,n,n,k,e,a,g,o,t,f,p,p
2,s,n,n,n,e,l,m,o,t,f,p,p
3,y,k,s,n,p,p,u,o,t,f,p,p
4,s,n,a,k,e,n,g,o,f,f,p,e
...,...,...,...,...,...,...,...,...,...,...,...,...
8119,s,b,c,y,e,n,l,o,f,a,p,p
8120,s,b,v,y,e,n,l,o,f,a,p,p
8121,s,b,c,n,e,n,l,o,f,a,p,p
8122,y,w,v,b,p,y,l,o,f,f,p,e


In [25]:
#Replace two values in 'edible' column with 1 and 0. This will end up as our 'y' or target value.
edible_dict = {'e': 1, 'p': 0}
mushrooms_df4 = mushrooms_df4.replace({'edible': edible_dict})
mushrooms_df4

Unnamed: 0,capSurface,sporePrintColor,population,gillColor,edible,odor,habitat,ringNumber,bruises,gillAttachment,veilType,ringType
0,s,k,s,k,0,p,u,o,t,f,p,p
1,s,n,n,k,1,a,g,o,t,f,p,p
2,s,n,n,n,1,l,m,o,t,f,p,p
3,y,k,s,n,0,p,u,o,t,f,p,p
4,s,n,a,k,1,n,g,o,f,f,p,e
...,...,...,...,...,...,...,...,...,...,...,...,...
8119,s,b,c,y,1,n,l,o,f,a,p,p
8120,s,b,v,y,1,n,l,o,f,a,p,p
8121,s,b,c,n,1,n,l,o,f,a,p,p
8122,y,w,v,b,0,y,l,o,f,f,p,e


In [26]:
# Determine the number of unique values in each column.
columns = mushrooms_df4.columns
for column in columns:
    print(column, ':  ', mushrooms_df4[column].nunique())

capSurface :   4
sporePrintColor :   9
population :   6
gillColor :   12
edible :   2
odor :   9
habitat :   7
ringNumber :   3
bruises :   2
gillAttachment :   2
veilType :   1
ringType :   5


In [27]:
#Find all non-numeric columns

columns_to_encode = []

list = mushrooms_df4.dtypes
list2 = mushrooms_df4.columns

for i in range(len(list)):
    if (list[i] == 'object'):
        columns_to_encode.append(list2[i])
        
print(columns_to_encode)

['capSurface', 'sporePrintColor', 'population', 'gillColor', 'odor', 'habitat', 'ringNumber', 'bruises', 'gillAttachment', 'veilType', 'ringType']


In [28]:
# Convert categorical data to numeric with `pd.get_dummies`
mushrooms_df4 = pd.get_dummies(mushrooms_df4,columns=columns_to_encode,drop_first=True)
mushrooms_df4

Unnamed: 0,edible,capSurface_g,capSurface_s,capSurface_y,sporePrintColor_h,sporePrintColor_k,sporePrintColor_n,sporePrintColor_o,sporePrintColor_r,sporePrintColor_u,...,habitat_u,habitat_w,ringNumber_o,ringNumber_t,bruises_t,gillAttachment_f,ringType_f,ringType_l,ringType_n,ringType_p
0,0,0,1,0,0,1,0,0,0,0,...,1,0,1,0,1,1,0,0,0,1
1,1,0,1,0,0,0,1,0,0,0,...,0,0,1,0,1,1,0,0,0,1
2,1,0,1,0,0,0,1,0,0,0,...,0,0,1,0,1,1,0,0,0,1
3,0,0,0,1,0,1,0,0,0,0,...,1,0,1,0,1,1,0,0,0,1
4,1,0,1,0,0,0,1,0,0,0,...,0,0,1,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,1,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,1
8120,1,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,1
8121,1,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,1
8122,0,0,0,1,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0


In [29]:
# Split our preprocessed data into our features and target arrays
y = mushrooms_df4["edible"].values
X = mushrooms_df4.drop(["edible"],1).values

# Split the preprocessed data into a training and testing dataset

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [30]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [31]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  25
hidden_nodes_layer2 = 15


nn3 = tf.keras.models.Sequential()

# First hidden layer
nn3.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn3.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn3.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn3.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 25)                1250      
                                                                 
 dense_7 (Dense)             (None, 15)                390       
                                                                 
 dense_8 (Dense)             (None, 1)                 16        
                                                                 
Total params: 1,656
Trainable params: 1,656
Non-trainable params: 0
_________________________________________________________________


In [32]:
# Compile the model
nn3.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [33]:
# Train the model
fit_model = nn3.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100


Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [34]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn3.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

64/64 - 0s - loss: 5.8225e-09 - accuracy: 1.0000 - 109ms/epoch - 2ms/step
Loss: 5.822507365138563e-09, Accuracy: 1.0


In [35]:
#Try with a reduced set of mushroom characteristics based on the 6 attributes used in the documented 'rules'
mushrooms_df5 = mushrooms_df[{'edible','odor','capColor','stalkSBR',
                             'sporePrintColor','habitat','population'}]
mushrooms_df5

Unnamed: 0,sporePrintColor,population,capColor,edible,odor,habitat,stalkSBR
0,k,s,n,p,p,u,s
1,n,n,y,e,a,g,s
2,n,n,w,e,l,m,s
3,k,s,w,p,p,u,s
4,n,a,g,e,n,g,s
...,...,...,...,...,...,...,...
8119,b,c,n,e,n,l,s
8120,b,v,n,e,n,l,s
8121,b,c,n,e,n,l,s
8122,w,v,n,p,y,l,k


In [36]:
#Replace two values in 'edible' column with 1 and 0. This will end up as our 'y' or target value.
edible_dict = {'e': 1, 'p': 0}
mushrooms_df5 = mushrooms_df5.replace({'edible': edible_dict})
mushrooms_df5

Unnamed: 0,sporePrintColor,population,capColor,edible,odor,habitat,stalkSBR
0,k,s,n,0,p,u,s
1,n,n,y,1,a,g,s
2,n,n,w,1,l,m,s
3,k,s,w,0,p,u,s
4,n,a,g,1,n,g,s
...,...,...,...,...,...,...,...
8119,b,c,n,1,n,l,s
8120,b,v,n,1,n,l,s
8121,b,c,n,1,n,l,s
8122,w,v,n,0,y,l,k


In [37]:
# Determine the number of unique values in each column.
columns = mushrooms_df5.columns
for column in columns:
    print(column, ':  ', mushrooms_df5[column].nunique())

sporePrintColor :   9
population :   6
capColor :   10
edible :   2
odor :   9
habitat :   7
stalkSBR :   4


In [38]:
#Find all non-numeric columns

columns_to_encode = []

list = mushrooms_df5.dtypes
list2 = mushrooms_df5.columns

for i in range(len(list)):
    if (list[i] == 'object'):
        columns_to_encode.append(list2[i])
        
print(columns_to_encode)

['sporePrintColor', 'population', 'capColor', 'odor', 'habitat', 'stalkSBR']


In [39]:
# Convert categorical data to numeric with `pd.get_dummies`
mushrooms_df5 = pd.get_dummies(mushrooms_df5,columns=columns_to_encode,drop_first=True)
mushrooms_df5

Unnamed: 0,edible,sporePrintColor_h,sporePrintColor_k,sporePrintColor_n,sporePrintColor_o,sporePrintColor_r,sporePrintColor_u,sporePrintColor_w,sporePrintColor_y,population_c,...,odor_y,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w,stalkSBR_k,stalkSBR_s,stalkSBR_y
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,1,0
1,1,0,0,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0
3,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,1,0
4,1,0,0,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,1,0,0,0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,0,1,0
8120,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
8121,1,0,0,0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,0,1,0
8122,0,0,0,0,0,0,0,1,0,0,...,1,0,1,0,0,0,0,1,0,0


In [40]:
# Split our preprocessed data into our features and target arrays
y = mushrooms_df5["edible"].values
X = mushrooms_df5.drop(["edible"],1).values

# Split the preprocessed data into a training and testing dataset

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [41]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [42]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  5
hidden_nodes_layer2 = 5


nn4 = tf.keras.models.Sequential()

# First hidden layer
nn4.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
# nn4.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="tanh"))

# Output layer
nn4.add(tf.keras.layers.Dense(input_dim=number_input_features, units=1, activation="sigmoid"))

# Check the structure of the model
nn4.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 5)                 200       
                                                                 
 dense_10 (Dense)            (None, 1)                 6         
                                                                 
Total params: 206
Trainable params: 206
Non-trainable params: 0
_________________________________________________________________


In [43]:
# Compile the model
nn4.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [44]:
# Train the model
fit_model = nn4.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [45]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn4.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

64/64 - 0s - loss: 4.9867e-05 - accuracy: 1.0000 - 104ms/epoch - 2ms/step
Loss: 4.986745261703618e-05, Accuracy: 1.0
