# Mushroom Classification

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow import keras
from functools import partial

In [2]:
# Loading the data

# Read the dataset from a CSV file expected in the current working directory.
df = pd.read_csv('mushrooms.csv')

# Only the final expression of a cell is rendered, so a bare `df.head()` in the
# middle of the cell is silently discarded. Report the shape explicitly and
# leave the preview as the last line so both are shown.
print(df.shape)
df.head()

(8124, 23)

In [6]:
# Data preprocessing

SEED = 42  # fixed seed so the train/validation/test partition is reproducible


def class_share_by(frame, feature):
    """Return within-group class proportions for each value of `feature`.

    Rows are the distinct values of `feature`, columns are the encoded class
    labels (0 and 1), and cells hold the proportion of each label inside the
    group. Rows are ordered by the share of label 1, highest first.
    """
    shares = frame.groupby([feature])['class'].value_counts(normalize=True).unstack()
    return shares.sort_values(by=1, ascending=False)


# Encode the target: 'p' -> 1, 'e' -> 0 (presumably poisonous / edible).
# The guard keeps the cell idempotent: mapping an already-numeric column a
# second time would turn every label into NaN.
if not pd.api.types.is_numeric_dtype(df['class']):
    df['class'] = df['class'].map({'p': 1, 'e': 0})
assert df['class'].notna().all(), (
    "'class' holds values other than 'p'/'e' (or was corrupted by an earlier "
    "re-run); reload df before preprocessing"
)

class_by_population = class_share_by(df, 'population')
class_by_habitat = class_share_by(df, 'habitat')

# Separate the target vector from the (all-categorical) feature columns.
y = df.loc[:, 'class'].values
X = df.drop(['class'], axis=1)

# One-hot encode every feature, dropping the first level of each to avoid
# redundant columns. NOTE: the encoder is fitted on the full table, i.e.
# before the train/validation/test split below.
encoder = OneHotEncoder(drop='first')
X = encoder.fit_transform(X)

# Hold out 15% for testing, then 15% of the remainder for validation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, shuffle=True, random_state=SEED
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.15, shuffle=True, random_state=SEED
)

In [7]:
# Model Creation

# Shared factory for dense layers: ReLU activation with He-normal initialisation.
MyDense = partial(keras.layers.Dense, activation="relu", kernel_initializer="he_normal")

# Input width is the number of one-hot encoded feature columns.
input_layer = keras.layers.Input(shape=X_train.shape[1:])

# Two small hidden layers followed by dropout for regularisation.
hidden1 = MyDense(10)(input_layer)
hidden2 = MyDense(5)(hidden1)
dropout = keras.layers.Dropout(rate=0.3)(hidden2)

# Single sigmoid unit for binary classification; the keyword arguments
# override the ReLU / He-normal defaults baked into MyDense.
output = MyDense(1, activation="sigmoid", kernel_initializer="uniform")(dropout)

model = keras.models.Model(inputs=[input_layer], outputs=[output])

# Callbacks only take effect when passed to model.fit(callbacks=[...]).
# Both monitor the validation loss by default.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
lr_scheduler_cb = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)

# `learning_rate` replaces the deprecated `lr` alias, which newer Keras
# releases no longer accept.
optimizer = keras.optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)

model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])



In [8]:
# Model Training

# Fit on the training split and track the validation split each epoch.
# The early-stopping and learning-rate callbacks must be passed here; merely
# constructing them has no effect on training.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping_cb, lr_scheduler_cb],
)

# Persist the trained model (HDF5 format) to the working directory.
model.save('model.h5')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [9]:
# Model Evaluate

# Score the trained model on the held-out test split. Despite the `val_`
# prefix, these values are test-set metrics, not validation metrics.
test_metrics = model.evaluate(X_test, y_test)
val_loss, val_acc = test_metrics

# Accuracy is returned as a fraction; report it as a percentage.
accuracy_percent = val_acc*100

print("Loss: ", val_loss)
print("Accuracy: ", accuracy_percent, "%")

Loss:  1.4228721738618333e-06
Accuracy:  100.0 %
