## Load Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from treeviz_util import tree_print
from sklearn.model_selection import train_test_split

## Load Dataset

In [None]:
# Location of the mushroom CSV inside the Kaggle input directory.
mushrooms_csv = '/kaggle/input/mushroom-classification/mushrooms.csv'

# Read the raw table and preview its first rows.
df = pd.read_csv(mushrooms_csv)
df.head()

## Convert Labels
The single-letter codes are replaced with descriptive words so that the data is easier to read.

In [None]:
# Code tables shared by the "above ring" and "below ring" stalk columns.
stalk_surface_names = {'k': 'silky', 'f': 'fibrous', 'y': 'scaly', 's': 'smooth'}
stalk_color_names = {'n': 'brown', 'b': 'buff', 'c': 'cinnamon', 'g': 'gray', 'o': 'orange', 'p': 'pink', 'e': 'red', 'w': 'white', 'y': 'yellow'}

# Per-column lookup: column name -> {single-letter code -> readable word}.
code_names = {
    'class': {'e': 'edible', 'p': 'poisonous'},
    'cap-shape': {'b': 'bell', 'c': 'conical', 'x': 'convex', 'f': 'flat', 'k': 'knobbed', 's': 'sunken'},
    'cap-surface': {'f': 'fibrous', 'g': 'grooves', 'y': 'scaly', 's': 'smooth'},
    'cap-color': {'n': 'brown', 'b': 'buff', 'c': 'cinnamon', 'g': 'gray', 'r': 'green', 'p': 'pink', 'u': 'purple', 'e': 'red', 'w': 'white', 'y': 'yellow'},
    'bruises': {'t': 'yes', 'f': 'no'},
    'odor': {'a': 'almond', 'l': 'anise', 'c': 'creosote', 'y': 'fishy', 'f': 'foul', 'm': 'musty', 'n': 'none', 'p': 'pungent', 's': 'spicy'},
    'gill-attachment': {'a': 'attached', 'd': 'descending', 'f': 'free', 'n': 'notched'},
    'gill-spacing': {'c': 'close', 'w': 'crowded', 'd': 'distant'},
    'gill-size': {'b': 'broad', 'n': 'narrow'},
    'gill-color': {'k': 'black', 'h': 'chocolate', 'n': 'brown', 'b': 'buff', 'g': 'gray', 'r': 'green', 'p': 'pink', 'u': 'purple', 'e': 'red', 'w': 'white', 'y': 'yellow', 'o': 'orange'},
    'stalk-shape': {'e': 'enlarging', 't': 'tapering'},
    'stalk-root': {'b': 'bulbous', 'c': 'club', 'u': 'cup', 'e': 'equal', 'z': 'rhizomorphs', 'r': 'rooted', '?': 'missing'},
    'stalk-surface-above-ring': stalk_surface_names,
    'stalk-surface-below-ring': stalk_surface_names,
    'stalk-color-above-ring': stalk_color_names,
    'stalk-color-below-ring': stalk_color_names,
    'veil-type': {'p': 'partial', 'u': 'universal'},
    'veil-color': {'n': 'brown', 'o': 'orange', 'w': 'white', 'y': 'yellow'},
    'ring-number': {'n': 'none', 'o': 'one', 't': 'two'},
    'ring-type': {'c': 'cobwebby', 'e': 'evanescent', 'f': 'flaring', 'l': 'large', 'n': 'none', 'p': 'pendant', 's': 'sheathing', 'z': 'zone'},
    'spore-print-color': {'k': 'black', 'h': 'chocolate', 'n': 'brown', 'b': 'buff', 'r': 'green', 'u': 'purple', 'w': 'white', 'y': 'yellow', 'o': 'orange'},
    'population': {'a': 'abundant', 'c': 'clustered', 'n': 'numerous', 's': 'scattered', 'v': 'several', 'y': 'solitary'},
    'habitat': {'g': 'grasses', 'l': 'leaves', 'm': 'meadows', 'p': 'paths', 'u': 'urban', 'w': 'waste', 'd': 'woods'},
}

# A nested dict makes replace() apply each inner table only to its own column,
# so a letter such as 'n' can mean different things in different columns.
df = df.replace(code_names)
df.head()

## Split into X and y data
The data is split so that the 'class' column becomes the target `y` and the remaining columns become the features `X`. `X` is then one-hot encoded.

In [None]:
# Name of the column holding the label to predict.
target_column = 'class'

# Target: the label column on its own.
y = df[target_column]

# Features: every other column, expanded into one indicator column per
# (column, category) pair by get_dummies.
X = pd.get_dummies(df.drop(columns=[target_column]))
X.head()

## Split into testing and training data

In [None]:
# Hold out part of the data for evaluation (train_test_split's default split
# size is used). random_state pins the shuffle so the same rows land in the
# train and test sets on every "Restart & Run All"; without it each run scores
# the models on a different split and the reported numbers are not reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Correlation between columns

In [None]:
# Encode the target numerically so it can be correlated with each feature:
# edible -> 0, poisonous -> 1.
y_num = y.map({'edible': 0, 'poisonous': 1})

# Correlation of every one-hot feature column with the numeric target.
# The table is built in one go from a list of (name, corr) records:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# inside a loop is quadratic anyway.
corrs = pd.DataFrame(
    [(name, y_num.corr(X[name])) for name in X.columns],
    columns=['name', 'corr'],
)

# Most negative correlation first (features associated with 'edible'),
# most positive last (features associated with 'poisonous').
corrs.sort_values(by=['corr'])

## Decision Tree

In [None]:
# Shallow decision tree (depth capped at 3 so the printed tree stays readable).
# random_state is fixed because scikit-learn permutes the candidate features at
# every split; without a seed, ties between equally good splits can resolve
# differently from run to run and change the fitted tree.
dtc = tree.DecisionTreeClassifier(max_depth=3, random_state=42)
dtc.fit(X_train, y_train)

# Evaluate on the held-out test set.
score = dtc.score(X_test, y_test)
print(f'Score: {score}\n')

# tree_print comes from the local treeviz_util module; presumably it prints the
# fitted tree using X's column names -- confirm against that module.
tree_print(dtc, X)

## Multi-layer Perceptron

In [None]:
# Small multi-layer perceptron: one hidden layer of 5 units, trained with the
# L-BFGS solver. random_state fixes the initial weights; without it every run
# starts from a different random initialisation and the score below changes
# between runs.
mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=5, random_state=42)
mlp.fit(X_train, y_train)

# Score on the held-out test set (displayed as the cell output).
mlp.score(X_test, y_test)

In [None]:
# Shape of each weight matrix in the fitted network, one entry per layer-to-layer connection.
[layer_weights.shape for layer_weights in mlp.coefs_]

### Grid Search

In [None]:
# Base estimator for the search. random_state fixes the weight initialisation
# so that differences between cross-validation scores reflect the
# hyper-parameters rather than a lucky or unlucky random start.
mlp_grid = MLPClassifier(solver='lbfgs', random_state=42)

# Search space:
#   alpha              -- regularisation strength, 1e-1 down to 1e-6
#   hidden_layer_sizes -- width of the single hidden layer
#                         (NOTE(review): 39 and 117 look chosen relative to the
#                         number of one-hot feature columns -- confirm)
parameters = {
    'alpha': [10.0 ** -i for i in range(1, 7)],
    'hidden_layer_sizes': [5, 39, 117]
}

# Exhaustive search over every combination, using GridSearchCV's default
# cross-validation on the training data only.
grid = GridSearchCV(mlp_grid, parameters)
grid.fit(X_train, y_train)

# One row per parameter combination, with fit times and CV scores.
grid_results = pd.DataFrame(grid.cv_results_)
grid_results