In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Notebook-wide settings: suppress every warning and let pandas render
# frames without truncating columns or rows.
warnings.simplefilter('ignore')
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

%matplotlib inline

In [None]:
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
# Read the raw mushroom table and preview its first rows.
mushrooms_csv = '/kaggle/input/mushroom-classification/mushrooms.csv'
df = pd.read_csv(mushrooms_csv)
df.head()

In [None]:
# Per-column lookup from the dataset's single-letter codes to readable labels.
# Shape: {column name: {code: label}}; it is passed to `df.replace` below.
# Every inner dict must use unique keys: a repeated key in a dict literal is
# silently overwritten by the later entry.
mapping = { 'cap-shape': {'b': 'bell', 'c': 'conical', 'x': 'convex', 'f': 'flat', 'k': 'knobbed', 's': 'sunken'},
            'cap-surface': {'s': 'smooth', 'f': 'fibrous', 'g': 'grooves', 'y': 'scaly'},
            'cap-color': {'n': 'brown', 'b': 'buff', 'c': 'cinnamon', 'g': 'gray', 'r': 'green', 'p':'pink',
                          'u': 'purple', 'e': 'red', 'w': 'white', 'y': 'yellow'},
            'bruises': {'t': 'bruises', 'f': 'no'},
            'odor': {'a': 'almond', 'l': 'anise', 'c': 'creosote', 'y': 'fishy', 'f': 'foul', 'm': 'musty',
                     'n': 'none', 'p': 'pungent', 's': 'spicy'},
            'gill-attachment': {'a': 'attached', 'd': 'descending', 'f': 'free','n': 'notched'},
            'gill-spacing': {'c': 'close', 'w': 'crowded', 'd': 'distant'},
            'gill-size': {'b': 'broad', 'n': 'narrow'},
            'gill-color': {'k': 'black', 'n': 'brown', 'b': 'buff', 'h': 'chocolate', 'g': 'gray', 'r': 'green',
                           'o': 'orange', 'p': 'pink', 'u': 'purple', 'e': 'red', 'w': 'white', 'y': 'yellow'},
            'stalk-shape': {'e': 'enlarging', 't': 'tapering'},
            # '?' is the dataset's marker for an unknown stalk root.
            'stalk-root': {'b': 'bulbous', 'c': 'club', 'u': 'cup','e': 'equal', 'z': 'rhizomorphs',
                           'r': 'rooted', '?': 'missing'},
            'stalk-surface-above-ring': {'f': 'fibrous', 'y': 'scaly', 'k': 'silky', 's': 'smooth'},
            'stalk-surface-below-ring': {'f': 'fibrous', 'y': 'scaly', 'k': 'silky', 's': 'smooth'},
            'stalk-color-above-ring': {'n': 'brown', 'b': 'buff', 'c': 'cinnamon', 'g': 'gray', 'o': 'orange',
                                       'p': 'pink', 'e': 'red', 'w': 'white', 'y': 'yellow'},
            'stalk-color-below-ring': {'n': 'brown', 'b': 'buff', 'c': 'cinnamon', 'g': 'gray', 'o': 'orange',
                                       'p': 'pink', 'e': 'red', 'w': 'white', 'y': 'yellow'},
            'veil-type': {'p': 'partial', 'u': 'universal'},
            'veil-color': {'n': 'brown', 'o': 'orange', 'w': 'white', 'y': 'yellow'},
            'ring-number': {'n': 'none', 'o': 'one', 't': 'two'},
            'ring-type': {'c': 'cobwebby', 'e': 'evanescent', 'f': 'flaring', 'l': 'large', 'n': 'none',
                          'p': 'pendant', 's': 'sheathing', 'z': 'zone'},
            'spore-print-color': {'k': 'black', 'n': 'brown', 'b': 'buff', 'h': 'chocolate', 'r': 'green',
                                  'o': 'orange', 'u': 'purple', 'w': 'white', 'y': 'yellow'},
            'population': {'a': 'abundant', 'c': 'clustered', 'n': 'numerous', 's': 'scattered',
                           'v': 'several', 'y': 'solitary'},
            'habitat': {'g': 'grasses', 'l': 'leaves', 'm': 'meadows', 'd': 'woods', 'p': 'paths',
                        'u': 'urban', 'w': 'waste'},
            'class': {'e': 'edible', 'p':'poisonous'}
          }
# Decode the single-letter codes in place, column by column, then preview.
df.replace(to_replace=mapping, inplace=True)
df.head()

In [None]:
# Summarise the frame: per-column dtype and non-null count, plus memory usage.
df.info()

In [None]:
# Count NaN values per column.
# NOTE: unknown stalk roots are encoded as '?' (decoded to 'missing' by the
# `mapping` dict), so they are ordinary strings and are NOT counted here.
df.isna().sum()

In [None]:
# Class balance: number of rows per value of the target column.
class_fig_rc = {'figure.figsize': (7, 5)}
sns.set(rc=class_fig_rc)
sns.countplot(data=df, x='class')

In [None]:
# Habitat frequencies split by class, bars ordered from most to least common.
habitat_order = df['habitat'].value_counts().index
sns.set(rc={'figure.figsize': (10, 5)})
sns.countplot(data=df, x='habitat', hue='class', order=habitat_order)

In [None]:
# Overall habitat frequencies, bars ordered from most to least common.
habitat_by_frequency = df['habitat'].value_counts().index
sns.countplot(data=df, x='habitat', order=habitat_by_frequency)

In [None]:
def plot_countplots(col, hue, palette1 = 'icefire', palette2 = 'rocket', fig_size = (10, 7)):
    """Print the value counts of ``col`` and draw two stacked count plots.

    The top panel shows how often each category of ``col`` occurs; the bottom
    panel shows the same categories split by ``hue``. Both panels list the
    categories in descending order of frequency.

    Reads the notebook-level DataFrame ``df`` and, via ``sns.set``, updates the
    global seaborn/matplotlib figure size to ``fig_size``.

    Parameters
    ----------
    col : str
        Column of ``df`` plotted on the x axis.
    hue : str
        Column of ``df`` used to split the bars in the bottom panel.
    palette1 : str
        Palette name for the top (overall) panel.
    palette2 : str
        Palette name for the bottom (split by ``hue``) panel.
    fig_size : tuple
        ``(width, height)`` of the figure.

    Returns
    -------
    None
    """
    sns.set(rc={'figure.figsize' : fig_size})
    # Compute the counts once; they drive both the printout and the bar order.
    counts = df[col].value_counts()
    category_order = counts.index
    print(counts.to_dict())
    fig, ax = plt.subplots(2, 1)
    sns.countplot(x=col, order=category_order, data=df, palette=palette1, ax=ax[0])
    sns.countplot(x=col, hue=hue, order=category_order, data=df, palette=palette2, ax=ax[1])
    # Keep the top panel's tick labels from colliding with the bottom panel.
    fig.tight_layout()
    # plt.show() renders the figure under the inline backend; Figure.show()
    # only works with GUI backends and otherwise just emits a warning.
    plt.show()

In [None]:
# One entry per feature: (column, palette1, palette2[, fig_size]).
# Replaces a run of copy-pasted cells that differed only in these arguments;
# the calls and their order are unchanged.
COUNTPLOT_SPECS = [
    ('habitat', 'PiYG', 'PRGn'),
    ('cap-shape', 'BrBG', 'PuOr'),
    ('cap-surface', 'RdGy', 'RdBu'),
    ('cap-color', 'bwr', 'seismic'),
    ('bruises', 'Spectral', 'coolwarm'),
    ('odor', 'RdYlBu', 'RdYlGn'),
    ('gill-attachment', 'binary', 'gist_yarg'),
    ('gill-spacing', 'gist_gray', 'gray'),
    ('gill-size', 'bone', 'pink'),
    ('gill-color', 'cool', 'Wistia'),
    ('stalk-shape', 'autumn', 'winter'),
    ('stalk-root', 'spring', 'summer'),
    ('stalk-surface-above-ring', 'gist_heat', 'copper'),
    ('stalk-surface-below-ring', 'hot', 'afmhot'),
    ('stalk-color-above-ring', 'magma', 'cividis'),
    ('stalk-color-below-ring', 'viridis', 'plasma'),
    ('veil-type', 'plasma', 'inferno', (6, 5)),  # smaller figure for this feature
    ('veil-color', 'Oranges', 'Reds'),
    ('ring-number', 'Greys', 'Purples'),
    ('ring-type', 'Blues', 'Greens'),
    ('spore-print-color', 'BuGn', 'YlGn'),
    ('population', 'YlOrRd', 'OrRd'),
]

for col, *plot_args in COUNTPLOT_SPECS:
    # plot_args is (palette1, palette2) or (palette1, palette2, fig_size).
    plot_countplots(col, 'class', *plot_args)
    # Flush the figure now so it renders directly under its printed counts
    # rather than after the whole loop; a no-op if it was already shown.
    plt.show()

In [None]:
# Keep an independent copy of the decoded frame before building model inputs.
backup_df = df.copy()

In [None]:
# Feature frame: every column except the target.
X = df.drop(columns=['class'])
X.head()

In [None]:
# One-hot encode every categorical feature. `handle_unknown='ignore'` makes
# categories unseen at fit time encode as all zeros instead of raising.
encoder = OneHotEncoder(handle_unknown='ignore')
X = encoder.fit_transform(X)

# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer its replacement and fall back only on old installations.
if hasattr(encoder, 'get_feature_names_out'):
    encoded_feature_names = encoder.get_feature_names_out()
else:
    encoded_feature_names = encoder.get_feature_names()
encoded_feature_names

In [None]:
# Show the parameters the one-hot encoder is configured with.
encoder.get_params()

In [None]:
# Target vector: the 'class' column of df.
y = df['class']
y.head()

In [None]:
# Turn the class labels into integer codes; the fitted encoder is kept so the
# codes can be mapped back to labels later.
label_encoder = LabelEncoder().fit(y)
y = label_encoder.transform(y)
y

In [None]:
# Hold out 33% of the rows for testing; the fixed random_state makes the
# shuffled split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
# Gradient boosting with 100 depth-1 trees, fitted on the training split.
gb_settings = dict(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
clf = GradientBoostingClassifier(**gb_settings)
clf = clf.fit(X_train, y_train)

In [None]:
# Mean accuracy of the fitted classifier on the held-out test split.
clf.score(X_test, y_test)

In [None]:
# Same accuracy figure, computed from explicit test-set predictions.
y_pred = clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Shallow random forest (max depth 2) for comparison. This rebinds `clf` and
# `y_pred`, replacing the gradient-boosting model and its predictions.
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)