Introduction
The Mushrooms Database contains data about several thousand types of mushrooms, both edible and poisonous. We will use several predictive models to predict the class (edible or poisonous) of each species. The data contains attributes for the cap (shape, surface, color), gill (attachment, spacing, size, color), bruises, stalk (shape, root, surface above ring, surface below ring, color above ring, color below ring), veil (type, color), ring (number, type), spore print color, population, and habitat.

Here I am planning to use an ensemble approach with multiple models such as RF, XGB, Decision Tree, etc.
We notice the data is very clean and has no noise.

In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas                        as pd
import numpy                         as np
import matplotlib.pyplot             as plt
%matplotlib inline
import seaborn                       as sns
from sklearn import preprocessing


from sklearn.preprocessing           import normalize,StandardScaler,label
from sklearn.model_selection         import train_test_split,GridSearchCV,RandomizedSearchCV
from sklearn.metrics                 import *

# ML Libraries

from sklearn.ensemble                import RandomForestClassifier
from sklearn.decomposition           import PCA
from xgboost                         import XGBClassifier
from sklearn.linear_model            import LogisticRegression
from lightgbm                        import LGBMClassifier
from sklearn.tree                    import DecisionTreeClassifier
from sklearn.neighbors               import KNeighborsClassifier

In [None]:
# Load the mushroom dataset from the notebook's input directory
df = pd.read_csv(filepath_or_buffer="../input/mushroom-classification/mushrooms.csv")

In [None]:
# Preview the first five rows of the dataset
df.head(n=5)

In [None]:
# Draw one count plot per column (each in its own figure) to inspect the
# class balance and the distribution of every categorical attribute.
for figure_number, column_name in enumerate(df.columns):
    plt.figure(num=figure_number)
    sns.countplot(x=df[column_name])

In [None]:
# Convert the categorical string values to numerical codes.
# A separate fitted encoder is stored per column in ``label_encoders`` so the
# integer codes can later be mapped back to the original categories with
# ``label_encoders[col].inverse_transform(...)``. Refitting a single shared
# encoder would retain only the mapping of the last column.
labelEncoder = preprocessing.LabelEncoder()
label_encoders = {}
for col in df.columns:
    # After the loop, ``labelEncoder`` still refers to the encoder fitted on
    # the last column, matching what a single refitted encoder would hold.
    labelEncoder = preprocessing.LabelEncoder()
    df[col] = labelEncoder.fit_transform(df[col])
    label_encoders[col] = labelEncoder

In [None]:
# Report, per column, whether it contains any null values
df.isna().any()

In [None]:
# Create the data for training.
# The target is the 'class' column. The features are obtained by dropping that
# column by name rather than by position, so the target cannot leak into X if
# the column order of the input file ever changes.
Y = df['class']
X = df.drop(columns='class')
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.20, random_state=7, shuffle=True
)

In [None]:
# Candidate classifiers to compare, stored as (display name, estimator) pairs
models = [
    ('Random Forest with Entropy', RandomForestClassifier(criterion="entropy", random_state=111)),
    ('Random Forest with gini', RandomForestClassifier(criterion="gini", random_state=111)),
    ('XGBoost', XGBClassifier()),
    ('LGBM', LGBMClassifier()),
    ('DecisionTree with entropy', DecisionTreeClassifier(criterion="entropy", random_state=101)),
    ('DecisionTree with gini', DecisionTreeClassifier(criterion="gini", random_state=101)),
    ('Logistic Regression', LogisticRegression(random_state=7)),
    ('KNN', KNeighborsClassifier(n_neighbors=10)),
]

In [None]:
# Reusable ROC plotting helper
def ROCcurve(fpr, tpr):
    """Plot a ROC curve together with the diagonal reference line.

    The legend entry reports the area under the curve, computed with ``auc``
    from the given points.

    Parameters
    ----------
    fpr : false positive rates, used as the x values of the curve.
    tpr : true positive rates, used as the y values of the curve.

    Returns
    -------
    Whatever ``plt.show()`` returns.
    """
    line_width = 2

    plt.figure()
    curve_label = 'ROC curve (area = %0.2f)' % auc(fpr, tpr)
    plt.plot(fpr, tpr, color='darkorange', lw=line_width, label=curve_label)
    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference
    plt.plot([0, 1], [0, 1], color='navy', lw=line_width, linestyle='--')

    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")

    shown = plt.show()
    return shown

In [None]:
# Evaluate every model with multiple metrics on both the test and the training
# data so the two can be compared to check for overfitting.
def _positive_class_scores(classifier, features):
    """Return continuous scores for the second class of a fitted classifier."""
    if hasattr(classifier, "predict_proba"):
        # Column 1 is the probability of the second entry of classifier.classes_.
        return classifier.predict_proba(features)[:, 1]
    if hasattr(classifier, "decision_function"):
        return classifier.decision_function(features)
    # Last resort for estimators exposing neither method: hard labels.
    return classifier.predict(features)


def _evaluate(classifier, features, target):
    """Return (accuracy in percent, F1 score, ROC AUC) of a fitted classifier."""
    predicted = classifier.predict(features)
    # ROC AUC must be computed from continuous scores; hard class labels
    # collapse the curve to a single threshold.
    scores = _positive_class_scores(classifier, features)
    return (
        round(accuracy_score(target, predicted) * 100),
        round(f1_score(target, predicted), 2),
        round(roc_auc_score(target, scores), 2),
    )


def allmodels():
    """Fit every estimator in ``models`` and tabulate test and train metrics.

    Each estimator is fitted on ``X_train``/``y_train`` and scored on both
    ``X_test``/``y_test`` and the training data itself.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns "Model", "Accuracy", "F1Score",
        "AUC", "Accuracy_Train", "F1Score_Train" and "AUC_Train", sorted by
        the test "AUC" in descending order.
    """
    columns = [
        "Model", "Accuracy", "F1Score", "AUC",
        "Accuracy_Train", "F1Score_Train", "AUC_Train",
    ]
    rows = []
    for name, classifier in models:
        classifier.fit(X_train, y_train)
        test_metrics = _evaluate(classifier, X_test, y_test)
        train_metrics = _evaluate(classifier, X_train, y_train)
        rows.append((name, *test_metrics, *train_metrics))
    model_list = pd.DataFrame(rows, columns=columns)
    return model_list.sort_values(by="AUC", ascending=False)

In [None]:
# Print the model comparison table to check for any overfitting
print(allmodels())

Based on these models, we were able to predict with very high accuracy whether a mushroom is poisonous or edible.