In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily walk the input directory tree and print the full path of every file found.
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing Libraries

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, plot_confusion_matrix , accuracy_score

In [None]:
# Read the mushroom dataset from the input directory and preview its first rows.
MUSHROOMS_CSV = '../input/mushroom-classification/mushrooms.csv'
df = pd.read_csv(MUSHROOMS_CSV)
df.head()

In [None]:
# Summary statistics for every column; include='all' covers non-numeric columns as well.
df.describe(include='all')

In [None]:
# Number of missing values in each column.
df.isna().sum()

# Label Encoding of Categorical Variables

In [None]:
# Encode every object-typed column as integer codes. LabelEncoder numbers the
# distinct values of a column in sorted order, starting from 0.
categorical_cols = [name for name, dtype in df.dtypes.items() if dtype == 'object']
label_encoder = LabelEncoder()
# The single encoder is re-fitted for each column, so afterwards it only holds
# the mapping of the last column it processed.
labelled_df = df.assign(
    **{name: label_encoder.fit_transform(df[name]) for name in categorical_cols}
)
labelled_df.head()

In [None]:
# Pairwise correlation between the label-encoded columns, drawn as an annotated heatmap.
correlation = labelled_df.corr()
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(correlation, annot=True, ax=ax)

In [None]:
# Remove the 'veil-type' column from the encoded frame. Rebinding the name
# (instead of inplace=True) together with errors='ignore' makes the cell
# idempotent: re-running it no longer raises KeyError for the missing column.
labelled_df = labelled_df.drop(columns='veil-type', errors='ignore')

# Train Test Split

In [None]:
# Target is the first column; every remaining column is a feature.
# An open-ended slice replaces the hard-coded upper bound of 22, which would
# silently drop trailing columns whenever the frame has more than 22 columns.
y = labelled_df.iloc[:, 0]
X = labelled_df.iloc[:, 1:]

In [None]:
# Standardise the features: learn the scaling statistics from X,
# then apply them to X itself.
scaler = StandardScaler().fit(X)
feature_set = scaler.transform(X)
feature_set

In [None]:
# Split the unscaled features first, then fit the scaler on the training rows
# only. Fitting it on the full dataset would let validation-set statistics
# leak into the training features. The row partition is unchanged because the
# same test_size and random_state are applied to the same number of rows.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=0
)
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_train

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# The 'sag' solver shuffles the data, so a fixed random_state is needed for
# repeatable coefficients and accuracy across runs.
lr = LogisticRegression(C=0.01, solver='sag', random_state=0)
lr.fit(X_train, y_train)
pred = lr.predict(X_val)
# accuracy_score expects (y_true, y_pred); the value is the same either way,
# but the documented order keeps the call readable.
lr_acc = accuracy_score(y_val, pred)
lr_acc

In [None]:
# display_labels are matched to the sorted class codes: the first label names
# code 0, the second names code 1. Assuming the target was label-encoded from
# 'e' (edible) / 'p' (poisonous), code 0 is the edible class, so 'No Poison'
# must come first.
plot_confusion_matrix(lr,X_val,y_val,display_labels=['No Poison','Poison'])

# Gaussian Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

# Fit Gaussian Naive Bayes on the training split and score it on the validation split.
nb = GaussianNB().fit(X_train, y_train)
pred = nb.predict(X_val)
nb_acc = accuracy_score(y_true=y_val, y_pred=pred)
nb_acc

In [None]:
# display_labels are matched to the sorted class codes (first label = code 0).
# Assuming the target was label-encoded from 'e' (edible) / 'p' (poisonous),
# code 0 is the edible class, so 'No Poison' must come first.
plot_confusion_matrix(nb,X_val,y_val,display_labels=['No Poison','Poison'])

# K-Nearest Neighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a 5-nearest-neighbours classifier on the training split and score it on
# the validation split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
pred = knn.predict(X_val)
knn_acc = accuracy_score(y_true=y_val, y_pred=pred)
knn_acc

In [None]:
# display_labels are matched to the sorted class codes (first label = code 0).
# Assuming the target was label-encoded from 'e' (edible) / 'p' (poisonous),
# code 0 is the edible class, so 'No Poison' must come first.
plot_confusion_matrix(knn,X_val,y_val,display_labels=['No Poison','Poison'])

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# The tree builder permutes candidate features at each split, so ties between
# equally good splits are broken randomly; fixing random_state makes the fitted
# tree and its accuracy repeatable across runs.
tree = DecisionTreeClassifier(criterion='entropy', random_state=0)
tree.fit(X_train, y_train)
pred = tree.predict(X_val)
# accuracy_score expects (y_true, y_pred); the value is the same either way,
# but the documented order keeps the call readable.
tree_acc = accuracy_score(y_val, pred)
tree_acc

In [None]:
# display_labels are matched to the sorted class codes (first label = code 0).
# Assuming the target was label-encoded from 'e' (edible) / 'p' (poisonous),
# code 0 is the edible class, so 'No Poison' must come first.
plot_confusion_matrix(tree,X_val,y_val,display_labels=['No Poison','Poison'])

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a seeded 100-tree random forest on the training split and score it on
# the validation split.
forest = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train, y_train)
pred = forest.predict(X_val)
forest_acc = accuracy_score(y_true=y_val, y_pred=pred)
forest_acc

In [None]:
# display_labels are matched to the sorted class codes (first label = code 0).
# Assuming the target was label-encoded from 'e' (edible) / 'p' (poisonous),
# code 0 is the edible class, so 'No Poison' must come first.
plot_confusion_matrix(forest,X_val,y_val,display_labels=['No Poison','Poison'])

# XGBoost

In [None]:
from xgboost import XGBClassifier

# Fit an XGBoost classifier with default settings on the training split and
# score it on the validation split.
xgb = XGBClassifier()
xgb.fit(X_train, y_train)
pred = xgb.predict(X_val)
xgb_acc = accuracy_score(y_true=y_val, y_pred=pred)
xgb_acc

In [None]:
# display_labels are matched to the sorted class codes (first label = code 0).
# Assuming the target was label-encoded from 'e' (edible) / 'p' (poisonous),
# code 0 is the edible class, so 'No Poison' must come first.
plot_confusion_matrix(xgb,X_val,y_val,display_labels=['No Poison','Poison'])

# Models' Accuracy

In [None]:
# Collect each model's validation accuracy and rank the models from best to worst.
model_scores = {
    'Logistic Regression': lr_acc,
    'Gaussian Naive Bayes': nb_acc,
    'KNeighbors': knn_acc,
    'Decision Tree': tree_acc,
    'Random Forest': forest_acc,
    'Xg Boost': xgb_acc,
}
df_acc = pd.DataFrame({
    'Models': list(model_scores.keys()),
    'Accuracy': list(model_scores.values()),
})
df_acc.sort_values(by='Accuracy', ascending=False)