### Importing All Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import cross_val_score

#### Collecting Data

In [None]:
# Read the Kaggle mushroom dataset into a DataFrame, then show its (rows, columns) shape.
mushrooms_csv_path = '../input/mushroom-classification/mushrooms.csv'
df = pd.read_csv(mushrooms_csv_path)
df.shape

In [None]:
# Preview the first five rows of the raw data.
df.head(n=5)

In [None]:
# Print pandas' structural summary of the frame (columns, non-null counts, dtypes).
df.info()

In [None]:
# Show pandas' default descriptive summary of the frame.
df.describe()

#### Analyse Data

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Displaying all the distinct values found in each column
for column_name in df.columns:
    distinct_values = df[column_name].unique()
    print(column_name, ':------------- ', distinct_values)
    

Dropping `veil-type` since it has only one unique value

In [None]:
# `veil-type` holds a single value, so it carries no information for the model.
# Rebinding `df` (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run once the column is already gone; the
# in-place version raised KeyError on a second execution.
df = df.drop(columns=['veil-type'], errors='ignore')
df.head()

Renaming the `class` column to `target`

In [None]:
# Rename the label column from `class` to `target`, then display the frame.
df.rename({'class': 'target'}, axis='columns', inplace=True)
df

Replacing the categorical values of the target column with integers

In [None]:
# Encode the label as integers: poisonous ('p') -> 1, edible ('e') -> 0.
# Series.replace on an object column relies on implicit downcasting, which
# pandas has deprecated, so the label could stay `object` dtype and later be
# treated as a categorical feature. Map explicitly and cast to int instead.
# Values that are already encoded pass through unchanged, so the cell is safe
# to re-run; any unexpected label makes astype raise instead of slipping by.
target_codes = {'p': 1, 'e': 0}
df['target'] = df['target'].map(lambda label: target_codes.get(label, label)).astype('int64')
df

In [None]:
# Fraction of rows that belong to each target class.
class_counts = df['target'].value_counts()
class_counts / len(df)

In [None]:
# Bar chart showing how many rows fall in each target class.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(x='target', data=df, ax=ax)
ax.set_title('No. of Poisonous Mushroom (1) and Non-Poisonous Mushroom (0) ')
plt.show()

Visualization of the attribute values of all columns

In [None]:
# One count plot per object-dtype column, with bars split by target class.
categorical_columns = df.select_dtypes('object').columns
for column_name in categorical_columns:
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.countplot(x=column_name, hue='target', data=df, ax=ax)
    ax.set_title(column_name)
    # Anchor the legend at the right edge of the axes, vertically centred.
    ax.legend(bbox_to_anchor=(1, 0.5))
    plt.show()

From the visuals, we can see that gill-attachment, veil-color and ring-number can be dropped, since they are almost constant and do not affect the dataset

In [None]:
# Dropping the near-constant columns, which add no useful signal.
# Rebinding `df` with errors='ignore' (instead of an in-place drop) keeps the
# cell safe to re-run: the in-place version raised KeyError on a second
# execution because the columns were already gone.
df = df.drop(columns=['gill-attachment', 'veil-color', 'ring-number'], errors='ignore')
df

In [None]:
# Transforming all the columns that have exactly two different values into 0/1.
#
# The previous form, df[col].replace(..., inplace=True), is a chained in-place
# call on a column selection: under pandas Copy-on-Write it updates a temporary
# object and leaves `df` untouched. Assigning the result back to the column
# always updates the frame.
binary_encodings = {
    'bruises': {'t': 1, 'f': 0},
    'gill-spacing': {'c': 1, 'w': 0},
    'stalk-shape': {'e': 1, 't': 0},
    'gill-size': {'n': 1, 'b': 0},
}

for column_name, codes in binary_encodings.items():
    # Already-encoded values pass through unchanged, so the cell can be re-run.
    # astype raises if a column holds a value outside its two known categories.
    df[column_name] = (
        df[column_name]
        .map(lambda value: codes.get(value, value))
        .astype('int64')
    )

In [None]:
# Display the frame to inspect its current state.
df

In [None]:
# Replacing the remaining categorical columns with dummy (one-hot) columns,
# dropping the first level of each
encoded_df = pd.get_dummies(data=df, drop_first=True)
df = encoded_df
df

Creating Our Model

In [None]:
# Separate the label (y) from the feature columns (X)
X = df.drop(columns='target')
y = df['target']

# Hold out 20% of the rows as the test set.
# random_state pins the shuffle so the split -- and every score computed from
# it -- is the same on each Restart & Run All; stratify=y keeps the
# poisonous/edible ratio identical in the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
    stratify=y,
)

Training Our Model

In [None]:
# K-NEIGHBORS CLASSIFIER
# Fit on the training split, then print the mean 5-fold cross-validated recall.
knn = KNeighborsClassifier()
knn_model = knn.fit(X_train, y_train)

knn_cv_recall = cross_val_score(knn, X_train, y_train, cv=5, scoring='recall')
print(knn_cv_recall.mean())

In [None]:
# DECISION TREE CLASSIFIER
# random_state fixes the random tie-breaking between equally good splits, so
# the fitted tree and its scores are reproducible across runs.
tree = DecisionTreeClassifier(random_state=42)
tree_model = tree.fit(X_train, y_train)

# Mean recall over 5 cross-validation folds of the training split.
tree_cv_recall = cross_val_score(tree, X_train, y_train, cv=5, scoring='recall')
print(tree_cv_recall.mean())

In [None]:
# LOGISTIC REGRESSION
# Fit on the training split, then print the mean 5-fold cross-validated recall.
logic = LogisticRegression()
logic_model = logic.fit(X_train, y_train)

logic_cv_recall = cross_val_score(logic, X_train, y_train, cv=5, scoring='recall')
print(logic_cv_recall.mean())

We score with recall instead of accuracy, since the priority is to correctly identify every poisonous mushroom: labelling a poisonous mushroom as edible is the costly mistake.

Testing Our Model

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# K-NEIGHBORS CLASSIFIER: evaluate the fitted model on the held-out test split
knn_pred = knn_model.predict(X_test)
knn_accuracy_pct = accuracy_score(y_test, knn_pred) * 100

print('KNEIGHBORS CLASSIFIER')
print(confusion_matrix(y_test, knn_pred))
print(classification_report(y_test, knn_pred))
print(f"Accuracy : {knn_accuracy_pct} %")

In [None]:
# DECISION TREE CLASSIFIER: evaluate the fitted model on the held-out test split
tree_pred = tree_model.predict(X_test)
tree_accuracy_pct = accuracy_score(y_test, tree_pred) * 100

print('DECISION TREE CLASSIFIER')
print(confusion_matrix(y_test, tree_pred))
print(classification_report(y_test, tree_pred))
print(f"Accuracy : {tree_accuracy_pct} %")

In [None]:
# LOGISTIC REGRESSION: evaluate the fitted model on the held-out test split
logic_pred = logic_model.predict(X_test)
logic_accuracy_pct = accuracy_score(y_test, logic_pred) * 100

print('LOGISTIC REGRESSION')
print(confusion_matrix(y_test, logic_pred))
print(classification_report(y_test, logic_pred))
print(f"Accuracy : {logic_accuracy_pct} %")

Hence, we have correctly predicted 100% of our test dataset
