In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
import os
# Load the mushroom dataset from the relative input directory into a DataFrame.
mush = pd.read_csv("../input/mushrooms.csv")

In [None]:
# Preview the first rows of the raw frame.
mush.head()

In [None]:
# Plot the NaN mask of the frame; any missing cell would appear as a contrasting mark.
sns.heatmap(mush.isna(),cmap='coolwarm')
# No NaN values are present. NOTE(review): isna() only flags NaN/None, so
# placeholder strings (e.g. '?') would not be detected here -- verify separately.

In [None]:
# Summary statistics for every column of the raw frame.
mush.describe()

In [None]:
# Feature matrix: every column of the raw frame except the target column.
X = mush.drop(columns='class')

In [None]:
# Target vector: the 'class' column of the raw frame.
y = mush['class']

In [None]:
# Preview the first target labels before encoding.
y.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Single encoder instance; it is re-fitted for each feature column and later for the target.
labelencoder = LabelEncoder()

In [None]:
# Replace each feature's category labels with integer codes, one column at a time.
# The shared encoder is re-fitted on every column, so it only remembers the last one.
for feature_name in X.columns:
    codes = labelencoder.fit_transform(X[feature_name])
    X[feature_name] = codes

In [None]:
# Preview the integer-encoded features.
X.head()

In [None]:
# Encode the target labels as integers; this rebinds y from a Series to the encoder's output array.
y = labelencoder.fit_transform(y)

In [None]:
# Display the encoded target.
y
# Encoding used below: poisonous = 1, edible = 0

In [None]:
# Expand every (already integer-encoded) feature column into indicator columns.
# NOTE: this rebinds X, so re-running the cell would expand the indicator columns again.
X = pd.get_dummies(X, columns=X.columns)

In [None]:
# Preview the indicator-encoded feature matrix.
X.head()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree with default hyperparameters. The seed matches the one used for
# the train/test split so repeated runs of the notebook give the same tree.
dtc = DecisionTreeClassifier(random_state=42)

In [None]:
# Train the decision tree on the indicator-encoded training split.
dtc.fit(X_train,y_train)

In [None]:
# Predict classes for the held-out rows.
pred = dtc.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score

In [None]:
# Confusion matrix of the decision tree on the test split (true labels first, predictions second).
print(confusion_matrix(y_test,pred))

In [None]:
# Classification report for the decision tree on the test split.
print(classification_report(y_test,pred))

In [None]:
# Feature scaling: create the scaler that is fitted on the training split in the next cell.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [None]:
# Fit the scaler on the training split only, then apply the same transform to the test split.
# NOTE: both names are overwritten, so re-running this cell would scale already-scaled data.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Reduce to two components so the decision regions can be drawn on a 2-D plane.
pca = PCA(n_components=2)

In [None]:
# Fit PCA on the scaled training split only, then project the test split with the same components.
# NOTE: both names are overwritten again; from here on X_train / X_test hold the 2-component projection.
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

In [None]:
# Re-fit the same decision tree object, this time on the two PCA components.
dtc.fit(X_train,y_train)

In [None]:
# Predict the test split from its PCA projection.
preddtc = dtc.predict(X_test)

In [None]:
# Confusion matrix of the PCA-based decision tree on the test split.
print(confusion_matrix(y_test,preddtc))

In [None]:
# Classification report of the PCA-based decision tree on the test split.
print(classification_report(y_test,preddtc))

In [None]:
# Decision regions of the decision tree over the two PCA components, with the
# training samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(16,8))
from matplotlib.colors import ListedColormap
X_set , y_set = X_train, y_train
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = dtc.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Training set Decision Tree")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
# Decision regions of the decision tree over the two PCA components, with the
# test samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(16,8))
from matplotlib.colors import ListedColormap
# This is the test-set figure, so it must draw the held-out split (the title
# says "Test set"; plotting the training split here would duplicate the previous figure).
X_set , y_set = X_test, y_test
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = dtc.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Test set Decision Tree")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest with 200 trees. The seed matches the one used for the
# train/test split so repeated runs of the notebook give the same forest.
rfc = RandomForestClassifier(n_estimators=200, random_state=42)

In [None]:
# Train the random forest on the two PCA components of the training split.
rfc.fit(X_train,y_train)

In [None]:
# Predict the test split with the random forest.
predrfc = rfc.predict(X_test)

In [None]:
# Confusion matrix of the random forest on the test split.
print(confusion_matrix(y_test,predrfc))

In [None]:
# Classification report of the random forest on the test split.
print(classification_report(y_test,predrfc))

In [None]:
# Overall test accuracy of the random forest.
accuracy_score(y_test,predrfc)

In [None]:
# Decision regions of the random forest over the two PCA components, with the
# training samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(16,8))
from matplotlib.colors import ListedColormap
X_set , y_set = X_train, y_train
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = rfc.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Training set Random Forest")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
# Decision regions of the random forest over the two PCA components, with the
# test samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(16,8))
from matplotlib.colors import ListedColormap
X_set , y_set = X_test, y_test
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = rfc.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Test set Random Forest")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
from sklearn.svm import SVC

In [None]:
# Support vector classifier with the library's default hyperparameters.
svc = SVC()

In [None]:
# Train the SVC on the two PCA components of the training split.
svc.fit(X_train,y_train)

In [None]:
# Predict the test split with the SVC.
svcpred = svc.predict(X_test)

In [None]:
# Confusion matrix of the SVC on the test split.
print(confusion_matrix(y_test,svcpred))

In [None]:
# Classification report of the SVC on the test split.
print(classification_report(y_test,svcpred))

In [None]:
# Overall test accuracy of the SVC.
accuracy_score(y_test,svcpred)

In [None]:
# Decision regions of the support vector classifier over the two PCA
# components, with the training samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(10,6))
from matplotlib.colors import ListedColormap
X_set , y_set = X_train, y_train
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = svc.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Training set Support Vector Classifier")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
# Decision regions of the support vector classifier over the two PCA
# components, with the test samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(10,6))
from matplotlib.colors import ListedColormap
X_set , y_set = X_test, y_test
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = svc.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Test set Support Vector Classifier")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with the library's default hyperparameters.
lg = LogisticRegression()

In [None]:
# Train logistic regression on the two PCA components of the training split.
lg.fit(X_train,y_train)

In [None]:
# Predict the test split with logistic regression.
predlg = lg.predict(X_test)

In [None]:
# Confusion matrix of logistic regression on the test split.
print(confusion_matrix(y_test,predlg))

In [None]:
# Classification report of logistic regression on the test split.
print(classification_report(y_test,predlg))

In [None]:
# Overall test accuracy of logistic regression.
accuracy_score(y_test,predlg)

In [None]:
# Decision regions of logistic regression over the two PCA components, with
# the training samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(10,6))
from matplotlib.colors import ListedColormap
X_set , y_set = X_train, y_train
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = lg.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Training set Logistic Regression")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
# Decision regions of logistic regression over the two PCA components, with
# the test samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(10,6))
from matplotlib.colors import ListedColormap
X_set , y_set = X_test, y_test
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = lg.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Test set Logistic Regression")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Gaussian naive Bayes classifier with the library's default settings.
NB = GaussianNB()

In [None]:
# Train naive Bayes on the two PCA components of the training split.
NB.fit(X_train,y_train)

In [None]:
# Predict the test split with naive Bayes.
prednb = NB.predict(X_test)

In [None]:
# Confusion matrix of naive Bayes on the test split.
print(confusion_matrix(y_test,prednb))

In [None]:
# Classification report of naive Bayes on the test split.
print(classification_report(y_test,prednb))

In [None]:
# Decision regions of naive Bayes over the two PCA components, with the
# training samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(10,6))
from matplotlib.colors import ListedColormap
X_set , y_set = X_train, y_train
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = NB.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Training set Naive bayes")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
# Decision regions of naive Bayes over the two PCA components, with the test
# samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(10,6))
from matplotlib.colors import ListedColormap
X_set , y_set = X_test, y_test
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = NB.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Test set Naive bayes")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-nearest-neighbours classifier with the library's default settings.
knn = KNeighborsClassifier()

In [None]:
# Fit KNN on the two PCA components of the training split.
knn.fit(X_train,y_train)

In [None]:
# Predict the test split with KNN.
predknn = knn.predict(X_test)

In [None]:
# Confusion matrix of KNN on the test split.
print(confusion_matrix(y_test,predknn))

In [None]:
# Classification report of KNN on the test split.
print(classification_report(y_test,predknn))

In [None]:
# Overall test accuracy of KNN.
accuracy_score(y_test,predknn)

In [None]:
# Decision regions of KNN over the two PCA components, with the training
# samples overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(10,6))
from matplotlib.colors import ListedColormap
X_set , y_set = X_train, y_train
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = knn.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Training set KNN")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();

In [None]:
# Decision regions of KNN over the two PCA components, with the test samples
# overlaid.
sns.set_context('notebook',font_scale=2)
plt.figure(figsize=(10,6))
from matplotlib.colors import ListedColormap
X_set , y_set = X_test, y_test
class_cmap = ListedColormap(('red','green'))
# Dense grid spanning both components, padded by 1 on every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:,0].min()-1,stop = X_set[:,0].max()+1,step = 0.01),
                     np.arange(start = X_set[:,1].min()-1,stop = X_set[:,1].max()+1,step = 0.01)     )
# Classify every grid point and shade the plane by predicted class.
grid_pred = knn.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape)
plt.contourf(X1,X2,grid_pred,alpha = 0.5,cmap = class_cmap)
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
# Overlay the samples one class at a time. A boolean mask selects the rows of
# class j (indexing with j itself would pick only row 0 / row 1), and the colour
# goes through `color=` because `cmap=` expects a colormap, not an RGBA tuple.
for i,j in enumerate(np.unique(y_set)):
    class_rows = np.asarray(y_set) == j
    plt.scatter(X_set[class_rows,0],X_set[class_rows,1],color=class_cmap(i),label=j)
plt.title("Test set KNN")
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend();