In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
# Read the mushroom CSV from the relative input path and preview the first five rows.
msh_df = pd.read_csv(filepath_or_buffer='../input/mushroom-classification/mushrooms.csv')
msh_df.head(n=5)

In [None]:
# Show the column labels of the loaded frame.
msh_df.columns

In [None]:
from pandas_profiling import ProfileReport
# Build a profiling report over the loaded frame and render it as notebook widgets.
# (The author left the explorative=True option disabled.)
# NOTE(review): pandas_profiling appears to have been renamed upstream
# (ydata-profiling) — confirm the runtime image still ships the old package name.
report = ProfileReport(msh_df, title='Mushroom Dataset')
report.to_widgets()

In [None]:
# Remove the two columns that are excluded from modelling and preview the result.
# errors='ignore' keeps this cell re-runnable: without it, executing the cell a
# second time raises KeyError because the columns are already gone from msh_df.
# NOTE(review): presumably these columns were judged uninformative from the
# profiling report — confirm the rationale.
msh_df = msh_df.drop(columns=['veil-type', 'gill-attachment'], errors='ignore')
msh_df.head(2)

In [None]:
# Show which column labels remain on msh_df at this point.
msh_df.columns

# Handling Categorical Features

In [None]:
# Names of the columns that get integer-encoded: the target ('class') first,
# followed by the feature columns.
# NOTE(review): presumably this is every column left on msh_df after the drop —
# confirm against msh_df.columns.
cat_col = [
    'class',
    'cap-shape', 'cap-surface', 'cap-color',
    'bruises', 'odor',
    'gill-spacing', 'gill-size', 'gill-color',
    'stalk-shape', 'stalk-root',
    'stalk-surface-above-ring', 'stalk-surface-below-ring',
    'stalk-color-above-ring', 'stalk-color-below-ring',
    'veil-color',
    'ring-number', 'ring-type',
    'spore-print-color', 'population', 'habitat',
]

In [None]:
from sklearn.preprocessing import LabelEncoder
# Encoder used to turn string category labels into integer codes.
le = LabelEncoder()

In [None]:
# Integer-encode every column listed in cat_col, writing the codes back onto msh_df.
#
# A fresh encoder is fitted per column and stored in `encoders`, so the original
# string labels stay recoverable (encoders[col].classes_ /
# encoders[col].inverse_transform). Refitting one shared encoder would overwrite
# its fitted classes on every iteration and keep only the last column's mapping.
#
# The loop variable is `col` rather than `x`, because the notebook later binds
# `x` to the feature matrix. After the loop `le` still refers to the encoder
# fitted on the last column, as before.
encoders = {}
for col in cat_col:
    le = LabelEncoder()
    msh_df[col] = le.fit_transform(msh_df[col])
    encoders[col] = le

In [None]:
# Preview the first five rows now that the columns hold integer codes.
msh_df.head(n=5)

# Splitting Independent and Dependent Features

In [None]:
# Feature matrix: every column except the target.
# The target is removed by name instead of slicing by position (iloc[:, 1:]),
# so the result does not depend on 'class' being the first column and matches
# the by-name selection used for the target vector.
x = msh_df.drop(columns=['class'])
x.head()

In [None]:
# Target vector: the 'class' column of msh_df; preview its first five values.
y = msh_df.loc[:, 'class']
y.head(n=5)

# Feature Selection using Univariate Selection

In [None]:
from sklearn.feature_selection import SelectKBest, chi2

In [None]:
# Fit a univariate selector that scores each feature in x against y with the
# chi-squared statistic and is configured to keep the 20 best.
# `fit` is bound to the value returned by .fit(), which later cells read
# `scores_` from.
bst_features = SelectKBest(chi2, k=20)
fit = bst_features.fit(X=x, y=y)

In [None]:
# Wrap the feature names and the selector's per-feature scores in one-column
# frames so they can be placed side by side.
dfcolumns = pd.DataFrame(data=x.columns)
dfscores = pd.DataFrame(data=fit.scores_)

In [None]:
# Put feature names and scores side by side; the two source frames both carry a
# column labelled 0, so the columns are renumbered 0..1 and then given names.
fscore = pd.concat([dfcolumns, dfscores], axis='columns', ignore_index=True)
fscore = fscore.rename(columns={0: 'Column', 1: 'Score'})
fscore

In [None]:
# Rank the features by chi-squared score, highest first (up to 20 rows).
# A higher score indicates a stronger statistical dependence between that
# feature and the target.
# The frame is the cell's last expression so the notebook renders it as a table,
# like the other preview cells, instead of printing plain text.
fscore.nlargest(20, 'Score')

In [None]:
# Model inputs: the columns of x that the fitted SelectKBest selector kept.
# Re-slicing msh_df here would ignore the feature-selection step entirely;
# applying the selector's boolean support mask makes `k` actually take effect.
# NOTE(review): with k = 20 the selector appears to keep every feature (cat_col
# lists 20 non-target columns), so the result should be unchanged today — confirm.
x_new = x.loc[:, bst_features.get_support()]
x_new.head()

In [None]:
# Target vector used for the train/test split: the 'class' column of msh_df.
y_new = msh_df.loc[:, 'class']
y_new.head(n=5)

In [None]:
from sklearn.model_selection import train_test_split 

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x_new,
    y_new,
    test_size=0.2,
    random_state=1,
)

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Fit a logistic-regression classifier on the training split.
# max_iter is raised from the library default (100): on unscaled integer-coded
# features the default solver tends to stop on the iteration limit and emit a
# ConvergenceWarning, leaving coefficients that are not fully optimised.
# The fitted estimator is the cell's last expression, so its repr is displayed.
classifier = LogisticRegression(max_iter=1000, random_state=1)
classifier.fit(xtrain, ytrain)

In [None]:
# Score the classifier fitted in the previous cell (logistic regression) on the
# held-out split: confusion matrix to stdout, accuracy as the displayed value.
ypred = classifier.predict(X=xtest)
cm = confusion_matrix(y_true=ytest, y_pred=ypred)
print(cm)
accuracy_score(y_true=ytest, y_pred=ypred)

# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Fit a 5-nearest-neighbours classifier using the Minkowski metric with p=2
# (i.e. Euclidean distance) on the training split.
classifier = KNeighborsClassifier(
    n_neighbors=5,
    p=2,
    metric='minkowski',
)
classifier.fit(X=xtrain, y=ytrain)

In [None]:
# Score the classifier fitted in the previous cell (k-nearest neighbours) on the
# held-out split: confusion matrix to stdout, accuracy as the displayed value.
ypred = classifier.predict(X=xtest)
cm = confusion_matrix(y_true=ytest, y_pred=ypred)
print(cm)
accuracy_score(y_true=ytest, y_pred=ypred)

# SVM

In [None]:
from sklearn.svm import SVC

In [None]:
# Fit a support-vector classifier with a linear kernel on the training split.
classifier = SVC(
    kernel='linear',
    random_state=1,
)
classifier.fit(X=xtrain, y=ytrain)

In [None]:
# Score the classifier fitted in the previous cell (linear-kernel SVM) on the
# held-out split: confusion matrix to stdout, accuracy as the displayed value.
ypred = classifier.predict(X=xtest)
cm = confusion_matrix(y_true=ytest, y_pred=ypred)
print(cm)
accuracy_score(y_true=ytest, y_pred=ypred)

# Kernel SVM

In [None]:
# Fit a support-vector classifier with an RBF kernel on the training split.
classifier = SVC(
    kernel='rbf',
    random_state=1,
)
classifier.fit(X=xtrain, y=ytrain)

In [None]:
# Score the classifier fitted in the previous cell (RBF-kernel SVM) on the
# held-out split: confusion matrix to stdout, accuracy as the displayed value.
ypred = classifier.predict(X=xtest)
cm = confusion_matrix(y_true=ytest, y_pred=ypred)
print(cm)
accuracy_score(y_true=ytest, y_pred=ypred)

# Naive Bayes 

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Fit a Gaussian naive Bayes classifier (default settings) on the training split.
classifier = GaussianNB()
classifier.fit(X=xtrain, y=ytrain)

In [None]:
# Score the classifier fitted in the previous cell (Gaussian naive Bayes) on the
# held-out split: confusion matrix to stdout, accuracy as the displayed value.
ypred = classifier.predict(X=xtest)
cm = confusion_matrix(y_true=ytest, y_pred=ypred)
print(cm)
accuracy_score(y_true=ytest, y_pred=ypred)

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Fit a decision tree that splits on the entropy criterion; the seed is fixed so
# repeated runs build the same tree.
classifier = DecisionTreeClassifier(
    criterion='entropy',
    random_state=1,
)
classifier.fit(X=xtrain, y=ytrain)

In [None]:
# Score the classifier fitted in the previous cell (decision tree) on the
# held-out split: confusion matrix to stdout, accuracy as the displayed value.
ypred = classifier.predict(X=xtest)
cm = confusion_matrix(y_true=ytest, y_pred=ypred)
print(cm)
accuracy_score(y_true=ytest, y_pred=ypred)

# Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Fit a 100-tree random forest using the entropy criterion; the seed is fixed so
# repeated runs build the same forest.
classifier = RandomForestClassifier(
    n_estimators=100,
    criterion='entropy',
    random_state=1,
)
classifier.fit(X=xtrain, y=ytrain)

In [None]:
# Score the classifier fitted in the previous cell (random forest) on the
# held-out split: confusion matrix to stdout, accuracy as the displayed value.
ypred = classifier.predict(X=xtest)
cm = confusion_matrix(y_true=ytest, y_pred=ypred)
print(cm)
accuracy_score(y_true=ytest, y_pred=ypred)