In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import plot_roc_curve
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Notebook-wide matplotlib defaults: label/tick font sizes, figure size and
# legend font size, applied in a single update.
plt.rcParams.update({
    'axes.labelsize': 15.,
    'xtick.labelsize': 15.,
    'ytick.labelsize': 15.,
    'figure.figsize': [15., 8.],
    'legend.fontsize': 13.,
})

In [None]:
# Load the mushroom dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('../input/mushroom-classification/mushrooms.csv')
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
data.shape

In [None]:
# Per-column summary statistics as produced by DataFrame.describe().
data.describe()

In [None]:
# Print column names, non-null counts and dtypes.
data.info()

In [None]:
# Number of rows for each distinct value of the `class` column
# (the column used as the prediction target further down).
data['class'].value_counts()

In [None]:
# Check for missing values: count the NaN/None entries in each column.
data.isna().sum()

In [None]:
# No missing values found

In [None]:
# Bar chart of the row count per `class` value; the trailing `;` hides the Axes repr.
sns.countplot(data=data, x='class', palette="magma");

### The target classes are approximately equally distributed

In [None]:
# Draw one count plot per column, skipping the first column of the frame.
# Each plot gets a grid and is shown before the next one is drawn.
for feature in data.columns[1:]:
    sns.countplot(x=feature, data=data, palette="magma")
    plt.grid()
    plt.show()

In [None]:
# Draw one histogram per column, skipping the first column of the frame.
for i in data.columns[1:]:
    plt.figure(figsize=(10,6))
    # `palette` is omitted on purpose: seaborn only uses it to colour a `hue`
    # mapping, and no `hue` is set here, so it had no effect on the plot.
    sns.histplot(data=data, x=i)
    # Show each figure as soon as it is drawn (as the count-plot loop does)
    # instead of leaving one open figure per column until the cell finishes.
    plt.show()

In [None]:
# Re-display the first rows for reference before plotting features against `class`.
data.head()

In [None]:
# Row count per `cap-shape` value, split by `class`. The trailing `;` keeps the
# Axes repr out of the cell output, matching the other plotting cells.
sns.countplot(data=data, x="cap-shape", hue="class");

In [None]:
# Row count per `habitat` value, split by `class`; the trailing `;` hides the Axes repr.
sns.countplot(data=data, x="habitat", hue="class");

In [None]:
# Standalone encoder instance; it is not the one used for the column-wise
# encoding below and stays unfitted.
label = LabelEncoder()
# Replace every column with integer codes. Each column is fitted and
# transformed independently of the others.
data = data.apply(lambda column: LabelEncoder().fit_transform(column))

In [None]:
# First rows after label encoding, where every column holds integer codes.
data.head()

In [None]:
# Re-check dtypes and non-null counts on the encoded frame.
data.info()

In [None]:
# Count missing values per column again, this time on the encoded frame.
data.isna().sum()

In [None]:
# Target vector: the `class` column. Feature matrix: every other column.
y = data["class"]
x = data.drop(columns="class")

In [None]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Candidate classifiers, keyed by the display name used in the score table.
# - Estimators with randomised training (random forest, decision tree,
#   LinearSVC) get a fixed seed so that Restart & Run All reproduces the scores.
# - LogisticRegression gets a higher iteration cap: the default of 100 can stop
#   before convergence, and the resulting ConvergenceWarning would be hidden by
#   the notebook-wide warnings filter.
models = {"Logistic Regression": LogisticRegression(max_iter=1000),
          "RandomForestClassifier": RandomForestClassifier(random_state=42),
          "KNN": KNeighborsClassifier(),
          "Naive Bayes": GaussianNB(),
          "Decision Tree": DecisionTreeClassifier(random_state=42),
          "SVM": LinearSVC(random_state=42)
         }

def fit_and_score(models,x_train,x_test,y_train,y_test):
    """Fit every model on the training split and score it on the test split.

    Parameters
    ----------
    models : dict
        Maps a display name to an estimator exposing ``fit(X, y)`` and
        ``score(X, y)``. The estimators are fitted in place, so they remain
        fitted in the caller's dict after this function returns.
    x_train, y_train
        Training features and labels, passed to ``fit``.
    x_test, y_test
        Held-out features and labels, passed to ``score``.

    Returns
    -------
    dict
        ``{name: model.score(x_test, y_test)}`` in the iteration order of
        ``models``. For scikit-learn classifiers ``score`` is mean accuracy.
        An empty ``models`` dict yields an empty result.
    """
    # The former unused local `accuracy_score = {}` was dropped: it did
    # nothing and shadowed the imported sklearn metric of the same name.
    model_scores = {}
    for name, model in models.items():
        model.fit(x_train, y_train)
        model_scores[name] = model.score(x_test, y_test)
    return model_scores

In [None]:
# Train and evaluate every candidate model; the bare `score` expression
# displays the resulting name -> score mapping.
score = fit_and_score(models, x_train, x_test, y_train, y_test)
score

In [None]:
# Single-row frame (row label "accuracy") with one column per model, transposed
# so that each model is drawn as its own horizontal bar.
compare_score = pd.DataFrame(data=score, index=["accuracy"])
compare_score.T.plot.barh()