In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, files in os.walk('/kaggle/input'):
    full_paths = (os.path.join(root, fname) for fname in files)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%notebook inline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

# Reading the CSV file and previewing the first rows

In [None]:
# Load the mushroom dataset from the Kaggle input directory.
csv_path = "/kaggle/input/mushroom-classification/mushrooms.csv"
df = pd.read_csv(csv_path)

# Preview the first rows of the frame.
df.head()

**Checking for null values**

In [None]:
# Count missing values per column (isnull is an alias of isna).
df.isnull().sum()

# Encoding labels

In [None]:
# Work on a copy so the original string-valued frame `df` stays untouched.
df1 = df.copy()

enc = LabelEncoder()

# One {integer code: original label} lookup per column, in column order (for reference).
ls = []

for column_name in df1.columns:
    # Refit the encoder on this column and replace its values with integer codes.
    codes = enc.fit_transform(df1[column_name])
    df1[column_name] = codes
    # classes_ is indexed by code, so enumerating it yields the code -> label mapping.
    ls.append(dict(enumerate(enc.classes_)))
    

In [None]:
# Display the label-encoded copy of the data (every column now holds integer codes).
df1

# Encoded values for labels of all columns

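**Each dictionary below corresponds to one column (in the original column order) and maps every integer code back to its original category label.**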

In [None]:
# Print each column's code -> label mapping, followed by two blank lines as a separator.
for mapping in ls:
    print(mapping, end="\n\n\n")

# Splitting data

In [None]:
# Features are every encoded column except the target; the target is the "class" column.
X = df1.drop("class", axis=1)
y = df1["class"]

# Split BEFORE scaling: fitting the scaler on the full data would let statistics of the
# test rows leak into the training features.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101
)

# Learn the scaling parameters from the training rows only, then apply the same
# transformation to the test rows. Indexes are kept so rows stay aligned with y.
sc = StandardScaler()

X_train = pd.DataFrame(
    sc.fit_transform(X_train), columns=X.columns, index=X_train.index
)
X_test = pd.DataFrame(
    sc.transform(X_test), columns=X.columns, index=X_test.index
)

# NOTE: `X` itself is left unscaled; when cross-validating on the full `X`, put the
# scaler inside a Pipeline so it is fitted per fold.

# Choosing a K value for KNN Classifier

In [None]:
# Choose K on a validation split carved out of the TRAINING data. Tuning K against the
# test set would make the final test accuracy an optimistic estimate.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.3, random_state=101
)

# error[k - 1] holds the validation misclassification rate for n_neighbors = k.
error = []

for i in range(1, 50):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_fit, y_fit)
    pred = knn.predict(X_val)
    # Fraction of validation samples whose prediction differs from the true label.
    error.append(np.mean(pred != y_val))

In [None]:
# Plot the error rate recorded for each candidate K (1 through 49).
fig, ax = plt.subplots(figsize=(9, 7))
ax.set_xlabel("K")
ax.set_ylabel("Error")
k_values = range(1, 50)
ax.plot(k_values, error, marker="*", markersize=9)


# Training models

In [None]:
# Candidate classifiers keyed by display name (names are left-padded so the printed
# output lines up in a column).
models = dict([
    (" Logistic Regression", LogisticRegression()),
    ("K Nearest Neighbours", KNeighborsClassifier(n_neighbors=7)),
    ("       Decision Tree", DecisionTreeClassifier()),
    ("      SVM Classifier", SVC(gamma='auto')),
])

# Fit every classifier on the training split and report progress.
for label in models:
    models[label].fit(X_train, y_train)
    print(f"{label} trained.")

# Testing models

In [None]:
# Report the mean accuracy of each fitted model on the held-out test split.
for label, clf in models.items():
    accuracy = clf.score(X_test, y_test)
    print(f"{label} Accuracy: {accuracy:.5f}")
                     

# Improving accuracy of Logistic Regression 

In [None]:
# Same model line-up as before, but with an unpenalised, class-balanced logistic
# regression solved with newton-cg.
#
# Regularisation is disabled with C=np.inf instead of penalty="none": the string
# "none" is deprecated in scikit-learn 1.2 and rejected by 1.4+, whereas an infinite
# C (zero regularisation strength) gives the same unpenalised fit across versions.
updated_models = {
    " Logistic Regression": LogisticRegression(class_weight="balanced", solver="newton-cg", C=np.inf),
    "K Nearest Neighbours": KNeighborsClassifier(n_neighbors=7),
    "       Decision Tree": DecisionTreeClassifier(),
    "      SVM Classifier": SVC(gamma='auto'),
}

# Fit every classifier on the training split and report progress.
for name, model in updated_models.items():
    model.fit(X_train, y_train)
    print(name + " trained.")

In [None]:
# Report the mean accuracy of each updated model on the held-out test split.
for label, clf in updated_models.items():
    accuracy = clf.score(X_test, y_test)
    print("%s Accuracy: %.5f" % (label, accuracy))
                     

# Cross Validation

In [None]:
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# Ten random 80/20 train/test splits with a fixed seed for reproducibility.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)

for name, model in updated_models.items():
    # Put the scaler inside the cross-validated estimator so it is refitted on the
    # training part of every split; scaling the whole data set up front would leak
    # statistics of the held-out rows into each fold. cross_val_score clones the
    # pipeline, so the fitted models in `updated_models` are not modified.
    fold_model = make_pipeline(StandardScaler(), model)
    print(name)
    print(cross_val_score(fold_model, X, y, cv=cv))
    print("\n")