# Importing the Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Loading Dataset

In [None]:
# Read the mushroom-classification CSV into a DataFrame and preview its first five rows.
csv_path = "../input/mushroom-classification/mushrooms.csv"
df = pd.read_csv(csv_path)
df.head(5)

In [None]:
# (rows, columns) of the raw dataset.
df.shape

In [None]:
# Per-column dtype and non-null count, plus overall memory usage.
df.info()

In [None]:
# Labels of all columns in the dataset.
df.columns

In [None]:
# Summary (count / unique / top / freq) restricted to the object-typed columns.
df.describe(include='O')

# Converting all the columns to numeric using Label Encoder, as all the columns contain categorical data

In [None]:
from sklearn.preprocessing import LabelEncoder
# Stand-alone LabelEncoder instance.
# NOTE(review): `encoder` is not referenced again anywhere in the visible notebook;
# the encoding cell constructs its own LabelEncoder instead -- confirm whether this is still needed.
encoder = LabelEncoder()

In [None]:
# Replace every categorical column with integer codes.
# Each column gets its own LabelEncoder, and the fitted encoders are kept in
# `label_encoders` (keyed by column name) so that a code can later be mapped back
# to its original category with `label_encoders[col].inverse_transform(...)`.
# The resulting `df` is the same as encoding each column with a throw-away encoder.
label_encoders = {column: LabelEncoder() for column in df.columns}
df = df.apply(lambda series: label_encoders[series.name].fit_transform(series))

# Checking Whether the Classes Are Imbalanced

In [None]:
# Count the samples per encoded class label to see how balanced the target is.
class_column = df['class']
class_column.value_counts()

# Checking For Correlation

In [None]:
# Compute the pairwise correlations of the encoded columns, then draw them
# as an annotated heatmap on a 15x10 inch figure.
correlation_matrix = df.corr()
plt.figure(figsize=(15, 10))
sns.heatmap(correlation_matrix, annot=True)

# Splitting the dataset

In [None]:
# `class` is the prediction target; every remaining column is used as a feature.
y = df['class']
X = df.drop(columns='class')

In [None]:
# Display the target column (the label-encoded `class` values).
y

In [None]:
# Summary statistics of the feature columns before scaling.
X.describe()

In [None]:
from sklearn.preprocessing import StandardScaler
S = StandardScaler()

In [None]:
X = pd.DataFrame(S.fit_transform(X),columns=X.columns)

In [None]:
X.head()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)

# Model Selection

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Baseline classifiers.
# `max_iter` is raised above the library default of 100 so the solver has room to
# converge without emitting a ConvergenceWarning.
LR = LogisticRegression(max_iter=1000)
# A fixed seed makes the forest (bootstrap samples and feature sub-sampling)
# identical from run to run, so the reported scores are reproducible.
RFC = RandomForestClassifier(random_state=42)

In [None]:
# Train both classifiers on the same training split.
LR.fit(X=X_train, y=y_train)
RFC.fit(X=X_train, y=y_train)

In [None]:
# Accuracy of each fitted model on the held-out test split.
lr_test_accuracy = LR.score(X_test, y_test)
rf_test_accuracy = RFC.score(X_test, y_test)
print("Logistic Regression", lr_test_accuracy)
print("RandomForest Classifier", rf_test_accuracy)

In [None]:
# Test-set predictions: y_predL from Logistic Regression, y_predRF from Random Forest.
y_predL, y_predRF = LR.predict(X_test), RFC.predict(X_test)

In [None]:
from sklearn.model_selection import cross_val_score
# 5-fold cross-validation scores of both models over the full feature matrix.
scoreL = cross_val_score(estimator=LR, X=X, y=y, cv=5)
scoreRF = cross_val_score(estimator=RFC, X=X, y=y, cv=5)

In [None]:
# Average the per-fold scores so each model is summarised by a single number.
mean_cv_logistic = np.mean(scoreL)
mean_cv_forest = np.mean(scoreRF)
print("Logistic cross_val_score", mean_cv_logistic)
print("Random Forest Classifier cross_val_score", mean_cv_forest)

In [None]:
from sklearn.metrics import f1_score,classification_report,confusion_matrix

In [None]:
# Logistic Regression: confusion matrix followed by the per-class
# precision / recall / F1 report on the test split.
logistic_confusion = confusion_matrix(y_true=y_test, y_pred=y_predL)
logistic_report = classification_report(y_true=y_test, y_pred=y_predL)
print(logistic_confusion)
print(logistic_report)

In [None]:
# Random Forest Classifier: confusion matrix followed by the per-class
# precision / recall / F1 report on the test split.
forest_confusion = confusion_matrix(y_true=y_test, y_pred=y_predRF)
forest_report = classification_report(y_true=y_test, y_pred=y_predRF)
print(forest_confusion)
print(forest_report)

# This is for Support Vector Machine

In [None]:

from sklearn.svm import SVC
svc = SVC()

In [None]:
svc.fit(X_train,y_train)

In [None]:
print(svc.score(X_test,y_test))

In [None]:
scoreSvc = cross_val_score(svc,X,y,cv=5)
print("scoreSvc",np.mean(scoreSvc))

In [None]:
y_predSvc = svc.predict(X_test)

In [None]:
print(confusion_matrix(y_test,y_predSvc))
print(classification_report(y_test,y_predSvc))