# **Mushroom Classification**

### Importing Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score 

import warnings 
warnings.filterwarnings('ignore')

### Load DataSet

In [None]:
# Read the mushrooms CSV into a DataFrame; the path is relative to the notebook.
csv_path = "../input/mushroom-classification/mushrooms.csv"
df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows of the frame.
df.head(n=5)

In [None]:
# Bar chart of how many rows carry each value of the 'class' column.
sns.countplot(x='class', data=df)
plt.show()

<p>It's a quite balanced dataset, so we don't need to do anything here.</p>

### Data Preprocessing

In [None]:
# Label-encode every column of df in place. For each column, also record which
# integer code stands for which original label (one dict per column, in column
# order).
encoder = LabelEncoder()
mappings = []

for col_name in df.columns:
    df[col_name] = encoder.fit_transform(df[col_name])
    # encoder.classes_ belongs to the column that was just fitted, so it has to
    # be captured before the next iteration refits the same encoder.
    mappings.append(dict(enumerate(encoder.classes_)))

mappings

### Correlation Analysis

In [None]:
# Correlation matrix of all columns in df.
cor_mat = df.corr()

# seaborn hides every cell where the mask is True. Use an explicit boolean mask
# for the strict upper triangle (k=1 keeps the diagonal visible), so only the
# lower triangle and the diagonal are drawn. Building the mask from the
# correlation values themselves would leave any upper-triangle cell whose
# correlation is exactly 0 unmasked.
mask = np.triu(np.ones(cor_mat.shape, dtype=bool), k=1)

fig, ax = plt.subplots(figsize=(23, 9))
sns.heatmap(data=cor_mat, mask=mask, square=True, annot=True, cbar=True, cmap='plasma', ax=ax)
plt.show()

### Model Building

In [None]:
# Separate the target ('class') from the feature columns.
y = df['class']
X = df.drop(columns='class')

In [None]:
# Train-Test Split
# Split first so the scaler below never sees the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=50)

# Scaling data
# Fit the scaler on the training rows only and reuse those statistics for the
# test rows. Fitting on all of X would leak test-set means and variances into
# training. X itself is left unscaled.
sc = StandardScaler()

X_train = pd.DataFrame(sc.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(sc.transform(X_test), columns=X_test.columns, index=X_test.index)
X_train.head()

In [None]:
# Models
# Fit each candidate classifier on the training split and record its accuracy
# on the test split. model_names must stay in the same order as models.
models = [
    LogisticRegression(),
    SVC(C=1.0, kernel='rbf'),
    RandomForestClassifier(random_state=50),
    XGBClassifier(random_state=50, eval_metric='logloss'),
]
model_names = ['LogisticRegression', 'SVM', 'RandomForestClassifier', 'XGBoostClassifier']

acc = []
for clf in models:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # accuracy_score takes (y_true, y_pred) in that order.
    acc.append(accuracy_score(y_test, y_pred))

# Put the accuracies in a data frame.
dictionary = {'Model Names': model_names, 'Accuracy': acc}
acc_df = pd.DataFrame(dictionary)


In [None]:
# Leave the frame as the cell's last expression so the notebook renders it as a
# table instead of plain printed text.
acc_df

**We can see that the results are very good:)**