### Importing Dependencies :

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
import scipy as sp

### Importing/Loading Data Set "Mushroom" :

In [None]:
# Load the mushroom data set into a DataFrame; skipinitialspace=True drops
# any spaces that directly follow a delimiter.
df = pd.read_csv("../input/mushroom-classification/mushrooms.csv", skipinitialspace= True)

In [None]:
# Preview the first rows of the loaded data.
df.head()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Total number of cells (rows * columns).
df.size

In [None]:
# Number of axes of the DataFrame.
df.ndim

In [None]:
# Column labels of the data set.
df.columns

In [None]:
# Number of columns (target included).
len(df.columns)

In [None]:
# Row index of the DataFrame.
df.index

In [None]:
# Count of missing (NaN) values in each column.
df.isnull().sum()

### Checking unique values from some features :

In [None]:
# Distinct category codes present in the "cap-shape" feature.
df["cap-shape"].unique()

In [None]:
# Distinct labels of the "class" column (used as the target further below).
df['class'].unique()

In [None]:
# Distinct category codes present in the "cap-surface" feature.
df['cap-surface'].unique()

### Inspecting the Columns and the Target Column "class" :

In [None]:
# Display the full DataFrame (the notebook truncates long output).
df

In [None]:
# Column labels, shown again for reference.
df.columns

In [None]:
# The "class" column on its own.
df["class"]

In [None]:
# Distinct labels that occur in the "class" column.
df["class"].unique()

In [None]:
# Number of rows per class label, i.e. how balanced the target is.
df["class"].value_counts()

### Description about Data Set :
- All columns are categorical

### Building ML model using Random Forest (an ensemble of Decision Trees)

### Separating out predictors and targets

In [None]:
# Target: the "class" column. Predictors: every other column.
# The target is dropped by name rather than by position, so the split stays
# correct (no target leakage into X) even if "class" is not the first column.
y = df["class"]
X = df.drop(columns="class")

In [None]:
# (rows, predictor columns)
X.shape

In [None]:
# Preview the predictors.
X.head()

In [None]:
# Number of target values; should match the row count of X.
y.shape

In [None]:
# Preview the target labels.
y.head()

### Feature Engineering Using One Hot Encoding

In [None]:
# One-hot encode the categorical predictors: each category of a column
# becomes its own indicator column named "<column>_<category>".
X_new = pd.get_dummies(X)

In [None]:
# Preview the encoded predictors.
X_new.head()

### Optional: display the whole data set (all rows and columns) at once

In [None]:
#pd.set_option("display.max_rows", 8123, "display.max_columns", 117)

### Dropping one sub-column (dummy column) from each original column

In [None]:
# Shape of the encoded predictors before any indicator columns are dropped.
X_new.shape

In [None]:
# Original column names, for reference when choosing the indicator columns to drop.
df.columns

In [None]:
# Indicator columns to remove: one category per encoded feature. The removed
# category is implied by the remaining indicators of that feature, so no
# information is lost.
dropped_dummies = [
    'cap-shape_b',
    'cap-surface_f',
    'cap-color_b',
    'bruises_f',
    'odor_a',
    'gill-attachment_a',
    'gill-spacing_c',
    'gill-size_b',
    'gill-color_b',
    'stalk-shape_e',
    'stalk-root_b',
    'stalk-surface-above-ring_f',
    'stalk-surface-below-ring_f',
    'stalk-color-above-ring_b',
    'stalk-color-below-ring_b',
    'veil-color_n',
    'ring-number_n',
    'ring-type_e',
    'spore-print-color_b',
    'population_a',
    'habitat_d',
]
X_new = X_new.drop(columns=dropped_dummies)

In [None]:
# Preview the encoded predictors after the drop.
X_new.head()

In [None]:
# Shape after dropping the reference indicator columns.
X_new.shape

### Scaling our predictors

In [None]:
from sklearn.preprocessing import scale

In [None]:
# Standardize every indicator column and wrap the result in a DataFrame
# (columns become 0..n-1 because scale() returns a plain array).
# NOTE(review): scaling is computed on the full data set, before the
# train/test split further below, so test rows influence the preprocessing.
X_scaled = pd.DataFrame(scale(X_new))

In [None]:
# Display the standardized predictors.
X_scaled

### Applying PCA on separated predictors

In [None]:
from sklearn.decomposition import PCA

### Initializing PCA

In [None]:
# PCA with default settings: no n_components is given, so all components are kept.
pca = PCA()

In [None]:
# Fit PCA on the scaled predictors and project them onto the principal
# components; columns of X_pca are labelled 0..n-1 (one per component).
# NOTE(review): PCA is fit on all rows, before the train/test split below.
X_pca = pd.DataFrame(pca.fit_transform(X_scaled))

In [None]:
# Display the data expressed in principal-component coordinates.
X_pca

In [None]:
# Fraction of the total variance explained by each component.
pca.explained_variance_ratio_

In [None]:
# Explained variance of the first 45 components, as percentages.
pca.explained_variance_ratio_[0:45]*100

In [None]:
# y1: explained variance of every component, in percent.
# x1: 1-based component numbers, derived from the number of components so the
# two sequences always have the same length (no hard-coded column count).
y1 = list(pca.explained_variance_ratio_*100)
x1 = range(1, len(y1) + 1)

In [None]:
# Plot of the percentage of explained variance per principal component.
# The figure size is configured first so the figure created by gca() uses it.
rcParams["figure.figsize"] = (15, 10)

ax = plt.gca()
ax.set_facecolor("xkcd:white")

plt.plot(
    x1,
    y1,
    linestyle="-",
    linewidth=8,
    marker="^",
    markeredgewidth=2.5,
    color="cyan",
    markeredgecolor="purple",
)

plt.yticks(list(range(11)))

# Arrow pointing at the 45th component, the cut-off used for the model features.
arrow_style = {"facecolor": "black", "shrink": 0, "edgecolor": "black"}
plt.annotate(
    'No. of PCs used as features : 45',
    xy=(45, 0.66),
    xytext=(54, 2),
    size=20,
    arrowprops=arrow_style,
)

plt.legend(["% EVR of PCA"], fontsize='xx-large', loc='upper right')
plt.xlabel("PCA", size=16)
plt.ylabel("Percentage of Explained Variance Ratio", size=16)
plt.title("% of Explained Variance Ratio Vs PCA", size=25)
plt.grid(linestyle="-.")
plt.show()

In [None]:
# Number of principal components that were computed.
len(pca.explained_variance_ratio_)

In [None]:
# Total explained variance over all components.
pca.explained_variance_ratio_.sum()

In [None]:
# Share of the variance retained by the first 45 components.
pca.explained_variance_ratio_[0:45].sum()

In [None]:
# Per-component explained variance ratio of the first 45 components.
pca.explained_variance_ratio_[0:45]

In [None]:
# Keep the first 45 principal components as model features.
# Positional slicing is used on purpose: label-based .loc[:, 0:45] includes the
# end label and would select 46 columns, one more than the 45 components whose
# explained variance is reported above.
X_pca_final = X_pca.iloc[:, :45]

In [None]:
# Display the reduced feature matrix used for modelling.
X_pca_final

### Splitting the data into training and testing sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_pca_final,
    y,
    test_size=0.2,
    random_state=7,
)

In [None]:
# Display the training features.
X_train

In [None]:
# (training rows, feature columns)
X_train.shape

In [None]:
# Preview the training labels.
y_train.head()

In [None]:
# Number of training labels; should match the row count of X_train.
y_train.shape

### Building ML model using Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest of 70 trees, Gini impurity as split criterion, each tree
# limited to depth 7. The seed matches the one used for the train/test split
# so repeated runs give the same forest and the same reported metrics.
model = RandomForestClassifier(n_estimators=70,
                               criterion='gini',
                               max_depth=7,
                               random_state=7)

In [None]:
# Train the forest on the training split.
model.fit(X_train,y_train)

In [None]:
# Predicted class labels for the held-out test features.
y_predict = model.predict(X_test)

In [None]:
# Number of predictions; should match the number of test rows.
y_predict.shape

### Checking the accuracy of our model

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test rows whose predicted label equals the true label.
accuracy_score(y_test,y_predict)

### Verifying the performance of model using confusion matrix

In [None]:
# Confusion matrix: rows are the true labels, columns the predicted labels.
# Both axes are named explicitly because y_predict is a plain array and would
# otherwise get an uninformative default column name.
pd.crosstab(y_test, y_predict, rownames=["Actual"], colnames=["Predicted"])

In [None]:
from sklearn.metrics import classification_report

# Per-class precision, recall, F1 score and support on the test split.
print(classification_report(y_test,y_predict))

In [None]:
# True labels of the test split.
y_test

In [None]:
# Predicted labels shown as a Series (fresh 0..n-1 index, not the index of y_test).
pd.Series(y_predict)

### Test vs predictions (Visualization)

In [None]:
# True test labels, shown again before comparing them with the predictions.
y_test

In [None]:
# Container type holding the true test labels.
type(y_test)

In [None]:
# Number of test rows per true class label.
y_test.value_counts()

In [None]:
# Container type holding the predicted labels.
type(y_predict)

In [None]:
# Raw predicted labels.
y_predict

In [None]:
# Wrap the predictions in a one-column DataFrame ("classes") so they can be counted.
predict = pd.DataFrame(y_predict, columns = ["classes"])

In [None]:
# Display the predictions table.
predict

In [None]:
# Number of test rows per predicted class label.
predict.classes.value_counts()

### Pie Chart

In [None]:
# Side-by-side pie charts: class distribution of the true test labels (left)
# and of the model's predictions (right).

# Set the figure size before creating the figure; changing rcParams after
# plt.figure() would not resize the figure that already exists.
rcParams["figure.figsize"] = 14,11
plt.figure()

# Class codes listed in the same order as the display labels below.
# NOTE(review): assumes the data set encodes edible as "e" and poisonous as
# "p" -- confirm against df["class"].unique().
class_codes = ["e", "p"]

plt.subplot(221)
labels0 = ["Edible","Non-Edible"]
# Counts come from the actual test labels instead of hard-coded numbers, so the
# chart stays correct when the split or the data changes.
z1 = y_test.value_counts().reindex(class_codes, fill_value=0).tolist()

plt.pie(z1, explode = [0.05,0], labels = labels0, autopct = "%0.2f%%", colors = ["green","red"], radius = 1.1, startangle = 90, shadow = True)
plt.legend(labels0,
           title = "Mushroom Categories",
           loc = "upper right",
           bbox_to_anchor = (1.4,0.4,0.,0.5))
plt.title("Testing data of Mushroom", size = 25)


plt.subplot(222)
# Counts of the predicted labels, computed from the model output.
z = pd.Series(y_predict).value_counts().reindex(class_codes, fill_value=0).tolist()
labels1 = ["Edible","Non-Edible"]

plt.pie(z, explode = [0.05,0], labels = labels1, autopct = "%0.2f%%", colors = ["cyan","yellow"], radius = 1.1, startangle = 90, shadow = True)
plt.legend(labels1,
           loc = "upper right",
           title = "Mushroom Categories",
           bbox_to_anchor = (1.5,0.2,0.1,0.7))
plt.title("Predictions about Mushroom", size = 25)
plt.show()

###  Visualization using Bar Chart

In [None]:
# Side-by-side bar charts: class counts of the true test labels (left) and of
# the model's predictions (right).
rcParams["figure.figsize"] = 14,6

# Class codes listed in the same order as the bar labels below.
# NOTE(review): assumes the data set encodes edible as "e" and poisonous as
# "p" -- confirm against df["class"].unique().
class_codes = ["e", "p"]
category_names = ["Edible","Non-Edible"]

# Bar heights are computed from the data rather than hard-coded, so the two
# panels reflect the real test labels and the real predictions respectively.
test_counts = y_test.value_counts().reindex(class_codes, fill_value=0).tolist()
predicted_counts = pd.Series(y_predict).value_counts().reindex(class_codes, fill_value=0).tolist()

plt.subplot(121)
plt.bar(x = category_names, height = test_counts, width = 0.19, color = ["green","red"])
plt.xticks([0,1])
plt.title("Testing Data", size =20)
plt.xlabel("Mushroom Categories",size = 10)
plt.ylabel("Count",size = 10)
plt.grid(ls = "-.")



plt.subplot(122)
plt.bar(x = category_names, height = predicted_counts, width = 0.19, color = ["cyan","yellow"])
plt.xticks([0,1])
plt.title("Predictions ", size =20)
plt.xlabel("Mushroom Categories",size = 10)
plt.ylabel("Count",size = 10)
plt.grid(ls = "-.")
plt.show()