### Attribute Information: 

1. classes: edible=e, poisonous=p
2. cap-shape: bell=b,conical=c,convex=x,flat=f,knobbed=k,sunken=s
3. cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s
4. cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y
5. bruises: bruises=t,no=f
6. odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s
7. gill-attachment: attached=a,descending=d,free=f,notched=n
8. gill-spacing: close=c,crowded=w,distant=d
9. gill-size: broad=b,narrow=n
10. gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g,green=r,orange=o,pink=p,purple=u,red=e,white=w,yellow=y
11. stalk-shape: enlarging=e,tapering=t
12. stalk-root: bulbous=b,club=c,cup=u,equal=e,rhizomorphs=z,rooted=r,missing=?
13. stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s
14. stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s
15. stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y
16. stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y
17. veil-type: partial=p,universal=u
18. veil-color: brown=n,orange=o,white=w,yellow=y
19. ring-number: none=n,one=o,two=t
20. ring-type: cobwebby=c,evanescent=e,flaring=f,large=l,none=n,pendant=p,sheathing=s,zone=z
21. spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,orange=o,purple=u,white=w,yellow=y
22. population: abundant=a,clustered=c,numerous=n,scattered=s,several=v,solitary=y
23. habitat: grasses=g,leaves=l,meadows=m,paths=p,urban=u,waste=w,woods=d

In [None]:
#Importing necessary libraries
import pandas as pd
from sklearn import tree
from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import RandomizedSearchCV
from sklearn.base import TransformerMixin
% matplotlib inline

In [None]:
# Load the mushroom data. Only "?" is treated as a missing value;
# pandas' default NA strings are switched off.
dataset = pd.read_csv(
    "../input/mushrooms.csv",
    keep_default_na=False,
    na_values=["?"],
)

In [None]:
# Preview the first five rows
dataset.head(n=5)

In [None]:
# Summarize the frame: column names, non-null counts and dtypes
dataset.info()

In [None]:
# Shape of the dataset as (rows, columns)
dataset.shape

## Handling Missing data
* The `stalk-root` column has missing values: they are coded as `?` in the CSV and were read in as NaN.
* We will replace these NaN values with the most frequent value in the `stalk-root` column.

In [None]:
# Count the missing values in each column
missing_mask = dataset.isna()
missing_mask.sum()

In [None]:
class DataFrameImputer(TransformerMixin):
    """Impute missing values in a pandas DataFrame.

    Columns of dtype object are imputed with the most frequent value
    in the column.

    Columns of other types are imputed with the mean of the column.

    A column with no observed values has nothing to impute from; its
    fill value is NaN, so its missing entries stay missing.
    """

    def fit(self, X, y=None):
        """Learn one fill value per column of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Data from which the fill values are computed.
        y : ignored
            Accepted so the signature matches the scikit-learn
            transformer convention.

        Returns
        -------
        self
            The fitted imputer; the fill values are stored in
            ``self.fill``, indexed by column label.
        """
        fill_values = []
        for c in X:
            column = X[c]
            if column.dtype == np.dtype('O'):
                counts = column.value_counts()
                # value_counts() skips NaN, so an all-missing column yields
                # an empty result; fall back to NaN (what mean() gives for
                # an all-missing numeric column) instead of raising
                # IndexError on index[0].
                fill_values.append(np.nan if counts.empty else counts.index[0])
            else:
                fill_values.append(column.mean())

        self.fill = pd.Series(fill_values, index=X.columns)

        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with missing values filled per column.

        Parameters
        ----------
        X : pandas.DataFrame
            Data to impute; columns are matched to the fitted fill values
            by label.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            ``X`` with NaN entries replaced by the values learned in ``fit``.
        """
        return X.fillna(self.fill)

In [None]:
# Fill the missing stalk-root entries with the column's most frequent value
stalk_root_frame = pd.DataFrame(dataset['stalk-root'])
stalk_root_imputer = DataFrameImputer()
stalk_root_imputer.fit(stalk_root_frame)
dataset['stalk-root'] = stalk_root_imputer.transform(stalk_root_frame)

In [None]:
# Re-count the missing values per column after imputation
remaining_missing = dataset.isna()
remaining_missing.sum()

### Label Encoding

In [None]:
# Encode every column's category letters as integers.
# A separate encoder is fitted per column and kept in `label_encoders`, so
# each mapping (encoder.classes_) stays available for decoding later, e.g.
# label_encoders['class'].inverse_transform(...). Refitting a single shared
# encoder would keep only the mapping of the last column.
label_encoders = {}
for col in dataset.columns:
    labelencoder = LabelEncoder()
    dataset[col] = labelencoder.fit_transform(dataset[col])
    label_encoders[col] = labelencoder
dataset.head()

### Defining Dependent and Independent variables

In [None]:
# Split the frame into features (columns 1..22) and target (column 0).
# The target is kept two-dimensional, one column wide.
feature_frame = dataset.iloc[:, 1:23]
target_frame = dataset.iloc[:, :1]
x = feature_frame.values
y = target_frame.values

### Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardize every feature column to zero mean and unit variance.
# NOTE(review): the scaler is fitted on all rows, before the train/test
# split further down, so test rows contribute to the fitted statistics.
sc = StandardScaler()
sc.fit(x)
x = sc.transform(x)

### Train/Test Split

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the seed is fixed so the split is repeatable
split_parts = train_test_split(x, y, random_state=123, test_size=0.2)
x_train, x_test, y_train, y_test = split_parts

### Decision Tree

In [None]:
# Train a decision tree with default hyperparameters on the training split
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier().fit(x_train, y_train)

In [None]:
# Predict labels for the held-out test rows with the fitted decision tree
y_pred = classifier.predict(x_test)

In [None]:
# Confusion matrix: rows are the true classes, columns the predicted classes
cm = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

### Random Forest

In [None]:
# Train a random forest with default hyperparameters on the training split
from sklearn.ensemble import RandomForestClassifier

classifier1 = RandomForestClassifier()
# Flatten the (n, 1) target into a 1-D array before fitting
y_train_flat = y_train.ravel()
classifier1.fit(x_train, y_train_flat)

In [None]:
# Predict labels for the test rows with the random forest
# (this overwrites the decision tree's y_pred)
y_pred = classifier1.predict(x_test)

In [None]:
# Confusion matrix: rows are the true classes, columns the predicted classes
cm = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
cm