In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the mushroom data set and preview its first rows.
MUSHROOMS_CSV = '/kaggle/input/mushroom-classification/mushrooms.csv'
dataset = pd.read_csv(MUSHROOMS_CSV)
dataset.head()

In [None]:
# Show the (rows, columns) dimensions of the loaded frame.
dataset.shape


**Data Analysis**

In [None]:
# Report the cardinality of each column as "<column> : <number of distinct values>".
for column in dataset.columns:
    distinct_count = len(dataset[column].unique())
    print(f'{column} : {distinct_count}')



In [None]:
# Collect the names of all columns whose dtype is not `object`
# (i.e. the non-categorical columns), then display the list.
cont = [name for name, dtype in dataset.dtypes.items() if dtype != 'O']
cont

**There are no continuous features**

In [None]:
# Per-column flag: True where the column contains at least one missing value.
dataset.isna().any()

In [None]:

# Bar chart of the target distribution.
# The column is named via the `x=` keyword: seaborn deprecated positional
# variables in 0.11 and from 0.12 only `data` may be passed positionally, so a
# bare Series would be interpreted as `data` rather than as the x variable.
sns.countplot(x='class', data=dataset);

**It's not an imbalanced dataset**

In [None]:
# Plot the category distribution of every categorical feature (the target
# 'class' is skipped) and label each bar with its share of all rows.
total = len(dataset)
for column in dataset.columns:
    if column == 'class' or dataset[column].dtypes != 'O':
        continue

    # One explicit figure per feature. The column is passed with `x=` because
    # seaborn >= 0.12 only accepts `data` as a positional argument.
    fig, ax = plt.subplots()
    graph = sns.countplot(x=column, data=dataset, ax=ax)
    ax.set_title(column)

    for p in graph.patches:
        height = p.get_height()
        # Centre the percentage label horizontally, slightly above the bar top.
        graph.text(p.get_x() + p.get_width() / 2.,
                   height + 3,
                   '{:1.2f}%'.format((height / total) * 100),
                   ha="center")

    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

**Conclusion from Above Graphs:**
The veil-type feature has only one category, so we can drop this column.
In most of the features, a few categories occur very rarely, so we can combine those categories into a single category.

**For Example:**
If we look at the ring-type graph, the categories f and n are very rare (0.59% and 0.44% respectively).
So, we can combine them into a single category, 'Others'.

**Feature Engineering**

In [None]:
# Separate the target column from the predictor columns.
Y = dataset['class']
X_pre = dataset.drop(columns='class')
# Only the last expression of a cell is rendered, so the former mid-cell
# `X_pre.head()` (whose result was silently discarded) has been removed.
X_pre.columns

In [None]:
# The veil-type feature has only one category, so it is dropped.
# errors='ignore' keeps this cell safe to re-run: without it, a second
# execution raises KeyError because the column has already been removed.
X_pre = X_pre.drop(columns='veil-type', errors='ignore')
X_pre.head()

In [None]:
# For each feature separately, merge every category that covers fewer than 3%
# of the rows into a single 'Others' category.
threePercent = len(X_pre) * 3 / 100
print('Categories to be combined')
print('-------------------------')
for column in X_pre.columns:
    # Features with at most two distinct values are left untouched.
    if len(X_pre[column].unique()) <= 2:
        continue

    frequency = X_pre[column].value_counts()
    rare = frequency.loc[frequency < threePercent].index

    # Merging only happens when at least two categories fall under the threshold.
    if len(rare) <= 1:
        continue

    print(column, ':', np.array(rare))
    X_pre[column] = X_pre[column].replace(rare, 'Others')
X_pre

In [None]:
# One-hot encode every predictor column, dropping the first level of each
# feature. Column names follow the "<feature>_<category>" pattern.
# A single get_dummies call replaces the former per-column loop that grew the
# frame with repeated pd.concat (each concat copied all previous columns).
# The dtype is pinned because the pandas default differs between versions
# (uint8 before pandas 2.0, bool from 2.0 on); pinning keeps the features numeric.
X = pd.get_dummies(
    X_pre,
    columns=list(X_pre.columns),
    drop_first=True,
    dtype=np.uint8,
)
X.head()

In [None]:
# Encode the target as integers: 'e' -> 1, 'p' -> 0
# (presumably edible / poisonous -- confirm against the dataset description).
# The mapping is derived from the raw column instead of from Y itself, so the
# cell can be re-run safely; mapping an already-encoded Y would turn every
# label into NaN.
Y = dataset['class'].map({'e': 1, 'p': 0})
Y.head()

**Model Building & Execution**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
# random_state fixes the shuffle so the split (and every downstream score) is
# reproducible across kernel restarts; stratify keeps the class ratio equal in
# both partitions.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)
# Print the shapes in the order: x_train, y_train, x_test, y_test.
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)


In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

In [None]:
def build_model(hp):
    """Build and compile a one-hidden-layer binary classifier for the tuner.

    Args:
        hp: Hyperparameter container supplied by the tuner. It is queried for
            'units' (integer from 32 to 512 in steps of 32) and for
            'learning_rate' (one of 1e-2, 1e-3, 1e-4).

    Returns:
        A compiled ``keras.Sequential`` model: a ReLU dense layer with the
        tuned width followed by a single sigmoid unit, optimised with Adam on
        binary cross-entropy and reporting accuracy.
    """
    # Query the search space first: hidden width, then optimiser step size.
    hidden_units = hp.Int('units', min_value=32, max_value=512, step=32)
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model = keras.Sequential([
        layers.Dense(units=hidden_units, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [None]:
# Random search over the space declared in build_model: 5 hyperparameter
# combinations, each trained 3 times, ranked by validation accuracy.
# The seed fixes which combinations are sampled so the search can be reproduced.
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    seed=42,
)

In [None]:
# Run the hyperparameter search: 5 epochs per execution, with 10% of the
# training rows held back for validation.
tuner.search(
    x_train,
    y_train,
    epochs=5,
    validation_split=0.1,
)

In [None]:

# Retrieve the best model found by the search.
best_model = tuner.get_best_models(num_models=1)[0]

# Predict on the held-out rows and threshold the sigmoid output at 0.5.
# The network ends in a single-unit layer, so predict() yields one column per
# row; the result is flattened to a 1-D integer vector so it has the same
# shape and label type as y_test instead of relying on implicit coercion.
y_predict = (best_model.predict(x_test) > 0.5).astype(int).ravel()

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Summarise test-set performance: confusion matrix first, then overall accuracy.
for metric in (confusion_matrix, accuracy_score):
    print(metric(y_test, y_predict))