In [1]:
import pandas as pd
from keras.layers import Dense, Flatten, Conv1D, MaxPooling1D,Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Embedding, LSTM

In [2]:
# Read the mushroom CSV that sits next to the notebook and preview the first rows.
data = pd.read_csv(filepath_or_buffer='./mushrooms.csv')
data.head(n=5)

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [3]:
# Print a structural summary of the raw frame: index range, column names,
# non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8124 entries, 0 to 8123
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   class                     8124 non-null   object
 1   cap-shape                 8124 non-null   object
 2   cap-surface               8124 non-null   object
 3   cap-color                 8124 non-null   object
 4   bruises                   8124 non-null   object
 5   odor                      8124 non-null   object
 6   gill-attachment           8124 non-null   object
 7   gill-spacing              8124 non-null   object
 8   gill-size                 8124 non-null   object
 9   gill-color                8124 non-null   object
 10  stalk-shape               8124 non-null   object
 11  stalk-root                8124 non-null   object
 12  stalk-surface-above-ring  8124 non-null   object
 13  stalk-surface-below-ring  8124 non-null   object
 14  stalk-color-above-ring  

In [4]:
# Count the missing entries in every column.
data.isna().sum()

class                       0
cap-shape                   0
cap-surface                 0
cap-color                   0
bruises                     0
odor                        0
gill-attachment             0
gill-spacing                0
gill-size                   0
gill-color                  0
stalk-shape                 0
stalk-root                  0
stalk-surface-above-ring    0
stalk-surface-below-ring    0
stalk-color-above-ring      0
stalk-color-below-ring      0
veil-type                   0
veil-color                  0
ring-number                 0
ring-type                   0
spore-print-color           0
population                  0
habitat                     0
dtype: int64

In [5]:
# Show how often each category occurs, one column at a time.
for col_name, col_values in data.items():
    print(f"Column: {col_name}")
    print(col_values.value_counts())
    print("-------------------")

Column: class
e    4208
p    3916
Name: class, dtype: int64
-------------------
Column: cap-shape
x    3656
f    3152
k     828
b     452
s      32
c       4
Name: cap-shape, dtype: int64
-------------------
Column: cap-surface
y    3244
s    2556
f    2320
g       4
Name: cap-surface, dtype: int64
-------------------
Column: cap-color
n    2284
g    1840
e    1500
y    1072
w    1040
b     168
p     144
c      44
u      16
r      16
Name: cap-color, dtype: int64
-------------------
Column: bruises
f    4748
t    3376
Name: bruises, dtype: int64
-------------------
Column: odor
n    3528
f    2160
y     576
s     576
a     400
l     400
p     256
c     192
m      36
Name: odor, dtype: int64
-------------------
Column: gill-attachment
f    7914
a     210
Name: gill-attachment, dtype: int64
-------------------
Column: gill-spacing
c    6812
w    1312
Name: gill-spacing, dtype: int64
-------------------
Column: gill-size
b    5612
n    2512
Name: gill-size, dtype: int64
------------------

In [6]:
# Names of the columns that take exactly two distinct values.
binary_cat_columns = [
    col_name
    for col_name in data.columns
    if data[col_name].nunique(dropna=False) == 2
]

print("Binary categorical Columns:", binary_cat_columns)

Binary categorical Columns: ['class', 'bruises', 'gill-attachment', 'gill-spacing', 'gill-size', 'stalk-shape']


In [7]:
# Names of the columns that take more than two distinct values.
cat_columns = [
    col_name
    for col_name in data.columns
    if data[col_name].nunique(dropna=False) > 2
]

print("categorical Columns:", cat_columns)

categorical Columns: ['cap-shape', 'cap-surface', 'cap-color', 'odor', 'gill-color', 'stalk-root', 'stalk-surface-above-ring', 'stalk-surface-below-ring', 'stalk-color-above-ring', 'stalk-color-below-ring', 'veil-color', 'ring-number', 'ring-type', 'spore-print-color', 'population', 'habitat']


In [8]:
# Two-valued columns: replace each category with an integer code.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

label_encoder = LabelEncoder()
for binary_col in binary_cat_columns:
    # The same encoder object is re-fitted for every column.
    data[binary_col] = label_encoder.fit_transform(data[binary_col])

# Multi-valued columns: expand into indicator columns, dropping the first
# level of each one (drop_first=True).
data = pd.get_dummies(
    data=data,
    columns=cat_columns,
    drop_first=True,
)

In [9]:
data.head()

Unnamed: 0,class,bruises,gill-attachment,gill-spacing,gill-size,stalk-shape,veil-type,cap-shape_c,cap-shape_f,cap-shape_k,...,population_n,population_s,population_v,population_y,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w
0,1,1,1,0,1,0,p,0,0,0,...,0,1,0,0,0,0,0,0,1,0
1,0,1,1,0,0,0,p,0,0,0,...,1,0,0,0,1,0,0,0,0,0
2,0,1,1,0,0,0,p,0,0,0,...,1,0,0,0,0,0,1,0,0,0
3,1,1,1,0,1,0,p,0,0,0,...,0,1,0,0,0,0,0,0,1,0
4,0,0,1,1,0,1,p,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [10]:
# 'veil-type' was selected by neither the binary nor the multi-category list,
# so it is still an unencoded string column; remove it before modelling.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second execution no longer raises KeyError.
data = data.drop(columns='veil-type', errors='ignore')

In [11]:
# Separate the encoded frame into the feature matrix and the target.
# `columns=` states the intent directly; the previous `axis=True` only worked
# because True compares equal to 1.
x = data.drop(columns='class')
# Double brackets keep the target as a one-column DataFrame.
y = data[['class']]

In [12]:
from sklearn.model_selection import train_test_split

# Keep 80% of the rows for training; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=42,
)

### MLP

In [13]:
from keras.models import Sequential
from keras.layers import Dense

# Fully connected binary classifier: 128 -> 64 -> 1 units, with the input
# width taken from the training matrix.
mlp = Sequential([
    Dense(128, input_dim=X_train.shape[1], activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

mlp.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [14]:
# Fit the MLP on the training split, then score it on the held-out split.
mlp.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    verbose=1,
)

mlp_loss, mlp_accuracy = mlp.evaluate(x=X_test, y=y_test, verbose=0)
print(f"MLP Accuracy: {mlp_accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
MLP Accuracy: 1.0


In [15]:
from sklearn.metrics import classification_report

# Raw outputs of the sigmoid unit for the held-out rows.
mlp_pred = mlp.predict(X_test)

# Threshold at 0.5 to obtain hard 0/1 labels.
mlp_pred_binary = (mlp_pred > 0.5).astype(int)

print(
    "Classification Report of MLP:",
    classification_report(y_test, mlp_pred_binary),
    sep="\n",
)

Classification Report of MLP:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       843
           1       1.00      1.00      1.00       782

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



### Hyperparameter tuning of MLP

In [16]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

# Model builder handed to the scikit-learn wrapper.
def create_mlp_model(optimizer='adam', hidden_units=128):
    """Build and compile a dense binary classifier.

    Args:
        optimizer: Optimizer passed to ``compile``.
        hidden_units: Number of units in the first hidden layer.

    Returns:
        A compiled ``Sequential`` model made of Dense(hidden_units, relu),
        Dense(64, relu) and Dense(1, sigmoid), using binary cross-entropy
        loss and the accuracy metric.

    Note:
        The input width is read from the module-level ``X_train``.
    """
    n_inputs = X_train.shape[1]
    layer_stack = [
        Dense(hidden_units, input_dim=n_inputs, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(layer_stack)
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Wrap the builder so it can be used as a scikit-learn estimator.
mlp_model = KerasClassifier(build_fn=create_mlp_model, epochs=10, batch_size=32, verbose=0)

# Every optimizer is tried with every first-layer width.
param_grid = {
    'optimizer': ['adam', 'sgd'],
    'hidden_units': [64, 128, 256]
}

# Exhaustive search over the grid with cv=3 on the training split only.
mlp_grid = GridSearchCV(estimator=mlp_model, param_grid=param_grid, cv=3)
mlp_grid_train = mlp_grid.fit(X_train, y_train)

# Estimator for the winning parameter combination.
best_mlp_model = mlp_grid_train.best_estimator_

# KerasClassifier.predict returns class labels rather than sigmoid
# probabilities, so no 0.5 threshold is applied here (thresholding labels
# was a no-op); only the integer dtype is normalised.
mlp_pred = best_mlp_model.predict(X_test)
mlp_pred_binary = mlp_pred.astype(int)

# Per-class precision / recall / F1 on the held-out split.
mlp_report = classification_report(y_test, mlp_pred_binary)

print("Best MLP Model:")
print(f"Best Parameters: {mlp_grid_train.best_params_}")
print("Classification Report:")
print(mlp_report)

  mlp_model = KerasClassifier(build_fn=create_mlp_model, epochs=10, batch_size=32, verbose=0)


Best MLP Model:
Best Parameters: {'hidden_units': 64, 'optimizer': 'adam'}
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       843
           1       1.00      1.00      1.00       782

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



### RNN

In [17]:
from keras.layers import Embedding, LSTM

# Size of the embedding table. Derived from the training matrix instead of the
# previously hard-coded 95 so it follows the encoded frame's column count.
# NOTE(review): Keras documents Embedding.input_dim as the vocabulary size
# (largest integer index + 1), not the number of features. The encoded inputs
# appear to contain only 0/1, so presumably only the first two rows are ever
# looked up — confirm that an embedding over these columns is intended.
num_unique_features = X_train.shape[1]
embedding_dim = 32

# Embedding -> LSTM(64) -> single sigmoid unit for the binary class label.
rnn = Sequential()
rnn.add(Embedding(input_dim=num_unique_features, output_dim=embedding_dim))
rnn.add(LSTM(64))
rnn.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output.
rnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [18]:
# Fit the RNN on the training split, then score it on the held-out split.
rnn.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    verbose=1,
)

rnn_loss, rnn_accuracy = rnn.evaluate(x=X_test, y=y_test, verbose=0)
print(f"RNN Accuracy: {rnn_accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
RNN Accuracy: 0.9279999732971191


In [19]:
# Raw outputs of the sigmoid unit for the held-out rows.
rnn_pred = rnn.predict(X_test)

# Threshold at 0.5 to obtain hard 0/1 labels.
rnn_pred_binary = (rnn_pred > 0.5).astype(int)

# Per-class precision / recall / F1 for the RNN.
print(
    "Classification Report for RNN:",
    classification_report(y_test, rnn_pred_binary),
    sep="\n",
)

Classification Report for RNN:
              precision    recall  f1-score   support

           0       0.88      1.00      0.94       843
           1       1.00      0.85      0.92       782

    accuracy                           0.93      1625
   macro avg       0.94      0.93      0.93      1625
weighted avg       0.94      0.93      0.93      1625



### Hyperparameter tuning of RNN

In [20]:
from keras.layers import Embedding, LSTM
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# Model builder handed to the scikit-learn wrapper.
def create_rnn_model(embedding_dim=32, lstm_units=64, optimizer='adam'):
    """Build and compile an Embedding + LSTM binary classifier.

    Args:
        embedding_dim: Output dimension of the embedding layer.
        lstm_units: Number of units in the LSTM layer.
        optimizer: Optimizer passed to ``compile``.

    Returns:
        A compiled ``Sequential`` model made of Embedding, LSTM(lstm_units)
        and Dense(1, sigmoid), using binary cross-entropy loss and the
        accuracy metric.

    Note:
        The embedding's ``input_dim`` is read from the module-level
        ``num_unique_features``.
    """
    model = Sequential([
        Embedding(input_dim=num_unique_features, output_dim=embedding_dim),
        LSTM(lstm_units),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Plain numpy views of the feature frames for the wrapped estimator.
X_train_np = X_train.values
X_test_np = X_test.values

# Wrap the builder so it can be used as a scikit-learn estimator.
rnn_model = KerasClassifier(build_fn=create_rnn_model, epochs=10, batch_size=32, verbose=0)

# Every combination of embedding size, LSTM width and optimizer is tried.
rnn_param_grid = {
    'embedding_dim': [16, 32, 64],
    'lstm_units': [32, 64, 128],
    'optimizer': ['adam', 'sgd']
}

# Exhaustive search over the grid with cv=3 on the training split only.
rnn_grid = GridSearchCV(estimator=rnn_model, param_grid=rnn_param_grid, cv=3)
rnn_grid_result = rnn_grid.fit(X_train_np, y_train)

# Estimator for the winning parameter combination.
best_rnn_model = rnn_grid_result.best_estimator_

# KerasClassifier.predict returns class labels rather than sigmoid
# probabilities, so no 0.5 threshold is applied here (thresholding labels
# was a no-op); only the integer dtype is normalised.
rnn_pred = best_rnn_model.predict(X_test_np)
rnn_pred_binary = rnn_pred.astype(int)

# Per-class precision / recall / F1 on the held-out split.
rnn_report = classification_report(y_test, rnn_pred_binary)

print("Best RNN Model:")
print(f"Best Parameters: {rnn_grid_result.best_params_}")
print("Classification Report:")
print(rnn_report)

  rnn_model = KerasClassifier(build_fn=create_rnn_model, epochs=10, batch_size=32, verbose=0)


Best RNN Model:
Best Parameters: {'embedding_dim': 64, 'lstm_units': 128, 'optimizer': 'adam'}
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.95      0.92       843
           1       0.94      0.89      0.91       782

    accuracy                           0.92      1625
   macro avg       0.92      0.92      0.92      1625
weighted avg       0.92      0.92      0.92      1625



### CNN

In [21]:
# Layers needed for the 1-D convolutional model.
from keras.layers import Conv1D, MaxPooling1D, Flatten

# Conv1D -> max-pool -> flatten -> Dense(64) -> single sigmoid unit.
# The sequence length comes from the training matrix instead of a hard-coded
# 95, matching how create_cnn_model declares its input shape; the trailing 1
# is the single channel per feature.
cnn = Sequential()
cnn.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
cnn.add(MaxPooling1D(pool_size=2))
cnn.add(Flatten())
cnn.add(Dense(64, activation='relu'))
cnn.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output.
cnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [22]:
# Fit the CNN on the training split.
cnn.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    verbose=1,
)

# Score once on the held-out split and report the accuracy.
cnn_loss, cnn_accuracy = cnn.evaluate(x=X_test, y=y_test, verbose=0)
print(f"CNN Accuracy: {cnn_accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CNN Accuracy: 1.0


In [23]:
# Raw outputs of the sigmoid unit for the held-out rows.
cnn_pred = cnn.predict(X_test)

# Threshold at 0.5 to obtain hard 0/1 labels.
cnn_pred_binary = (cnn_pred > 0.5).astype(int)

# Per-class precision / recall / F1 for the CNN.
print(
    "Classification Report for CNN:",
    classification_report(y_test, cnn_pred_binary),
    sep="\n",
)

Classification Report for CNN:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       843
           1       1.00      1.00      1.00       782

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



### Hyperparameter tuning of CNN

In [24]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

# Model builder handed to the scikit-learn wrapper.
def create_cnn_model(optimizer='adam', filters=32):
    """Build and compile a 1-D convolutional binary classifier.

    Args:
        optimizer: Optimizer passed to ``compile``.
        filters: Number of filters in the convolution layer.

    Returns:
        A compiled ``Sequential`` model made of Conv1D(filters, kernel 3, relu),
        MaxPooling1D(2), Flatten, Dense(64, relu) and Dense(1, sigmoid), using
        binary cross-entropy loss and the accuracy metric.

    Note:
        The input shape is ``(X_train.shape[1], 1)``, read from the
        module-level ``X_train``.
    """
    sequence_shape = (X_train.shape[1], 1)
    model = Sequential([
        Conv1D(filters=filters, kernel_size=3, activation='relu', input_shape=sequence_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Wrap the builder so it can be used as a scikit-learn estimator.
cnn_model = KerasClassifier(build_fn=create_cnn_model, epochs=10, batch_size=32, verbose=0)

# Every optimizer is tried with every filter count.
cnn_param_grid = {
    'optimizer': ['adam', 'sgd'],
    'filters': [16, 32, 64]
}

# Exhaustive search over the grid with cv=3 on the training split only.
# The features are reshaped to (rows, columns, 1) to add the channel axis
# declared by create_cnn_model.
cnn_grid = GridSearchCV(estimator=cnn_model, param_grid=cnn_param_grid, cv=3)
cnn_grid_result = cnn_grid.fit(X_train.values.reshape(X_train.shape[0], X_train.shape[1], 1), y_train)

# Estimator for the winning parameter combination.
best_cnn_model = cnn_grid_result.best_estimator_

# KerasClassifier.predict returns class labels rather than sigmoid
# probabilities, so no 0.5 threshold is applied here (thresholding labels
# was a no-op); only the integer dtype is normalised.
cnn_pred = best_cnn_model.predict(X_test.values.reshape(X_test.shape[0], X_test.shape[1], 1))
cnn_pred_binary = cnn_pred.astype(int)

# Per-class precision / recall / F1 on the held-out split.
cnn_report = classification_report(y_test, cnn_pred_binary)

print("Best CNN Model:")
print(f"Best Parameters: {cnn_grid_result.best_params_}")
print("Classification Report:")
print(cnn_report)

  cnn_model = KerasClassifier(build_fn=create_cnn_model, epochs=10, batch_size=32, verbose=0)


Best CNN Model:
Best Parameters: {'filters': 16, 'optimizer': 'adam'}
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       843
           1       1.00      1.00      1.00       782

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

