In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.impute import SimpleImputer

In [None]:
# Load the dataset from the notebook-relative assets folder and display it.
# A forward slash is used as the separator: it resolves on Windows, Linux and
# macOS alike, whereas a backslash is only a path separator on Windows.
df = pd.read_csv('assets/mushroom_cleaned.csv')
df

In [None]:
# Print a concise summary of df: column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Summary statistics, transposed so that each described column becomes one row.
df.describe().transpose()

In [None]:
# Count the missing values in every column.
df.isnull().sum()

In [None]:
# Number of rows that repeat an earlier row exactly (the first occurrence is not counted).
df.duplicated().sum()

In [None]:
# Show every member of each duplicate group; keep=False flags the first
# occurrence as well as its repeats.
df.loc[df.duplicated(keep=False)]

In [None]:
# Keep only the first occurrence of each repeated row; the original index
# labels of the surviving rows are preserved.
df = df[~df.duplicated()]

In [None]:
# Separate the target column from the remaining feature columns.
y = df['class']
X = df.drop(columns='class')

In [None]:
# Class balance of the target; y is the 'class' column extracted from df in the previous cell.
y.value_counts()

In [None]:
# Hold out 20% of the rows for testing. Shuffling is train_test_split's default,
# and the fixed random_state makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shapes of the four partitions, in the order X_train, X_test, y_train, y_test.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
# NOTE: X_train / X_test are overwritten with the scaled arrays, so re-running
# this cell would refit the scaler on data that is already scaled.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Train a random forest with a fixed seed on the scaled training split
# (fit returns the estimator itself), then predict the test split.
model_forest = RandomForestClassifier(random_state=42).fit(X_train, y_train)
yhat = model_forest.predict(X_test)

In [None]:
# Text report of the random-forest predictions against the test labels.
report_text = classification_report(y_test, yhat)
print(report_text)

In [None]:
# Confusion matrix of the random-forest predictions on the test split.
# The matrix rows/columns and the tick labels are both bound to
# model_forest.classes_, so the axes show the actual class values (instead of
# positional indices) and stay aligned even if a class is absent from y_test.
cm_display = ConfusionMatrixDisplay(
    confusion_matrix(y_test, yhat, labels=model_forest.classes_),
    display_labels=model_forest.classes_,
)
cm_display.plot(cmap='GnBu')
# The trailing semicolon suppresses the Text repr returned by set_title.
cm_display.ax_.set_title('Random forest: test-set confusion matrix');

In [None]:
import tensorflow.keras as tfk
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.utils import set_random_seed

In [None]:
# Fix the random seed before any layer is created so that weight
# initialisation is repeatable.
set_random_seed(42)

# Feed-forward binary classifier: two ReLU hidden layers followed by a single
# sigmoid output unit. The input shape is taken from X_train without its
# leading (sample) dimension.
layer_stack = [
    Input(shape=X_train.shape[1:]),
    Dense(units=32, activation='relu'),
    Dense(units=64, activation='relu'),
    Dense(units=1, activation='sigmoid'),
]
model_nn = Sequential(layer_stack)

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
bce_loss = tfk.losses.BinaryCrossentropy()
model_nn.compile(
    optimizer='adam',
    loss=bce_loss,
    metrics=['accuracy'],
)

In [None]:
# Train for 50 epochs.
# NOTE(review): the held-out test split is passed as validation data, so the
# per-epoch val_* metrics are measured on the test set.
model_nn.fit(
    x=X_train,
    y=y_train,
    epochs=50,
    validation_data=(X_test, y_test),
)