In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import torch
import torch.nn as nn

In [2]:
df = pd.read_csv("mushrooms.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8124 entries, 0 to 8123
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   class                     8124 non-null   object
 1   cap-shape                 8124 non-null   object
 2   cap-surface               8124 non-null   object
 3   cap-color                 8124 non-null   object
 4   bruises                   8124 non-null   object
 5   odor                      8124 non-null   object
 6   gill-attachment           8124 non-null   object
 7   gill-spacing              8124 non-null   object
 8   gill-size                 8124 non-null   object
 9   gill-color                8124 non-null   object
 10  stalk-shape               8124 non-null   object
 11  stalk-root                8124 non-null   object
 12  stalk-surface-above-ring  8124 non-null   object
 13  stalk-surface-below-ring  8124 non-null   object
 14  stalk-color-above-ring  

In [3]:
binary_cols = ["class", "bruises", "gill-size"]

def rename_getdummies(df):
    cols = list( set(df.columns.to_list()).difference( set(binary_cols) ) )
    df_op = df[cols].copy()

    for col in cols:
        replace_dict = {}
        unique_els = df_op[col].unique()
        for unique_el in unique_els:
            replace_dict[unique_el] = col + "_" + unique_el

        df_op[col] = df_op[col].replace(replace_dict)

        df_op = df_op.join( pd.get_dummies( df_op[col] ) )
        df_op = df_op.drop( col, axis=1)
        
    return df_op.join( df[binary_cols] )

In [4]:
df = rename_getdummies(df)

In [5]:
def convert_to_binary(df):
    df_op = df[binary_cols].copy()

    for col in binary_cols:
        unique_els = df_op[col].unique()

        replace_dict = {unique_els[0]: 0, unique_els[1]: 1}
        df_op[col] = df_op[col].replace(replace_dict)

    return df_op

In [6]:
converted = convert_to_binary(df)
df = converted.join( df.drop(binary_cols, axis=1) ).copy()

In [7]:
df.head()

Unnamed: 0,class,bruises,gill-size,stalk-root_?,stalk-root_b,stalk-root_c,stalk-root_e,stalk-root_r,population_a,population_c,...,veil-color_y,stalk-color-below-ring_b,stalk-color-below-ring_c,stalk-color-below-ring_e,stalk-color-below-ring_g,stalk-color-below-ring_n,stalk-color-below-ring_o,stalk-color-below-ring_p,stalk-color-below-ring_w,stalk-color-below-ring_y
0,0,0,0,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,True,False
1,1,0,1,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
2,1,0,1,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
3,0,0,0,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,True,False
4,1,1,1,False,False,False,True,False,True,False,...,False,False,False,False,False,False,False,False,True,False


In [8]:
from sklearn.model_selection import train_test_split

X = df.drop("class", axis=1)
y = df["class"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [9]:
X_train = np.array(X_train, dtype=np.float64)
X_test = np.array(X_test, dtype=np.float64)
y_train = np.array(y_train, dtype=np.float64)
y_test = np.array(y_test, dtype=np.float64)

In [10]:
class ClassificationModel(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.seq = nn.Sequential(
            nn.Linear(in_features=num_features, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=1)
        )

    def forward(self, x: torch.Tensor):
        return self.seq(x)

In [11]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100 
    return acc

In [12]:
torch.manual_seed(42)
model = ClassificationModel( X_train.shape[1] )

loss = nn.BCEWithLogitsLoss()
optim = torch.optim.SGD(model.parameters(), lr=0.01)

In [13]:
X_train = torch.from_numpy( X_train ).type(torch.float32)
X_test = torch.from_numpy( X_test ).type(torch.float32)
y_train = torch.from_numpy( y_train ).type(torch.float32)
y_test = torch.from_numpy( y_test ).type(torch.float32)

In [None]:
epochs = 2_500
train_costs = []
test_costs = []

acc_train = []
acc_test = []

for epoch in range(epochs):
    optim.zero_grad()
    
    y_pred = model(X_train).squeeze()
    y_train_pred = torch.round(torch.sigmoid( y_pred ))
    acc_train.append( accuracy_fn( y_train, y_train_pred )) 

    train_cost = loss(y_pred, y_train)
    train_cost.backward()
    train_costs.append(train_cost.item())

    optim.step()

    with torch.no_grad():
        y_pred = model(X_test).squeeze()
        test_cost = loss(y_pred, y_test)
        y_test_pred = torch.round(torch.sigmoid( y_pred ))
        acc_test.append( accuracy_fn(y_test, y_test_pred) )
        
        test_costs.append(test_cost.item())

In [None]:
print(f"Train Accuracy: {acc_train[-1]}, Test Accuracy: {acc_test[-1]}")

plt.plot(np.arange(len(acc_train)), acc_train, label="Train Accuracy")
plt.plot(np.arange(len(acc_test)), acc_test, label="Test Accuracy")

plt.title("Accuracy Variation")
plt.xlabel("Number of Epochs")
plt.ylabel("Accuracy")
plt.legend()

plt.show()

In [None]:
print(f"Train Cost: {train_cost}, Test Cost: {test_cost}")

plt.plot(np.arange(len(train_costs)), train_costs, label="Train Cost")
plt.plot(np.arange(len(test_costs)), test_costs, label="Test Cost")

plt.title("Cost Variation")
plt.xlabel("Number of Epochs")
plt.ylabel("Cost")
plt.legend()

plt.show()

In [None]:
train_acc = accuracy_fn(y_train, y_train_pred)
test_acc = accuracy_fn(y_test, y_test_pred)

print(f"Accuracy on Training Data: {train_acc:.2f}%, Accuracy on Testing Data: {test_acc:.2f}%")