In [46]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim

In [135]:
# Load the mushroom table from disk.
data = pd.read_csv('mushroom_cleaned.csv')

# Replace the target column with integer class ids produced by LabelEncoder.
label_encoder = LabelEncoder()
encoded_target = label_encoder.fit_transform(data['class'])
data['class'] = encoded_target

# Split the frame into the target (y) and every remaining column (X).
y = data['class']
X = data.drop('class', axis=1)

In [136]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
feature_matrix = X.values
target_vector = y.values
X_train, X_test, y_train, y_test = train_test_split(
    feature_matrix,
    target_vector,
    test_size=0.2,
    random_state=42,
)

In [137]:
class CustomDataset(Dataset):
    """Map-style dataset holding features and labels as float32 tensors."""

    def __init__(self, features, labels):
        """Copy both inputs into float32 tensors.

        Args:
            features: array-like of feature rows, indexed along the first axis.
            labels: array-like of labels aligned with ``features``.
        """
        as_float32 = {"dtype": torch.float32}
        self.features = torch.tensor(features, **as_float32)
        self.labels = torch.tensor(labels, **as_float32)

    def __len__(self):
        """Return the number of feature rows."""
        n_rows = len(self.features)
        return n_rows

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        sample = self.features[idx]
        target = self.labels[idx]
        return sample, target


In [138]:
# Wrap each split in a CustomDataset so a DataLoader can batch it.
train_dataset, test_dataset = (
    CustomDataset(X_train, y_train),
    CustomDataset(X_test, y_test),
)


In [139]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [160]:
class BinaryClassifier(nn.Module):
    """Single-hidden-layer network that emits one probability per input row.

    Layout: Linear(input_size, 64) -> ReLU -> Linear(64, 1) -> Sigmoid.
    """

    def __init__(self, input_size):
        """Build the layers.

        Args:
            input_size: number of features in each input row.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: tensor whose last dimension has ``input_size`` entries.

        Returns:
            Tensor with a last dimension of size 1 holding sigmoid outputs.
        """
        hidden = self.relu(self.fc1(x))
        logit = self.fc2(hidden)
        return self.sigmoid(logit)
        

In [161]:
# Size the network's input layer from the number of columns in X.
n_features = X.shape[-1]
model = BinaryClassifier(input_size=n_features)

# Binary cross-entropy loss, applied to the model's sigmoid outputs.
criterion = nn.BCELoss()

# Adam over every model parameter.
learning_rate = 1e-3
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [167]:
# Training loop: mini-batch gradient descent over `train_dataset`, printing
# the sample-weighted mean loss after every epoch.

num_epochs = 30
batch_size = 32

# shuffle=True reshuffles the training samples at the start of every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Placeholders for best-checkpoint tracking; nothing in this cell updates them.
best_acc = - np.inf
best_weights = None

for epoch in range(num_epochs):
    print(f'Epoch {epoch}....')
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        outputs = model(inputs)

        # Drop only the trailing size-1 axis: (batch, 1) -> (batch,).
        # A bare .squeeze() would also drop the batch axis when the final
        # batch holds a single sample, and BCELoss then raises ValueError
        # because the input and target sizes no longer match.
        loss = criterion(outputs.squeeze(-1), labels.float())
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        # update the weights
        optimizer.step()

        # Weight the batch loss by the batch size so that a smaller final
        # batch does not skew the epoch average.
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

Epoch 0....
Epoch [1/30], Loss: 0.4905
Epoch 1....
Epoch [2/30], Loss: 0.4818
Epoch 2....
Epoch [3/30], Loss: 0.4841
Epoch 3....
Epoch [4/30], Loss: 0.4804
Epoch 4....
Epoch [5/30], Loss: 0.4814
Epoch 5....
Epoch [6/30], Loss: 0.4775
Epoch 6....
Epoch [7/30], Loss: 0.4753
Epoch 7....
Epoch [8/30], Loss: 0.4772
Epoch 8....
Epoch [9/30], Loss: 0.4725
Epoch 9....
Epoch [10/30], Loss: 0.4732
Epoch 10....
Epoch [11/30], Loss: 0.4713
Epoch 11....
Epoch [12/30], Loss: 0.4663
Epoch 12....
Epoch [13/30], Loss: 0.4694
Epoch 13....
Epoch [14/30], Loss: 0.4695
Epoch 14....
Epoch [15/30], Loss: 0.4669
Epoch 15....
Epoch [16/30], Loss: 0.4650
Epoch 16....
Epoch [17/30], Loss: 0.4630
Epoch 17....
Epoch [18/30], Loss: 0.4622
Epoch 18....
Epoch [19/30], Loss: 0.4617
Epoch 19....
Epoch [20/30], Loss: 0.4589
Epoch 20....
Epoch [21/30], Loss: 0.4607
Epoch 21....
Epoch [22/30], Loss: 0.4596
Epoch 22....
Epoch [23/30], Loss: 0.4584
Epoch 23....
Epoch [24/30], Loss: 0.4567
Epoch 24....
Epoch [25/30], Loss: 0

In [169]:
# Evaluation: fraction of held-out samples the model classifies correctly.
model.eval()
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
correct, total = 0, 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # Threshold the predicted probabilities at 0.5 to get hard 0/1 labels.
        predicted = outputs.squeeze().ge(0.5).long()
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

accuracy = correct / total
print(f"Accuracy on test set: {accuracy:.4f}")

Accuracy on test set: 0.7701


In [179]:
# Classify one hand-written sample of eight raw feature values.
# NOTE(review): assumes the values follow the column order of X — confirm.
test_x = torch.tensor([611,5,1,6,0.7842865255228345,673,12,0.9431945538974952])

# Put the model in inference mode here rather than relying on an earlier cell
# having done so, and skip autograd bookkeeping as the evaluation cell does.
model.eval()
with torch.no_grad():
    test_y = model(test_x)

# Threshold the predicted probability at 0.5 to get the class id.
p = (test_y.squeeze() >= 0.5).long()
print(p)

tensor(1)
