## Torch Dataset

In [1]:
import torch
from torch.utils.data import Dataset
import pandas as pd

In [2]:
class WaterDataset(Dataset):
    def __init__(self, csv_path, dtype=torch.float32):
        super().__init__()
        df = pd.read_csv(csv_path)
        self.data = df.to_numpy()
        self.dtype = dtype

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, index):
        features = self.data[index, :-1]
        labels = self.data[index, -1]
        
        features_tensor = torch.tensor(features, dtype=self.dtype)
        labels_tensor = torch.tensor(labels, dtype=self.dtype)
        return features_tensor, labels_tensor

In [3]:
train_file = "datasets/water_potability/water_train.csv"
test_file = "datasets/water_potability/water_test.csv"

train_set = WaterDataset(train_file)
test_set = WaterDataset(test_file)

In [4]:
type(train_set[0])

tuple

In [5]:
train_set[0]

(tensor([0.4836, 0.6156, 0.5140, 0.7774, 0.3546, 0.3353, 0.3673, 0.5141, 0.6173]),
 tensor(1.))

## Data Loader

In [6]:
from torch.utils.data import DataLoader

In [7]:
batch_size = 2

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [8]:
features, labels = next(iter(train_loader))
print("Features:", features)
print("Labels:", labels)

Features: tensor([[0.3776, 0.3835, 0.4381, 0.5603, 0.5826, 0.3603, 0.6441, 0.5349, 0.2771],
        [0.4834, 0.2663, 0.2594, 0.7068, 0.5401, 0.1546, 0.4831, 0.3984, 0.6213]])
Labels: tensor([0., 1.])


## Model

In [36]:
import torch.nn as nn
import torch.nn.init as init

In [87]:
class Model(nn.Module):

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(9, 16)
        self.bn1 = nn.BatchNorm1d(16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, 1)

        init.kaiming_uniform_(self.fc1.weight)
        init.kaiming_uniform_(self.fc2.weight)
        init.kaiming_uniform_(self.fc3.weight, nonlinearity="sigmoid")

    def forward(self, x):
        x = nn.functional.elu(self.fc1(x))
        x = self.bn1(x)
        x = nn.functional.elu(self.fc2(x))
        x = nn.functional.sigmoid(self.fc3(x))

        return x


In [88]:
model = Model()

## Loss Function and Optimizer

In [89]:
import torch.optim as optim
from torch.nn.functional import relu

In [90]:
# Define Loss function and optimizer.

criterion = nn.BCELoss()
lr = 0.01
optimizer = optim.Adam(model.parameters(), lr=lr)

In [91]:
# Define Metrics
from torchmetrics import Accuracy

acc = Accuracy(task='Binary')

In [92]:
num_epoch = 10

for epoch in range(num_epoch):

    model.train()

    for features, labels in train_loader:
        optimizer.zero_grad()
        y_hat = model(features)

        loss = criterion(
            y_hat, labels.view(-1, 1) 
        )

        loss.backward() # compute gradient
        optimizer.step() # update weights

    model.eval()
    
    with torch.no_grad():
        for features, labels in test_loader:
            out = model(features)
            y_hat = (out >= 0.5).float()
            accuracy = acc(y_hat, labels.view(-1, 1))

    print(f"Epoch {epoch} - Acc: {acc.compute():.2f} | Loss: {loss:.2f}")
    

Epoch 0 - Acc: 0.61 | Loss: 0.60
Epoch 1 - Acc: 0.61 | Loss: 0.62
Epoch 2 - Acc: 0.61 | Loss: 0.58
Epoch 3 - Acc: 0.61 | Loss: 0.49
Epoch 4 - Acc: 0.61 | Loss: 0.74
Epoch 5 - Acc: 0.61 | Loss: 0.40
Epoch 6 - Acc: 0.61 | Loss: 0.77
Epoch 7 - Acc: 0.61 | Loss: 0.74
Epoch 8 - Acc: 0.60 | Loss: 0.62
Epoch 9 - Acc: 0.60 | Loss: 0.63
