<a href="https://colab.research.google.com/github/snpsuen/Deep_Learning_Data/blob/main/script/Pytorch_MLP_Binclass_Datasetloader_AT_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split, default_collate
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from numpy import vstack

In [2]:
# Load the sonar CSV and separate it into features (every column but the last)
# and labels (the last column), both as NumPy arrays
path = 'https://raw.githubusercontent.com/snpsuen/Deep_Learning_Data/refs/heads/main/dataset/sonar.csv'
data = pd.read_csv(path)
feature_frame, label_column = data.iloc[:, :-1], data.iloc[:, -1]
X, y = feature_frame.values, label_column.values

# X is two-dimensional: rows are samples, columns are input features
n_insamples, n_inputs = X.shape
print("X shape = ", X.shape)
print("Number of input samples = ", n_insamples)
print("Number of input features = ", n_inputs)

X shape =  (208, 60)
Number of input samples =  208
Number of input features =  60


In [3]:
# Replace the class values with integer codes and shape them as one target column
encoder = LabelEncoder()
y = encoder.fit_transform(y).reshape(-1, 1)

# After the reshape y is (samples, 1), so the second dimension is the target count
n_outsamples, n_outputs = y.shape
print("y shape = ", y.shape)
print("Number of output samples = ", n_outsamples)
print("Number of output targets = ", n_outputs)


y shape =  (208, 1)
Number of output samples =  208
Number of output targets =  1


In [4]:
# Map-style dataset wrapping the sonar features and targets
class SonarDataset(Dataset):
    """Pairs each feature row with the target row at the same index.

    Attributes:
        X: float32 tensor built from the feature array.
        y: float32 tensor built from the target array.
    """

    def __init__(self, X, y):
        """Convert the feature and target arrays to float32 tensors and keep them."""
        as_float32 = {"dtype": torch.float32}
        self.X = torch.tensor(X, **as_float32)
        self.y = torch.tensor(y, **as_float32)

    def __len__(self):
        """Return the number of samples, i.e. the length of the feature tensor."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

In [5]:
# set up DataLoaders for data set
dataset = SonarDataset(X, y)

# Split 70% / 30% with a dedicated, seeded generator so the same samples land in
# the train and test subsets on every run; otherwise the reported accuracy
# changes each time the notebook is re-executed.
split_generator = torch.Generator().manual_seed(42)
trainset, testset = random_split(dataset, [0.7, 0.3], generator=split_generator)

# Only the training batches need shuffling; the evaluation metrics do not depend
# on batch order, so the test loader keeps a fixed order.
trainloader = DataLoader(trainset, shuffle=True, batch_size=16)
testloader = DataLoader(testset, shuffle=False, batch_size=16)

In [6]:
# Build the network: two ReLU hidden layers (60 and 30 units) followed by a
# sigmoid output, so each prediction lies in (0, 1)
hidden_1, hidden_2 = 60, 30
layers = [
    nn.Linear(n_inputs, hidden_1),
    nn.ReLU(),
    nn.Linear(hidden_1, hidden_2),
    nn.ReLU(),
    nn.Linear(hidden_2, n_outputs),
    nn.Sigmoid(),
]
model = nn.Sequential(*layers)

In [7]:
# Fit the model with plain SGD on binary cross-entropy
loss_fn = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
n_epochs = 200

# Put the model in training mode before the first update
model.train()
for epoch in range(n_epochs):
    # One pass over the training batches per epoch
    for X_batch, y_batch in trainloader:
        # Drop the gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass and loss against the batch targets
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        # Backward pass, then apply the parameter update
        loss.backward()
        optimizer.step()

In [8]:
# Evaluate on the whole test split in one forward pass.
# default_collate gathers every (features, target) pair of the testset into a
# single pair of batched tensors.
X_test, y_test = default_collate(testset)
model.eval()
# Inference only: disable gradient tracking so no autograd graph is built
with torch.no_grad():
    y_pred = model(X_test)
# Round the sigmoid outputs to 0/1 and compare them with the targets
acc = (y_pred.round() == y_test).float().mean()
acc = float(acc)
print("Model accuracy: %.2f%%" % (acc*100))

Model accuracy: 85.48%


In [12]:
# Evaluate the model batch by batch and score the pooled predictions
predictions, actuals = list(), list()
model.eval()
# Inference only: with gradient tracking disabled the outputs can be converted
# to NumPy directly, without detaching them first
with torch.no_grad():
    for X_batch, y_batch in testloader:
        # Predict on the batch and round the outputs to class values
        yhat = model(X_batch).numpy().round()
        # Keep the targets as a column so they stack alongside the predictions
        yact = y_batch.numpy()
        yact = yact.reshape((len(yact), 1))
        # store
        predictions.append(yhat)
        actuals.append(yact)

# Stack the per-batch arrays into one column each
predictions, actuals = vstack(predictions), vstack(actuals)
# calculate accuracy
acc = accuracy_score(actuals, predictions)
print('Accuracy: %.3f' % acc)

Accuracy: 0.855
