# Building a Neural Network Classifier in PyTorch

This tutorial walks you through building a basic neural network using **PyTorch's `nn.Module`**.  
We use the **Breast Cancer Wisconsin dataset** to perform binary classification.


### 1. Import Required Libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


### 2. Load and Preprocess the Dataset

We load the dataset, standardize the features, split into training and testing sets, and convert the data to PyTorch tensors.


In [2]:
# Load the dataset
data = load_breast_cancer()

In [3]:
# Create a DataFrame from the data
df = pd.DataFrame(data=data.data, columns=data.feature_names)

# Add the target column
df['target'] = data.target

# Optional: Display the first few rows
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [4]:
df.shape

(569, 31)

In [5]:
# Separate features and target
X = df.drop('target', axis=1)
y = df['target']

# Split the dataset: 80% train, 20% test (you can adjust the test_size as needed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Optional: Display the shapes of the splits
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(455, 30) (114, 30) (455,) (114,)


In [6]:
print(type(X_train), type(X_test), type(y_train), type(y_test))

<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'> <class 'pandas.core.series.Series'>


In [7]:
# Standardize features (mean=0, std=1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [8]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

In [9]:
print(type(X_train_tensor), type(X_test_tensor), type(y_train_tensor), type(y_test_tensor))

<class 'torch.Tensor'> <class 'torch.Tensor'> <class 'torch.Tensor'> <class 'torch.Tensor'>


In [10]:
print(X_train_tensor.dtype, X_test_tensor.dtype, y_train_tensor.dtype, y_test_tensor.dtype)

torch.float32 torch.float32 torch.float32 torch.float32


### 3. Creating Custom Dataset using Dataset & DataLoader

In [12]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets
    
    def __len__(self):
        # Return total number of samples
        return len(self.features)
    
    def __getitem__(self, index):
        # Return one sample of data and label
        return self.features[index], self.targets[index]


In [13]:
train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
test_dataset = CustomDataset(X_test_tensor, y_test_tensor)

In [14]:
train_dataset[10]

(tensor([-0.4976,  0.6137, -0.4981, -0.5310, -0.5769, -0.1749, -0.3622, -0.2849,
          0.4335,  0.1782, -0.3684,  0.5531, -0.3167, -0.4052,  0.0403, -0.0380,
         -0.1804,  0.1648, -0.1217,  0.2308, -0.5004,  0.8194, -0.4692, -0.5331,
         -0.0491, -0.0416, -0.1491,  0.0968,  0.1062,  0.4904]),
 tensor(1.))

In [15]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

### 4. Define the Model Class

We define a neural network using PyTorch's `nn.Module` with the following architecture:
- Linear layer with 3 hidden units
- ReLU activation
- Output layer with sigmoid activation for binary classification


In [16]:
class MySimpleNN(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(num_features, 3),
            nn.ReLU(),
            nn.Linear(3, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.network(x)

### 5. Set Training Parameters
We define the hyperparameters and loss function.

In [18]:
# create model
model = MySimpleNN(X_train_tensor.shape[1])
# define epochs
epochs = 25
# define loss function
loss_function = nn.BCELoss()
# define learning_rate
learning_rate = 0.1
# define optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)



### 6. Train the Model
We train the model using the SGD optimizer and track the loss at each epoch.

In [None]:
# define loop
for epoch in range(epochs):
  for batch_features, batch_labels in train_loader:

    # forward pass
    y_pred = model(batch_features)

    # loss calculate
    loss = loss_function(y_pred, batch_labels.view(-1,1))

    # clear gradients
    optimizer.zero_grad()

    # backward pass
    loss.backward()

    # parameters update
    optimizer.step()

  # print loss in each epoch
  print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')

Epoch: 1, Loss: 0.31420519948005676
Epoch: 2, Loss: 0.0004274158854968846
Epoch: 3, Loss: 0.0023832565639168024
Epoch: 4, Loss: 0.0012654215097427368
Epoch: 5, Loss: 0.011919582262635231
Epoch: 6, Loss: 0.13620533049106598
Epoch: 7, Loss: 0.03152069076895714
Epoch: 8, Loss: 0.01760205067694187
Epoch: 9, Loss: 0.00961923599243164
Epoch: 10, Loss: 0.029511677101254463
Epoch: 11, Loss: 0.030012700706720352
Epoch: 12, Loss: 0.1285409778356552
Epoch: 13, Loss: 0.0023159205447882414
Epoch: 14, Loss: 0.00015762599650770426
Epoch: 15, Loss: 0.009274658747017384
Epoch: 16, Loss: 0.004612325690686703
Epoch: 17, Loss: 0.01296616904437542
Epoch: 18, Loss: 0.00548334838822484
Epoch: 19, Loss: 0.03049200214445591
Epoch: 20, Loss: 0.007991793565452099
Epoch: 21, Loss: 0.00716188782826066
Epoch: 22, Loss: 0.009107222780585289
Epoch: 23, Loss: 0.025988789275288582
Epoch: 24, Loss: 0.007622851990163326
Epoch: 25, Loss: 0.0031806137412786484


In [54]:
print("Parameters", model.network.parameters)
for layer in model.network:
    if isinstance(layer, nn.Linear):
        print(layer)
        print("-"*30)
        print("Weights:", layer.weight)
        print("-"*30)
        print("Bias:", layer.bias)

Parameters <bound method Module.parameters of Sequential(
  (0): Linear(in_features=30, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
  (3): Sigmoid()
)>
Linear(in_features=30, out_features=3, bias=True)
------------------------------
Weights: Parameter containing:
tensor([[ 0.2369,  0.2718,  0.4842,  0.4907, -0.0364,  0.1770,  0.3463,  0.4439,
         -0.1423, -0.2371,  0.1677, -0.2378,  0.2136,  0.3020,  0.2010, -0.1934,
          0.1740,  0.0628, -0.0814, -0.3458,  0.5118,  0.6636,  0.5081,  0.2833,
          0.5597,  0.0163,  0.4181,  0.5547,  0.5729, -0.0520],
        [ 0.0583,  0.2333,  0.1062,  0.2185,  0.2838, -0.0287,  0.2530,  0.0957,
         -0.0218, -0.0712,  0.3551,  0.0741,  0.2575,  0.1055, -0.1583, -0.2673,
         -0.0390,  0.2479,  0.0711, -0.2443,  0.1495,  0.1102,  0.1891,  0.1000,
          0.2266, -0.0350,  0.0491,  0.1541,  0.3704,  0.1460],
        [-0.0622, -0.0241, -0.1114, -0.1618, -0.0120,  0.3609, -0.304

### 6. Evaluate the Model
We test the model on unseen data and calculate accuracy.

In [25]:
# Model evaluation using test_loader
model.eval()  # Set the model to evaluation mode
accuracy_list = []

with torch.no_grad():
    for batch_features, batch_labels in test_loader:
        # Forward pass
        y_pred = model(batch_features)
        y_pred = (y_pred > 0.5).float()  # Convert probabilities to binary predictions

        # Calculate accuracy for the current batch
        batch_accuracy = (y_pred.view(-1) == batch_labels).float().mean().item()
        accuracy_list.append(batch_accuracy)

# Calculate overall accuracy
overall_accuracy = sum(accuracy_list) / len(accuracy_list)
print(f'Accuracy: {overall_accuracy:.4f}')


Accuracy: 0.9844


###  Conclusion
We built and trained a simple neural network using PyTorch.

- We used `nn.Sequential` for fast model definition.
- Training was done with `BCELoss` and `SGD`.
- We used dataaset class and dataloader for creating custom data.
- We calculated accuracy for binary classification from each batch and calculated mean accuracy
