<a href="https://colab.research.google.com/github/myredex/collecting_stock_data/blob/master/03_pytorch_binary_classification_simple.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Binary classification with PyTorch

## Imports

In [1]:
# Import required libs
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.datasets import make_classification
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Generate some data

In [2]:
BATCH_SIZE = 32
SEED = 42  # one seed shared by the data generator, the split and PyTorch

# Seed PyTorch's global RNG so that every later random operation in this
# session (e.g. parameter initialisation, the shuffling done by train_loader)
# is reproducible on a fresh kernel.
torch.manual_seed(SEED)

# Create device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

# Generate a synthetic two-class problem: 1000 samples, 20 features,
# 10 of which are informative.
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, n_classes=2, random_state=SEED)

# Split the data into training and test sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Convert data to PyTorch tensors and move them to the selected device.
# The labels get an extra trailing dimension (unsqueeze on dim=1), so they
# are stored as a column: one label per row.
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).to(device).unsqueeze(dim=1)
y_test = torch.tensor(y_test, dtype=torch.float32).to(device).unsqueeze(dim=1)

# Create datasets pairing each feature row with its label
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Create data loaders; only the training batches are shuffled
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Device: cpu


## Define model

In [3]:
# Create model class
class BinaryClassificationModel(nn.Module):
    """Small feed-forward network for binary classification.

    Three linear layers (input_dim -> 16 -> 8 -> 1) with ReLU after the two
    hidden layers and a sigmoid on the output, so each input row is mapped
    to a single value between 0 and 1.

    Args:
        input_dim: Number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Fully connected layers
        self.layer_1 = nn.Linear(input_dim, 16)
        self.layer_2 = nn.Linear(in_features=16, out_features=8)
        self.output_layer = nn.Linear(in_features=8, out_features=1)
        # Activations
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor with a last dimension of size 1 holding the sigmoid
            output for each row.
        """
        hidden = self.layer_1(x)
        hidden = self.relu(hidden)
        hidden = self.layer_2(hidden)
        hidden = self.relu(hidden)
        logits = self.output_layer(hidden)
        return self.sigmoid(logits)


# Build the network with one input per feature column of the training data,
# then move it to the selected device
n_features = X_train.shape[1]
model = BinaryClassificationModel(input_dim=n_features)
model = model.to(device)

# Adam optimizer over all model parameters, and binary cross-entropy loss
# (BCELoss takes probabilities, which the model's sigmoid output provides)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.BCELoss()

## Train the model

In [4]:
epochs = 10

# Mini-batch training: one optimizer step per batch, with the mean batch loss
# reported at the end of every epoch.
for epoch in range(epochs):
    train_loss = 0.0
    model.train()
    # The batch tensors get their own names so the full-dataset X / y from the
    # data-generation cell are not overwritten by the loop.
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = loss_fn(outputs, y_batch)
        loss.backward()
        optimizer.step()
        # Accumulate a plain Python float. Adding the loss tensor itself would
        # keep every batch's autograd graph alive until the end of the epoch.
        train_loss += loss.item()
    epoch_loss = train_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{epochs}, Loss: {epoch_loss:.4f}]")

Epoch [1/10, Loss: 0.7146]
Epoch [2/10, Loss: 0.6970]
Epoch [3/10, Loss: 0.6798]
Epoch [4/10, Loss: 0.6524]
Epoch [5/10, Loss: 0.6035]
Epoch [6/10, Loss: 0.5363]
Epoch [7/10, Loss: 0.4544]
Epoch [8/10, Loss: 0.3723]
Epoch [9/10, Loss: 0.3028]
Epoch [10/10, Loss: 0.2539]


## Evaluate model

In [6]:
# Score the trained model on the held-out test set
model.eval()
with torch.inference_mode():
    test_outputs = model(X_test)
    # Outputs strictly above 0.5 count as class 1, everything else as class 0
    test_predictions = torch.gt(test_outputs, 0.5).to(torch.float32)
    # Fraction of test rows whose predicted label equals the true label
    n_correct = torch.sum(test_predictions == y_test)
    accuracy = (n_correct / y_test.size(0)).item()
    print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9050
