# Binary Classification on the Hypnogram Data

In [1]:
from pathlib import Path

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import lightning.pytorch as pl

## Loading data

In [2]:
# The project root is the parent of the notebook's working directory; both CSV
# files sit side by side under <root>/data/hypnogram.
project_path = Path.cwd().parent
hypnogram_dir = project_path.joinpath('data', 'hypnogram')
input_path = hypnogram_dir / 'hypnogram_input.csv'
output_path = hypnogram_dir / 'hypnogram_output.csv'

In [3]:
class Hypnogram(Dataset):
    """Hypnogram samples and labels, split sequentially into train and test parts.

    Both CSV files are read in full with ``np.loadtxt`` (integer dtype, comma
    delimiter). The leading ``train_fraction`` of the rows is the training
    split and the remaining rows are the test split; rows are never shuffled.

    Args:
        input_path: CSV file holding one sample per row.
        output_path: CSV file holding the labels, row-aligned with ``input_path``.
        train: If True keep the leading (training) rows, otherwise the trailing
            (test) rows.
        train_fraction: Fraction of rows assigned to the training split.

    Raises:
        ValueError: If ``train_fraction`` lies outside ``[0, 1]`` or the two
            files do not contain the same number of rows.
    """

    def __init__(self, input_path: Path, output_path: Path, train=True,
                 train_fraction: float = 0.8):
        if not 0.0 <= train_fraction <= 1.0:
            raise ValueError(
                f"train_fraction must be within [0, 1], got {train_fraction}"
            )

        # ndmin keeps the expected rank even when a file holds a single row.
        x = np.loadtxt(input_path, dtype=int, delimiter=',', ndmin=2)
        y = np.loadtxt(output_path, dtype=int, delimiter=',', ndmin=1)

        # Compare the *full* arrays: after slicing, the training parts would
        # both be cut to split_index rows and hide a length mismatch.
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                f"inputs have {x.shape[0]} rows but labels have {y.shape[0]} rows"
            )

        split_index = int(x.shape[0] * train_fraction)
        rows = slice(None, split_index) if train else slice(split_index, None)

        self.x = torch.tensor(x[rows, :], dtype=torch.float)
        # Labels get a trailing axis so they line up with an (N, 1) model output.
        self.y = torch.tensor(y[rows], dtype=torch.float).unsqueeze(dim=1)

    def __len__(self):
        """Return the number of samples in this split."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` tensor pair stored at ``idx``."""
        return self.x[idx], self.y[idx]

In [4]:
# Build both splits from the same pair of CSV files: training rows first,
# held-out rows second.
train_dataset, test_dataset = (
    Hypnogram(input_path=input_path, output_path=output_path, train=is_train)
    for is_train in (True, False)
)

In [5]:
# Report the tensor shapes of both splits, inputs before labels.
for split_name, split in (("train_dataset", train_dataset), ("test_dataset", test_dataset)):
    for field in ("x", "y"):
        print(f"{split_name}.{field}.shape = {getattr(split, field).shape!r}")

train_dataset.x.shape = torch.Size([36865, 300])
train_dataset.y.shape = torch.Size([36865, 1])
test_dataset.x.shape = torch.Size([9217, 300])
test_dataset.y.shape = torch.Size([9217, 1])


In [6]:
batch_size = 4
# The split is sequential, so without shuffling every epoch would present the
# training rows in file order. Shuffle the training data only; the seeded
# generator keeps the batch order reproducible after a kernel restart.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    generator=torch.Generator().manual_seed(0),
)
# Evaluation order does not affect the metrics, so the test data stays in order.
test_loader = DataLoader(test_dataset, batch_size=batch_size)

## Model

In [7]:
class LinearModel(pl.LightningModule):
    """Logistic regression: a single linear layer over 300 features and a sigmoid.

    Trained with binary cross-entropy on the sigmoid output. During training
    the per-batch "rounded prediction == label" results are collected so that
    the number of correct predictions can be reported at the end of each epoch.
    """

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(300, 1)
        self.sigmoid = torch.nn.Sigmoid()
        self.criterion = torch.nn.BCELoss()
        # Boolean correctness tensors, one per training batch of the current epoch.
        self.training_step_outputs = []

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        return self.sigmoid(self.linear(x))

    def training_step(self, train_batch, batch_idx):
        """Compute and log the BCE loss for one batch and record its correctness."""
        x, y = train_batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.training_step_outputs.append(y_hat.round() == y)
        self.log("train_loss", loss)
        return loss

    def on_train_epoch_end(self):
        """Report how many training predictions of the finished epoch were correct."""
        if not self.training_step_outputs:
            return
        # Concatenate along the batch axis: torch.stack would raise whenever the
        # final batch is smaller than the others (dataset size not a multiple
        # of the batch size).
        all_preds = torch.cat(self.training_step_outputs)
        print(f"{torch.sum(all_preds) = }")
        print(f"{all_preds.shape = }")
        self.log("train_acc", all_preds.float().mean())
        self.training_step_outputs.clear()  # free memory

    def validation_step(self, val_batch, batch_idx):
        """Compute and log the BCE loss for one validation batch."""
        x, y = val_batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log("val_loss", loss)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate 1e-3."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

lr_model = LinearModel()

In [8]:
# Smoke test: a single epoch capped at 100 training batches.
smoke_test_options = {"limit_train_batches": 100, "max_epochs": 1}
trainer = pl.Trainer(**smoke_test_options)
trainer.fit(
    lr_model,
    train_dataloaders=train_loader,
    val_dataloaders=test_loader,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs




RuntimeError: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero.

In [10]:
# Training run over the complete training loader for num_epochs epochs.
num_epochs = 1
trainer = pl.Trainer(max_epochs=num_epochs)
trainer.fit(
    lr_model,
    train_dataloaders=train_loader,
    val_dataloaders=test_loader,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


RuntimeError: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero.

In [None]:
# load checkpoint
# NOTE(review): this path names one specific past run (version_7, epoch 9) —
# confirm it exists or point it at the checkpoint you want to inspect.
checkpoint = "./lightning_logs/version_7/checkpoints/epoch=9-step=1000.ckpt"
model = LinearModel.load_from_checkpoint(checkpoint)

# switch the restored model to evaluation mode
model.eval()

# predict on the first 4 test samples; no gradients are needed for inference
with torch.no_grad():
    sample_x = test_dataset.x[:4].to(model.device)
    predictions = model.sigmoid(model.linear(sample_x))
print("⚡" * 20, "\nPredictions (4 test samples):\n", predictions, "\n", "⚡" * 20)

In [18]:
# Read the raw (unsplit) integer arrays straight from both CSV files.
x, y = (
    np.loadtxt(csv_path, dtype=int, delimiter=',')
    for csv_path in (input_path, output_path)
)