# Gravity Spy model

This notebook defines a PyTorch Lightning model and trains it on the Gravity Spy dataset.

### Import necessary modules

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger

### Define image transformations

In [2]:
# Preprocessing pipeline applied to every image as it is loaded from disk.
_preprocessing_steps = [
    transforms.Grayscale(num_output_channels=1),  # Collapse to a single channel
    transforms.Resize((47, 57)),  # Bring every image to a common 47x57 size
    transforms.ToTensor(),  # Convert the image to a tensor
]
transform = transforms.Compose(_preprocessing_steps)

### Load training, validation, and test data

In [5]:
BATCH_SIZE = 30  # Number of images per batch, shared by all three splits


def _make_split(data_dir, shuffle):
    """Build the dataset and the DataLoader for one data split.

    Args:
        data_dir: Root directory of the split, passed to ``ImageFolder``.
        shuffle: Whether the DataLoader should shuffle the samples.

    Returns:
        A ``(dataset, loader)`` tuple for the split.
    """
    dataset = datasets.ImageFolder(root=data_dir, transform=transform)
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=shuffle)
    return dataset, loader


# Only the training split is shuffled; validation and test keep a fixed order.
train_data_dir = "../data/train/train"
train_dataset, train_loader = _make_split(train_data_dir, shuffle=True)

validation_data_dir = "../data/validation/validation"
validation_dataset, validation_loader = _make_split(validation_data_dir, shuffle=False)

test_data_dir = "../data/test/test"
test_dataset, test_loader = _make_split(test_data_dir, shuffle=False)

### PyTorch Lightning model

In [6]:
class LitCNNModel(pl.LightningModule):
    """Convolutional classifier for single-channel images.

    The network applies two ``Conv2d -> ReLU -> MaxPool2d`` stages followed by
    two fully connected layers, and returns raw class logits.

    Args:
        num_classes: Number of output classes (length of the logit vector).
        learning_rate: Learning rate handed to the Adadelta optimizer.
        input_height: Height in pixels of the images fed to the model.
        input_width: Width in pixels of the images fed to the model.

    The first fully connected layer is sized for ``input_height`` x
    ``input_width`` inputs, so the images passed to ``forward`` must have
    that size.
    """

    def __init__(self, num_classes=22, learning_rate=1.0, input_height=47, input_width=57):
        super().__init__()
        self.save_hyperparameters()  # Save hyperparameters for logging and checkpointing
        self.conv1 = nn.Conv2d(1, 128, kernel_size=5)
        self.conv2 = nn.Conv2d(128, 128, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()

        # Push one dummy image through the convolutional stack to find out how
        # many features reach the first linear layer. No gradients are needed
        # for this shape probe.
        with torch.no_grad():
            dummy_input = torch.zeros(1, 1, input_height, input_width)  # Batch of one grayscale image
            self.flattened_size = self._forward_conv(dummy_input).numel()

        self.fc1 = nn.Linear(self.flattened_size, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def _forward_conv(self, x):
        """Run the two convolution / ReLU / max-pool stages."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        return x

    def forward(self, x):
        """Return raw class logits for a batch of images."""
        x = self._forward_conv(x)
        x = x.view(x.size(0), -1)  # Flatten everything except the batch dimension
        x = self.relu(self.fc1(x))
        x = self.fc2(x)  # Raw logits; cross_entropy applies log-softmax itself
        return x

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one training batch."""
        inputs, labels = batch
        outputs = self(inputs)
        loss = F.cross_entropy(outputs, labels)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the cross-entropy loss and accuracy for one validation batch."""
        inputs, labels = batch
        outputs = self(inputs)
        loss = F.cross_entropy(outputs, labels)
        predicted = outputs.argmax(dim=1)
        acc = (predicted == labels).float().mean()
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)

    def configure_optimizers(self):
        """Return an Adadelta optimizer using the saved learning rate."""
        return torch.optim.Adadelta(self.parameters(), lr=self.hparams.learning_rate)

### Metric computation functions

In [7]:
def calculate_model_accuracy(model, dataloader, device):
    """Compute, print and return the classification accuracy of a model.

    The model is switched to evaluation mode and moved to ``device``; both
    changes remain in effect after the call.

    Args:
        model: Callable module mapping a batch of inputs to per-class scores
            with the class dimension at index 1.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        device: Device on which the model and the batches are evaluated.

    Returns:
        The accuracy as a percentage (``correct / total * 100``).

    Raises:
        ValueError: If ``dataloader`` yields no samples, since the accuracy
            is then undefined.
    """
    model.eval()
    model.to(device)
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            predicted = model(inputs).argmax(dim=1)  # Highest-scoring class per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Fail with a clear message instead of a bare ZeroDivisionError.
    if total == 0:
        raise ValueError("dataloader yielded no samples; accuracy is undefined")

    accuracy = correct / total * 100
    print(f"Accuracy: {accuracy:.2f}%")
    return accuracy

### Train model

Training for 30 epochs took more than one hour on the author's machine. Consider reducing `max_epochs` if that is too long for you.

In [8]:
# Fix the random seeds so that weight initialisation and the shuffling of the
# training data are repeatable from one run of the notebook to the next.
SEED = 42
pl.seed_everything(SEED)

# Metrics are written as CSV files under logs/cnn_model.
csv_logger = CSVLogger("logs", name="cnn_model")

# Keep only the checkpoint with the lowest validation loss.
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss", mode="min", save_top_k=1, dirpath="checkpoints", filename="best_model"
)

# One output unit per class found in the training dataset.
model = LitCNNModel(num_classes=len(train_dataset.classes))
trainer = pl.Trainer(
    max_epochs=30,  # Change this if training takes too long
    accelerator="auto",  # Automatically selects GPU if available
    log_every_n_steps=10,
    callbacks=[checkpoint_callback],
    logger=csv_logger,
)

trainer.fit(model, train_loader, validation_loader)

Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A5000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name  | Type      | Params | Mode 
--------------------------------------------
0 | conv1 | Conv2d    | 3.3 K  | train
1 | conv2 | Conv2d    | 409 K  | train
2 | pool  | MaxPool2d | 0   

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/agamalii/miniconda3/envs/gs_torch_env/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=63` in the `DataLoader` to improve performance.


                                                                           

/home/agamalii/miniconda3/envs/gs_torch_env/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=63` in the `DataLoader` to improve performance.


Epoch 29: 100%|██████████| 745/745 [02:44<00:00,  4.53it/s, v_num=0, train_loss_step=0.351, val_loss=0.835, val_acc=0.918, train_loss_epoch=0.0267]   

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 745/745 [02:44<00:00,  4.53it/s, v_num=0, train_loss_step=0.351, val_loss=0.835, val_acc=0.918, train_loss_epoch=0.0267]


### Evaluate model

In [9]:
# Path of the checkpoint that the checkpoint callback recorded as best.
best_model_path = checkpoint_callback.best_model_path
print(f"Best model saved at: {best_model_path}")

# Evaluate on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Restore the model from that checkpoint and score it on the test loader.
best_model = LitCNNModel.load_from_checkpoint(best_model_path)
calculate_model_accuracy(best_model, test_loader, device)

Best model saved at: /home/agamalii/FO/Gravitational-Wave-Detection/models/checkpoints/best_model.ckpt
Accuracy: 93.14%


93.13559322033899