In [3]:
# pip install

!pip install black>=24.8.0
!pip install lightning[extra]>=2.4.0
!pip install rich>=13.8.1
!pip install tensorboard>=2.18.0
!pip install timm>=1.0.9
!pip install torch>=2.4.1
!pip install torchvision>=0.19.1

# data module - data_modules.py


In [4]:
import os
import zipfile
import requests
from pathlib import Path
from typing import Optional
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder

class DogsDataModule(pl.LightningDataModule):
    """Data module for the dogs classifier image-folder dataset.

    Downloads the zipped dataset on demand, extracts it under ``data_dir`` and
    splits it 80/10/10 into train/validation/test subsets.

    Args:
        data_dir: Directory holding the zip archive and the extracted
            ``dogs_classifier_dataset`` folder.
        batch_size: Batch size used by all three dataloaders.
        seed: Seed for the generator that drives the random split. A fixed seed
            keeps the train/val/test membership identical across separate runs,
            so an evaluation run does not test on images used for training.
    """

    def __init__(self, data_dir: str = 'data', batch_size: int = 32, seed: int = 42):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.seed = seed
        # Resize to 224x224, convert to tensor, then normalize per channel.
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        self.dataset_url = "https://github.com/abhiyagupta/Datasets/raw/main/CNN_Datasets/dogs_classifier_dataset.zip"
        self.dataset_zip_path = os.path.join(self.data_dir, 'dogs_classifier_dataset.zip')

    def prepare_data(self):
        """Download and extract the dataset unless the extracted folder already exists."""
        if not os.path.exists(os.path.join(self.data_dir, 'dogs_classifier_dataset')):
            self.download_and_extract_dataset()

    def download_and_extract_dataset(self):
        """Fetch the zip archive (if not already on disk) and extract it into ``data_dir``.

        Download and extraction failures are reported on stdout and the method
        returns without raising; ``setup`` then fails with a clear error if the
        dataset folder is still missing.
        """
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)

        if not os.path.exists(self.dataset_zip_path):
            print(f"Downloading dataset from {self.dataset_url}...")
            # Stream into a temporary file and rename on success, so an interrupted
            # download is never mistaken for a complete archive on the next run.
            partial_path = self.dataset_zip_path + '.part'
            try:
                with requests.get(self.dataset_url, stream=True, timeout=60) as response:
                    response.raise_for_status()  # Raises an HTTPError for bad responses
                    with open(partial_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=1 << 20):
                            f.write(chunk)
                os.replace(partial_path, self.dataset_zip_path)
                print("Download complete!")
            except requests.exceptions.RequestException as e:
                if os.path.exists(partial_path):
                    os.remove(partial_path)
                print(f"Error downloading the dataset: {e}")
                return

        print("Extracting dataset...")
        try:
            with zipfile.ZipFile(self.dataset_zip_path, 'r') as zip_ref:
                zip_ref.extractall(self.data_dir)
            print("Extraction complete!")
        except zipfile.BadZipFile:
            print("Error: The downloaded file is not a valid zip file.")
            # Remove the invalid archive so the next call downloads it again.
            os.remove(self.dataset_zip_path)
        except Exception as e:
            print(f"Error extracting the dataset: {e}")

    def setup(self, stage: Optional[str] = None):
        """Load the image folder and create the train/val/test subsets.

        Args:
            stage: Accepted for the Lightning hook signature; not used here.

        Raises:
            FileNotFoundError: If the extracted dataset folder does not exist
                (for example because the download or the extraction failed).
        """
        # Local import: the cell defining this class does not import torch itself.
        import torch

        # Ensure the dataset is ready even when setup() is called directly.
        self.prepare_data()

        dataset_path = os.path.join(self.data_dir, 'dogs_classifier_dataset')
        if not os.path.isdir(dataset_path):
            raise FileNotFoundError(
                f"Dataset directory '{dataset_path}' not found; "
                "the download or extraction step did not complete."
            )
        full_dataset = ImageFolder(dataset_path, transform=self.transform)

        # 80% train, 10% validation, remainder test.
        train_size = int(0.8 * len(full_dataset))
        val_size = int(0.1 * len(full_dataset))
        test_size = len(full_dataset) - train_size - val_size

        # Seeded generator: every run (training, evaluation) sees the same split.
        split_generator = torch.Generator().manual_seed(self.seed)
        self.train_dataset, self.val_dataset, self.test_dataset = random_split(
            full_dataset, [train_size, val_size, test_size], generator=split_generator
        )

    def train_dataloader(self):
        """Return the shuffled dataloader over the training subset."""
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Return the dataloader over the validation subset."""
        return DataLoader(self.val_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        """Return the dataloader over the test subset."""
        return DataLoader(self.test_dataset, batch_size=self.batch_size)


# model architecture - model.py


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import pytorch_lightning as pl

class DogsClassifier(pl.LightningModule):
    """ResNet-50 image classifier with a replaced final layer, trained with Adam.

    Args:
        num_classes: Number of output classes of the new final linear layer.
        learning_rate: Learning rate passed to the Adam optimizer.
    """

    def __init__(self, num_classes: int = 120, learning_rate: float = 1e-3):
        super().__init__()
        # Stores num_classes / learning_rate in self.hparams (and in checkpoints).
        self.save_hyperparameters()
        # `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 is the
        # weight set that flag selected, so the loaded weights are unchanged.
        self.model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        # Swap the ImageNet head for one sized to this task.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute, log and return the cross-entropy loss for one training batch."""
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def _shared_eval_step(self, batch, prefix):
        """Log ``{prefix}_loss`` and ``{prefix}_acc`` for one evaluation batch."""
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()
        self.log(f'{prefix}_loss', loss, prog_bar=True)
        self.log(f'{prefix}_acc', acc, prog_bar=True)

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` and ``val_acc`` for one validation batch."""
        self._shared_eval_step(batch, 'val')

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` and ``test_acc`` for one test batch."""
        self._shared_eval_step(batch, 'test')

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)



# train.py

In [8]:
import sys
# sys.path.append('/app')

import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, RichProgressBar
# from data_module.dogs_datamodule import DogsDataModule
# from model.dogs_classifier import DogsClassifier

def main():
    """Train the dogs classifier for one epoch and publish the best checkpoint.

    Logs to TensorBoard under ``logs/dogs_classifier`` and keeps the three
    checkpoints with the lowest ``val_loss`` in ``checkpoints/``. After fitting,
    the best checkpoint is also copied to ``checkpoints/best_model.ckpt``, the
    fixed path the evaluation and inference cells load from.

    Note: several cells in this notebook define a function named ``main``; the
    most recently executed cell determines which one a call to ``main()`` runs.
    """
    # Local import: shutil is only needed for the final copy step.
    import shutil

    # Create data module
    data_module = DogsDataModule(data_dir='data', batch_size=32)

    # Create model
    model = DogsClassifier(num_classes=120, learning_rate=1e-3)

    # Setup logging
    logger = TensorBoardLogger("logs", name="dogs_classifier")

    # Setup checkpointing
    checkpoint_callback = ModelCheckpoint(
        dirpath='checkpoints',
        filename='dogs_classifier-{epoch:02d}-{val_loss:.2f}',
        save_top_k=3,
        monitor='val_loss'
    )

    # Create trainer
    trainer = pl.Trainer(
        max_epochs=1,
        logger=logger,
        callbacks=[checkpoint_callback, RichProgressBar()],
        accelerator='auto',
    )

    # Train the model
    trainer.fit(model, data_module)

    # The callback writes files named after epoch and val_loss, so nothing called
    # best_model.ckpt exists yet. best_model_path is empty if no checkpoint was
    # saved; only the global-zero process copies to avoid concurrent writes.
    best_checkpoint = checkpoint_callback.best_model_path
    if best_checkpoint and trainer.is_global_zero:
        shutil.copyfile(best_checkpoint, "checkpoints/best_model.ckpt")

# if __name__ == "__main__":
#     main()

# eval.py

In [9]:
import pytorch_lightning as pl
# from data_modules.dogs_datamodule import DogsDataModule
# from models.dogs_classifier import DogsClassifier

def main(
    checkpoint_path: str = "checkpoints/best_model.ckpt",
    data_dir: str = 'data',
    batch_size: int = 32,
):
    """Evaluate a trained classifier on the test split.

    Args:
        checkpoint_path: Checkpoint file to restore the model from.
        data_dir: Dataset directory passed to ``DogsDataModule``.
        batch_size: Batch size passed to ``DogsDataModule``.

    Note: several cells in this notebook define a function named ``main``; the
    most recently executed cell determines which one a call to ``main()`` runs.
    """
    # Load data module
    data_module = DogsDataModule(data_dir=data_dir, batch_size=batch_size)

    # Restore the model from the given checkpoint file
    model = DogsClassifier.load_from_checkpoint(checkpoint_path)

    # Create trainer
    trainer = pl.Trainer(accelerator='auto')

    # Evaluate the model
    trainer.test(model, datamodule=data_module)

# if __name__ == "__main__":
#     main()



# infer.py

In [10]:
import torch
from PIL import Image
from torchvision import transforms
# from models.dogs_classifier import DogsClassifier

def main(
    image_path: str = "path/to/your/image.jpg",
    checkpoint_path: str = "checkpoints/best_model.ckpt",
):
    """Classify a single image and print the predicted class index.

    Args:
        image_path: Path of the image to classify. The default is a placeholder
            and must be replaced with a real file.
        checkpoint_path: Checkpoint file to restore the model from.

    Note: several cells in this notebook define a function named ``main``; the
    most recently executed cell determines which one a call to ``main()`` runs.
    """
    # Restore the model from the given checkpoint file
    model = DogsClassifier.load_from_checkpoint(checkpoint_path)
    model.eval()

    # Prepare image transform
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Load the image; the context manager closes the file handle, and convert()
    # returns an independent in-memory copy that stays valid afterwards.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')

    # Add the batch dimension and move the tensor to the model's device: the
    # restored model may live on an accelerator, and a CPU input would then fail
    # with a device mismatch.
    image = transform(image).unsqueeze(0).to(model.device)

    # Perform inference
    with torch.no_grad():
        output = model(image)
        prediction = torch.argmax(output, dim=1)

    print(f"Predicted class: {prediction.item()}")

# if __name__ == "__main__":
#     main()



In [11]:
# Build the data module with its defaults (data_dir='data', batch_size=32) and
# fetch/extract the dataset now if the extracted folder is not present yet.
data_module = DogsDataModule()
data_module.prepare_data()

Extracting dataset...
Extraction complete!
