# 1. Build your own convolutional neural network using PyTorch

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
import pandas as pd
import numpy as np

In [21]:
# Device: run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Improved CNN
class DogHeartCNN(nn.Module):
    """Four-stage convolutional classifier for dog heart X-ray images.

    Each stage is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2),
    with the channel width doubling from 32 up to 256. The resulting feature
    map is pooled to a fixed 8x8 grid, flattened, and passed through a
    two-layer fully connected head with dropout.

    Args:
        num_classes: Number of output logits. Defaults to 3.

    The forward pass expects a float tensor of shape ``(N, 3, H, W)`` and
    returns raw logits of shape ``(N, num_classes)``. ``H`` and ``W`` must be
    at least 16 so that each of the four 2x2 max-pools has a non-empty output.
    """

    def __init__(self, num_classes: int = 3):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),

            # Fixes the spatial size fed to the classifier at 8x8 regardless
            # of the input resolution. For 128x128 inputs the map is already
            # 8x8, so this is an identity. The layer has no parameters and is
            # appended last, so the state_dict keys of the layers above are
            # unchanged and existing checkpoints still load.
            nn.AdaptiveAvgPool2d((8, 8)),
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 8 * 8, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits for a batch of images."""
        x = self.features(x)
        x = self.classifier(x)
        return x

# Build the network, then move its parameters and buffers to the selected device
model = DogHeartCNN()
model = model.to(device)

Using device: cpu


# 2. Train your model using the dog heart dataset (you may need to use Google Colab or Kaggle with a GPU to train your model)

### (1) Use torchvision.datasets.ImageFolder for the training dataset
### (2) Use a custom dataset and dataloader for the test dataset (returning the image tensor and file name)

In [23]:
# Paths
# The Train/Valid splits live under the dataset root; the test images sit in
# a separate folder next to it.
test_dir = "Dog_X_ray/Test"
data_dir = "Dog_X_ray/Dog_heart"
train_dir, valid_dir = (os.path.join(data_dir, split) for split in ("Train", "Valid"))

In [25]:
# Transforms
# Every image is resized to 128x128 and converted to a tensor. Training images
# additionally get a random horizontal flip, a random rotation (degrees=10)
# and brightness/contrast/saturation jitter as augmentation.
valid_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
])

# Datasets: one ImageFolder per split, each with its own transform
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)
valid_dataset = datasets.ImageFolder(root=valid_dir, transform=valid_transform)

# Loaders: only the training split is shuffled
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=32, shuffle=False)

print("Classes:", train_dataset.classes)

# Test dataset
class TestDataset(Dataset):
    """Unlabelled image dataset that yields ``(image, file_name)`` pairs.

    Args:
        test_dir: Directory holding the test images (not searched recursively).
        transform: Optional callable applied to each RGB PIL image.
        extensions: File suffixes, matched case-insensitively, that count as
            images. Entries with any other suffix are ignored.
    """

    # Suffixes accepted by default when scanning the test directory.
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".gif", ".webp")

    def __init__(self, test_dir, transform=None, extensions=IMAGE_EXTENSIONS):
        self.test_dir = test_dir
        self.transform = transform
        suffixes = tuple(ext.lower() for ext in extensions)
        # os.listdir returns entries in arbitrary order, so sort them to make
        # the sample order (and any file written from it) reproducible. Skip
        # sub-directories and non-image files, which Image.open cannot read.
        self.image_paths = [
            os.path.join(test_dir, fname)
            for fname in sorted(os.listdir(test_dir))
            if fname.lower().endswith(suffixes)
            and os.path.isfile(os.path.join(test_dir, fname))
        ]

    def __len__(self):
        """Return the number of image files found in ``test_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the (optionally transformed) RGB image and its base file name."""
        path = self.image_paths[idx]
        # convert() returns a new in-memory image, so the underlying file can
        # be closed as soon as the with-block exits.
        with Image.open(path) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, os.path.basename(path)

# Test images reuse the validation transform (resize + ToTensor, no
# augmentation); shuffling is disabled for the test loader.
test_dataset = TestDataset(test_dir=test_dir, transform=valid_transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

Classes: ['Large', 'Normal', 'Small']


In [27]:
# Loss, optimizer, scheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)
# mode='max' because the scheduler is stepped with validation accuracy: the
# learning rate is halved once accuracy has stopped improving for more than
# `patience` epochs. The deprecated `verbose` flag is not passed; learning-rate
# reductions are reported explicitly in the loop below instead.
scheduler = ReduceLROnPlateau(optimizer, mode='max', patience=3, factor=0.5)


def _run_epoch(net, loader, loss_fn, target_device, opt=None):
    """Run one full pass over ``loader``.

    Args:
        net: Model to train or evaluate.
        loader: Iterable of ``(images, labels)`` batches.
        loss_fn: Criterion called as ``loss_fn(outputs, labels)``.
        target_device: Device each batch is moved to before the forward pass.
        opt: Optimizer. When given, ``net`` is put in train mode and updated
            after every batch; when ``None``, ``net`` is put in eval mode and
            gradient tracking is disabled.

    Returns:
        ``(mean_loss, accuracy_percent)`` over all samples seen.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    training = opt is not None
    net.train(training)
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(target_device), labels.to(target_device)
            if training:
                opt.zero_grad()
            outputs = net(images)
            loss = loss_fn(outputs, labels)
            if training:
                loss.backward()
                opt.step()

            batch_size = labels.size(0)
            # The loss is a per-batch mean, so weight it by the batch size to
            # get a true per-sample average when the last batch is smaller.
            loss_sum += loss.item() * batch_size
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += batch_size

    if n_seen == 0:
        raise ValueError("loader yielded no samples")
    return loss_sum / n_seen, 100 * n_correct / n_seen


# Training loop (full 40 epochs)
num_epochs = 40
for epoch in range(num_epochs):
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, opt=optimizer)

    # Validation
    val_loss, val_acc = _run_epoch(model, valid_loader, criterion, device)

    lr_before = optimizer.param_groups[0]["lr"]
    scheduler.step(val_acc)
    lr_after = optimizer.param_groups[0]["lr"]
    if lr_after < lr_before:
        print(f"Epoch [{epoch+1}/{num_epochs}] "
              f"learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")




Epoch [1/40] Train Loss: 2.2448, Train Acc: 42.14% Val Loss: 1.0627, Val Acc: 45.50%
Epoch [2/40] Train Loss: 1.0293, Train Acc: 44.29% Val Loss: 0.9782, Val Acc: 44.50%
Epoch [3/40] Train Loss: 0.9656, Train Acc: 47.21% Val Loss: 0.9280, Val Acc: 47.50%
Epoch [4/40] Train Loss: 0.9286, Train Acc: 48.29% Val Loss: 0.8495, Val Acc: 60.50%
Epoch [5/40] Train Loss: 0.8600, Train Acc: 54.00% Val Loss: 0.7769, Val Acc: 62.50%
Epoch [6/40] Train Loss: 0.8017, Train Acc: 59.64% Val Loss: 0.7576, Val Acc: 62.00%
Epoch [7/40] Train Loss: 0.7677, Train Acc: 60.57% Val Loss: 0.7238, Val Acc: 62.50%
Epoch [8/40] Train Loss: 0.7368, Train Acc: 62.43% Val Loss: 0.7141, Val Acc: 65.00%
Epoch [9/40] Train Loss: 0.7284, Train Acc: 61.86% Val Loss: 0.7218, Val Acc: 61.50%
Epoch [10/40] Train Loss: 0.7011, Train Acc: 64.14% Val Loss: 0.6509, Val Acc: 69.00%
Epoch [11/40] Train Loss: 0.7091, Train Acc: 64.50% Val Loss: 0.6510, Val Acc: 71.50%
Epoch [12/40] Train Loss: 0.7080, Train Acc: 65.64% Val Loss: 0

In [32]:
# Persist only the learned weights (the state_dict), not the whole module object
weights = model.state_dict()
torch.save(weights, "dog_heart_cnn.pt")
print("Model saved to dog_heart_cnn.pt")

Model saved to dog_heart_cnn.pt


# 3. Evaluate your model using the developed software

In [29]:
# Predictions on test set
# Class-name <-> index lookups taken from the training ImageFolder.
label_map = dict(train_dataset.class_to_idx)
idx_to_class = {idx: class_name for class_name, idx in label_map.items()}

model.eval()
predictions = []

with torch.no_grad():
    for images, filenames in test_loader:
        outputs = model(images.to(device))
        # argmax already yields the class index, which is the numeric label
        # written to the CSV, so no index -> name -> index round trip is needed.
        predicted_labels = outputs.argmax(dim=1).cpu().tolist()
        predictions.extend(zip(filenames, predicted_labels))

# Save to CSV: one (file name, numeric label) row per image, no header row
df_preds = pd.DataFrame(predictions)
df_preds.to_csv("dog_heart_predictions.csv", index=False, header=False)
print("Saved predictions with numeric labels to dog_heart_predictions.csv")

Saved predictions with numeric labels to dog_heart_predictions.csv


# 4. Compare results with [RVT paper](https://www.nature.com/articles/s41598-023-50063-x). Requirement: performance is better than VGG16: 75%

The custom CNN model achieved a validation accuracy of 73%, which reflects competitive performance on the Dog Heart X-ray dataset. However, this result falls slightly below the benchmark set by VGG16, which achieved 75% accuracy. In contrast, the Regressive Vision Transformer (RVT), as proposed by Zhang et al. (2023), reached a notably higher accuracy of 92.3%, highlighting the effectiveness of transformer-based architectures in this domain. These findings suggest that incorporating advanced architectures such as RVT could lead to improved performance in future implementations.

# 5. Write a four-page paper report using the shared LaTeX template. Upload your paper to ResearchGate or arXiv, and put your paper link and GitHub weight link here.

https://www.researchgate.net/publication/390180445_Dog_Heart_Cnn_Report

https://github.com/rimapalli01/NeuralNetwork_Project1


# 6. Grading rubric

(1). Code ------- 20 points (you also need to upload your final model as a pt file)

(2). Grammar ---- 20 points

(3). Introduction & related work --- 10 points


(4). Method  ---- 20 points

(5). Results ---- 20 points

     >= 75% --> 10 points
     < 55% --> 0 points
     >= 55% and < 75% --> 0.5 points per percent
     

(6). Discussion - 10 points