In [31]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split
import torchvision.models as models
import torch.nn as nn
import torch
from sklearn.model_selection import KFold
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay

In [32]:
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
import tqdm


class CatNotCatDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.samples = []
        self.class_to_idx = {'antelope': 0, 'bear': 1, 'bird': 2, 'cat': 3, 'dog': 4}  # Example class mapping

        for class_name, class_idx in self.class_to_idx.items():
            class_path = os.path.join(root_dir, class_name)
            if os.path.isdir(class_path):
                for img_file in os.listdir(class_path):
                    if img_file.lower().endswith(('png', 'jpg', 'jpeg')):
                        img_path = os.path.join(class_path, img_file)
                        self.samples.append((img_path, class_idx))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        image = Image.open(img_path)
        if self.transform:
            image = self.transform(image)
        return image, label


In [33]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet normalization
])

dataset = CatNotCatDataset(root_dir='Classified_Animals', transform=transform)

# Splitting dataset into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Creating data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [34]:
# Define the ResNet-18 model class for binary classification
class ResNet18Binary(nn.Module):
    def __init__(self, pretrained=True):
        super(ResNet18Binary, self).__init__()
        self.resnet18 = models.resnet18(pretrained=pretrained)
        num_ftrs = self.resnet18.fc.in_features
        self.resnet18.fc = nn.Linear(num_ftrs, 5)

    def forward(self, x):
        return self.resnet18(x)

In [35]:
import matplotlib.pyplot as plt

def visualize_feature_maps(feature_maps):
    # Assuming feature_maps is a list of tensors from the convolutional layers
    for layer, f_map in enumerate(feature_maps):
        layer += 1  # Start layer indexing at 1
        plt.figure(figsize=(20, 15))
        channels = f_map.shape[1]
        
        for i in range(channels):
            plt.subplot(channels // 8 + 1, 8, i + 1)  # Arrange plots in a grid
            plt.imshow(f_map[0, i].detach().cpu().numpy(), cmap='gray')
            plt.axis('off')
        
        plt.show()


In [36]:
def train_model_cv(model, dataset, train_idx, val_idx, num_epochs=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    
    # Creating data loaders for the current fold
    train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_idx)
    train_loader = DataLoader(dataset, batch_size=32, sampler=train_sampler)
    val_loader = DataLoader(dataset, batch_size=32, sampler=val_sampler)
    
    for epoch in tqdm.tqdm(range(num_epochs)):
        model.train()
        running_loss = 0.0
        for images, labels in tqdm.tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        # Print accuracy for the current epoch
        print(f'Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}, ' +
              f'Validation Loss: {val_loss / len(val_loader)}, ' +
              f'Accuracy: {100 * correct / total}%')
    
    # After training, print classification report and confusion matrix
    print('Classification Report:')
    print(classification_report(all_labels, all_preds))
    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, all_preds))

    print('Finished Training')

# Implementing 3-fold cross-validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)
for fold, (train_idx, val_idx) in enumerate(kf.split(np.arange(len(dataset)))):
    print(f"Fold {fold + 1}")
    model = ResNet18Binary(pretrained=True)  # Re-instantiate the model for each fold
    train_model_cv(model, dataset, train_idx, val_idx, num_epochs=3)

Fold 1


100%|██████████| 5/5 [00:17<00:00,  3.41s/it]
 33%|███▎      | 1/3 [00:20<00:40, 20.26s/it]

Epoch 1, Loss: 1.2261739492416381, Validation Loss: 0.5360958576202393, Accuracy: 88.75%


100%|██████████| 5/5 [00:15<00:00,  3.20s/it]
 67%|██████▋   | 2/3 [00:39<00:19, 19.74s/it]

Epoch 2, Loss: 0.24276931434869767, Validation Loss: 0.22628219922383627, Accuracy: 97.5%


100%|██████████| 5/5 [00:24<00:00,  4.82s/it]
100%|██████████| 3/3 [01:06<00:00, 22.31s/it]

Epoch 3, Loss: 0.07293698564171791, Validation Loss: 0.12192034721374512, Accuracy: 97.5%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       0.96      1.00      0.98        24
           3       0.95      0.95      0.95        20
           4       1.00      0.95      0.97        20

    accuracy                           0.97        80
   macro avg       0.98      0.98      0.98        80
weighted avg       0.98      0.97      0.97        80

Confusion Matrix:
[[16  0  0  0]
 [ 0 24  0  0]
 [ 0  1 19  0]
 [ 0  0  1 19]]
Finished Training
Fold 2



  0%|          | 0/3 [00:00<?, ?it/s]