In [1]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split, Dataset
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
import torchvision.models as models
import torch.nn as nn
import os
from PIL import Image
import tqdm
import torch

class CatDogDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Directory with subdirectories for each class.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.samples = []
        self.class_names = ['antelope', 'bear', 'bird', 'cat', 'dog']

        # Load all image file paths and their labels
        for class_index, sub_dir in enumerate(self.class_names):
            class_path = os.path.join(root_dir, sub_dir)
            if os.path.isdir(class_path):
                for img_file in os.listdir(class_path):
                    if img_file.lower().endswith(('png', 'jpg', 'jpeg')):
                        img_path = os.path.join(class_path, img_file)
                        self.samples.append((img_path, class_index))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        image = Image.open(img_path)
        if self.transform:
            image = self.transform(image)
        return image, label


In [3]:
# Define the transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create the dataset
dataset = CatDogDataset(root_dir='Classified_Animals', transform=transform)

from sklearn.model_selection import KFold
import numpy as np

# Assuming dataset is your CatDogDataset instance
dataset_size = len(dataset)
indices = list(range(dataset_size))

# Define the KFold cross-validator
kf = KFold(n_splits=3, shuffle=True, random_state=42)


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CatOrNotCNN(nn.Module):
    def __init__(self):
        super(CatOrNotCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=64 * 56 * 56, out_features=512)
        self.fc2 = nn.Linear(512, 5)  # 2 classes: cat or not

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [5]:
import matplotlib.pyplot as plt

def visualize_feature_maps(feature_maps):
    # Assuming feature_maps is a list of tensors from the convolutional layers
    for layer, f_map in enumerate(feature_maps):
        layer += 1  # Start layer indexing at 1
        plt.figure(figsize=(20, 15))
        channels = f_map.shape[1]
        
        for i in range(channels):
            plt.subplot(channels // 8 + 1, 8, i + 1)  # Arrange plots in a grid
            plt.imshow(f_map[0, i].detach().cpu().numpy(), cmap='gray')
            plt.axis('off')
        
        plt.show()


In [6]:
def train_model(model, train_loader, val_loader, num_epochs=3):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    for epoch in tqdm.tqdm(range(num_epochs)):
        model.train()
        running_loss = 0.0
        for images, labels in tqdm.tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)  # No need for conv_outputs here
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)  # No need for conv_outputs here
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                

        print(f'Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}, Validation Loss: {val_loss / len(val_loader)}, Accuracy: {100 * correct / total}%')

def train_model_for_fold(model, train_indices, val_indices):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Subset the dataset for the current fold
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_subsampler = torch.utils.data.SubsetRandomSampler(val_indices)
    
    # Create data loaders for the current fold
    train_loader = DataLoader(dataset, batch_size=32, sampler=train_subsampler)
    val_loader = DataLoader(dataset, batch_size=32, sampler=val_subsampler)
    
    # Place your existing training code here, modified to use the fold's loaders
    # Make sure to reset the model to its initial state before training
    train_model(model, train_loader, val_loader, num_epochs=5)
    
    # After training, evaluate the model to get predictions and true labels
    all_preds = []
    all_labels = []
    model.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.view(-1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    # Calculate and display the confusion matrix
    print('Classification Report:')
    print(classification_report(all_labels, all_preds))
    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, all_preds))

# Loop over each fold
for fold, (train_indices, val_indices) in enumerate(kf.split(indices)):
    print(f"Training on fold {fold+1}/3...")
    # Initialize your model for each fold
    model = CatOrNotCNN()
    val_loader = train_model_for_fold(model, train_indices, val_indices)

print('Finished Training')

Training on fold 1/3...


  return torch._C._cuda_getDeviceCount() > 0
100%|██████████| 5/5 [00:12<00:00,  2.57s/it]
 20%|██        | 1/5 [00:15<01:01, 15.36s/it]

Epoch 1, Loss: 3.2352697134017943, Validation Loss: 3.153952121734619, Accuracy: 20.0%


100%|██████████| 5/5 [00:13<00:00,  2.64s/it]
 40%|████      | 2/5 [00:31<00:46, 15.61s/it]

Epoch 2, Loss: 1.8774646520614624, Validation Loss: 1.6524381240208943, Accuracy: 40.0%


100%|██████████| 5/5 [00:11<00:00,  2.31s/it]
 60%|██████    | 3/5 [00:44<00:29, 14.80s/it]

Epoch 3, Loss: 1.365902853012085, Validation Loss: 1.2556060155232747, Accuracy: 53.75%


100%|██████████| 5/5 [00:11<00:00,  2.28s/it]
 80%|████████  | 4/5 [00:58<00:14, 14.38s/it]

Epoch 4, Loss: 0.9366638898849488, Validation Loss: 1.4031315247217815, Accuracy: 42.5%


100%|██████████| 5/5 [00:10<00:00,  2.13s/it]
100%|██████████| 5/5 [01:11<00:00, 14.32s/it]

Epoch 5, Loss: 0.728162693977356, Validation Loss: 0.9973511497179667, Accuracy: 63.75%





Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.56      0.60        16
           1       0.72      0.88      0.79        24
           3       0.52      0.70      0.60        20
           4       0.70      0.35      0.47        20

    accuracy                           0.64        80
   macro avg       0.65      0.62      0.61        80
weighted avg       0.65      0.64      0.62        80

Confusion Matrix:
[[ 9  3  4  0]
 [ 0 21  2  1]
 [ 1  3 14  2]
 [ 4  2  7  7]]
Training on fold 2/3...


100%|██████████| 5/5 [00:11<00:00,  2.32s/it]
 20%|██        | 1/5 [00:13<00:55, 13.81s/it]

Epoch 1, Loss: 2.6120755195617678, Validation Loss: 3.2100423177083335, Accuracy: 27.5%


100%|██████████| 5/5 [00:11<00:00,  2.32s/it]
 40%|████      | 2/5 [00:27<00:41, 13.92s/it]

Epoch 2, Loss: 1.6435523748397827, Validation Loss: 1.7726563215255737, Accuracy: 35.0%


100%|██████████| 5/5 [00:11<00:00,  2.20s/it]
 60%|██████    | 3/5 [00:41<00:27, 13.59s/it]

Epoch 3, Loss: 1.1554855942726134, Validation Loss: 1.6499019861221313, Accuracy: 45.0%


100%|██████████| 5/5 [00:10<00:00,  2.19s/it]
 80%|████████  | 4/5 [00:54<00:13, 13.41s/it]

Epoch 4, Loss: 0.8276603698730469, Validation Loss: 1.1466656724611919, Accuracy: 60.0%


100%|██████████| 5/5 [00:10<00:00,  2.19s/it]
100%|██████████| 5/5 [01:07<00:00, 13.45s/it]

Epoch 5, Loss: 0.5530712604522705, Validation Loss: 1.2692759831746419, Accuracy: 62.5%





Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.62      0.70        24
           1       0.69      0.53      0.60        17
           3       0.75      0.62      0.68        24
           4       0.39      0.73      0.51        15

    accuracy                           0.62        80
   macro avg       0.66      0.63      0.62        80
weighted avg       0.68      0.62      0.64        80

Confusion Matrix:
[[15  1  2  6]
 [ 1  9  2  5]
 [ 2  1 15  6]
 [ 1  2  1 11]]
Training on fold 3/3...


 60%|██████    | 3/5 [00:10<00:06,  3.39s/it]
  0%|          | 0/5 [00:10<?, ?it/s]


KeyboardInterrupt: 