In [1]:
import torch
import torch.nn as nn
import numpy
import cv2
import random


In [2]:


# Define a single Residual Block
class ResidualBlock(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    Args:
        in_channels: Channels of the incoming feature map.
        mid_channels: Channels used inside the bottleneck (first two convolutions).
        out_channels: Channels produced by the block.
        stride: Stride of the first 1x1 convolution (and of the projection
            shortcut, when one is used).
        downsample: Force a projection shortcut (1x1 conv + batch norm). A
            projection is also created automatically whenever ``stride != 1``
            or ``in_channels != out_channels``, because an identity shortcut
            could not be added to the main-branch output in those cases.

    Attributes set on the instance:
        downsample: True when the block uses a projection shortcut.
        branch_1: The projection shortcut, or None for an identity shortcut.
        branch_2: The main convolutional branch.
    """

    def __init__(self, in_channels, mid_channels, out_channels, stride=1, downsample=False):
        super().__init__()
        # An identity shortcut only works when the main branch preserves both
        # the channel count and the spatial size. Otherwise the residual
        # addition in forward() would raise a shape error, so fall back to a
        # projection even if the caller did not ask for one.
        shape_changes = stride != 1 or in_channels != out_channels
        self.downsample = bool(downsample) or shape_changes
        self.stride = stride

        # Main branch (1x1 -> 3x3 -> 1x1 convolutions). It is built before the
        # shortcut so parameter registration order stays the same as before.
        self.branch_2 = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
        )

        # Shortcut branch (for downsampling or channel matching)
        if self.downsample:
            self.branch_1 = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.branch_1 = None

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(branch_2(x) + shortcut(x))``."""
        # Shortcut connection: projection when present, identity otherwise.
        shortcut = x if self.branch_1 is None else self.branch_1(x)
        # Main branch
        out = self.branch_2(x)
        # Add shortcut and apply activation. The in-place add targets the
        # freshly computed main-branch output, never the caller's input.
        out += shortcut
        return self.relu(out)


# Define ResNet Stages
class ResNet50(nn.Module):
    """ResNet-50 style classifier assembled from bottleneck ``ResidualBlock``s.

    Layout: a convolutional stem (``stage_1``), four residual stages with
    3, 4, 6 and 3 blocks (``stage_2`` .. ``stage_5``), global average pooling
    and a single linear layer producing ``num_classes`` outputs.
    """

    def __init__(self, num_classes=4):
        super().__init__()

        # Stem: explicit zero padding, 7x7 stride-2 convolution, then 3x3 stride-2 max-pool.
        stem_layers = [
            nn.ZeroPad2d(3),
            nn.Conv2d(3, 64, kernel_size=7, stride=2, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        self.stage_1 = nn.Sequential(*stem_layers)

        # One row per residual stage:
        # (attribute name, in_channels, mid_channels, out_channels, num_blocks, stride)
        stage_specs = (
            ("stage_2", 64, 64, 256, 3, 1),
            ("stage_3", 256, 128, 512, 4, 2),
            ("stage_4", 512, 256, 1024, 6, 2),
            ("stage_5", 1024, 512, 2048, 3, 2),
        )
        for attr_name, c_in, c_mid, c_out, depth, stage_stride in stage_specs:
            stage = self._make_stage(c_in, c_mid, c_out, num_blocks=depth, stride=stage_stride)
            setattr(self, attr_name, stage)

        # Head: global average pooling followed by the classifier layer.
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(2048, num_classes)

    def _make_stage(self, in_channels, mid_channels, out_channels, num_blocks, stride):
        """Build one stage: a projecting first block followed by identity blocks."""
        # Only the first block changes channels/stride, so only it gets a projection shortcut.
        entry_block = ResidualBlock(in_channels, mid_channels, out_channels, stride=stride, downsample=True)
        follow_up_blocks = [
            ResidualBlock(out_channels, mid_channels, out_channels, stride=1)
            for _ in range(num_blocks - 1)
        ]
        return nn.Sequential(entry_block, *follow_up_blocks)

    def forward(self, x):
        """Run the stem and all stages, pool, flatten, and apply the classifier."""
        features = x
        for stage in (self.stage_1, self.stage_2, self.stage_3, self.stage_4, self.stage_5):
            features = stage(features)
        pooled = self.avg_pool(features)
        # Flatten everything except the batch dimension before the linear layer.
        return self.fc(torch.flatten(pooled, 1))


# Instantiate the model with a single output unit (num_classes=1).
model = ResNet50(num_classes=1)
# print(model)  # uncomment to print the layer-by-layer module structure


In [3]:
# Shape sanity check on a batch of 5 random 3x224x224 inputs.
# no_grad() avoids building an autograd graph that this throwaway pass never uses.
with torch.no_grad():
    output = model(torch.randn(5, 3, 224, 224))
output.shape

torch.Size([5, 1])

In [4]:
!unzip /content/dogcatmini.zip -d /content/dogcatmini

Archive:  /content/dogcatmini.zip
  inflating: /content/dogcatmini/cats_set/cat.4001.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4002.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4003.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4004.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4005.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4006.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4007.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4008.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4009.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4010.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4011.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4012.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4013.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4014.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4015.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4016.jpg  
  inflating: /content/dogcatmini/cats_set/cat.4017.jpg

In [5]:
from torch.utils.data import Dataset, DataLoader, random_split
import torch.optim as optim
from PIL import Image
import numpy as np
import os
import torch
from torchvision import transforms

# Preprocessing applied to every image before it is fed to the network.
transform = transforms.Compose(
    [
        # Fixed 224x224 input size (aspect ratio is not preserved).
        transforms.Resize((224, 224)),
        # PIL image -> tensor.
        transforms.ToTensor(),
        # Per-channel normalisation; these values match the commonly used ImageNet statistics.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Directories holding the extracted dog and cat images.
dog_path = '/content/dogcatmini/dogs_set'
cat_path = '/content/dogcatmini/cats_set'
class CustomDataset(Dataset):
    """Binary dog-vs-cat image dataset read from two directories.

    Every image file found directly inside ``dog_path`` is labelled 0 and
    every image file inside ``cat_path`` is labelled 1. Files are listed in
    sorted order so that a given index always refers to the same image.

    Args:
        dog_path: Directory containing the dog images.
        cat_path: Directory containing the cat images.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.

    Attributes:
        data: Image paths, dogs first, then cats.
        labels: NumPy integer array aligned with ``data`` (0: dog, 1: cat).
    """

    # Extensions accepted as images; everything else in the folders is ignored.
    IMAGE_EXTENSIONS = (
        '.jpg', '.jpeg', '.png', '.bmp', '.gif',
        '.ppm', '.pgm', '.tif', '.tiff', '.webp',
    )

    def __init__(self, dog_path, cat_path, transform=None):
        # Paths to directories
        self.dog_path = dog_path
        self.cat_path = cat_path

        # Collect file paths
        self.dog_files = self._list_images(self.dog_path)
        self.cat_files = self._list_images(self.cat_path)

        # Combine data and labels (0: dog, 1: cat)
        self.data = self.dog_files + self.cat_files
        self.labels = np.array(
            [0] * len(self.dog_files) + [1] * len(self.cat_files),
            dtype=np.int64,
        )

        # Store transformations
        self.transform = transform

    @classmethod
    def _list_images(cls, directory):
        """Return the sorted image file paths found directly inside ``directory``."""
        # os.listdir() order is arbitrary, so sort for reproducible indexing.
        # Hidden entries, sub-directories and non-image files (e.g. .DS_Store,
        # Thumbs.db) are skipped because they would make Image.open() fail.
        image_paths = []
        for name in sorted(os.listdir(directory)):
            if name.startswith('.') or not name.lower().endswith(cls.IMAGE_EXTENSIONS):
                continue
            full_path = os.path.join(directory, name)
            if os.path.isfile(full_path):
                image_paths.append(full_path)
        return image_paths

    def __len__(self):
        """Number of images across both classes."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for index ``idx``; the label is a ``torch.long`` tensor."""
        img_path = self.data[idx]
        # convert() returns a fully loaded copy, so the underlying file can be
        # closed immediately instead of staying open until garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')  # Ensure 3-channel RGB image

        # Apply transformations (if any)
        if self.transform is not None:
            image = self.transform(image)

        label = int(self.labels[idx])
        return image, torch.tensor(label, dtype=torch.long)

# Build the full dataset, then hold out 30% of it for validation.
access = CustomDataset(dog_path, cat_path, transform)
total_size = len(access)
train_size = int(0.7 * total_size)
val_size = total_size - train_size
# A seeded generator makes the split draw the same index permutation on every
# run, instead of a new train/validation membership each time the cell runs.
train_dataset, val_dataset = random_split(
    access,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# Shuffle only the training batches; validation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

print(f"Total dataset size: {total_size}")
print(f"Training dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")

Total dataset size: 1000
Training dataset size: 700
Validation dataset size: 300


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fresh model with a single output logit for binary classification, moved to the chosen device.
model = ResNet50(num_classes=1).to(device)

# BCEWithLogitsLoss takes raw logits, so the model itself applies no sigmoid.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training function
def train_model(model, dataloader, criterion, optimizer, num_epochs, device=None):
    """Train a single-logit binary classifier and print per-epoch metrics.

    Args:
        model: Module producing one logit per sample.
        dataloader: Iterable of ``(images, labels)`` batches with 0/1 labels.
        criterion: Loss taking ``(logits, float_targets)``, e.g. ``nn.BCEWithLogitsLoss``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over ``dataloader``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Returns:
        None. After each epoch, prints the mean batch loss and the accuracy
        obtained by thresholding ``sigmoid(logit)`` at 0.5.
    """
    if device is None:
        # Follow the model instead of relying on a notebook-level `device` global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0

        # Loop over batches
        for images, labels in tqdm(dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}"):
            images = images.to(device)
            labels = labels.to(device).float().reshape(-1)  # loss expects float targets

            # Zero the parameter gradients
            optimizer.zero_grad()

            # reshape(-1) instead of squeeze(): a bare squeeze() turns a final
            # batch of size 1 into a 0-d tensor, which no longer matches the
            # (1,)-shaped labels and makes the loss raise.
            outputs = model(images).reshape(-1)

            # Compute loss
            loss = criterion(outputs, labels)
            epoch_loss += loss.item()
            num_batches += 1

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Accuracy bookkeeping does not need gradients.
            preds = torch.sigmoid(outputs.detach()) > 0.5
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        # Print epoch summary; max(..., 1) avoids dividing by zero on an empty loader.
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / max(num_batches, 1):.4f}, Accuracy: {correct / max(total, 1):.4f}")

# Train on `train_loader` (the training split) for 10 epochs; train_model prints the loss and accuracy of each epoch.
train_model(model, train_loader, criterion, optimizer, num_epochs=10)


Epoch 1/10: 100%|██████████| 22/22 [00:11<00:00,  1.94it/s]


Epoch 1/10, Loss: 0.7099, Accuracy: 0.5586


Epoch 2/10: 100%|██████████| 22/22 [00:09<00:00,  2.37it/s]


Epoch 2/10, Loss: 0.6600, Accuracy: 0.6400


Epoch 3/10: 100%|██████████| 22/22 [00:08<00:00,  2.56it/s]


Epoch 3/10, Loss: 0.5723, Accuracy: 0.7129


Epoch 4/10: 100%|██████████| 22/22 [00:08<00:00,  2.54it/s]


Epoch 4/10, Loss: 0.5326, Accuracy: 0.7657


Epoch 5/10: 100%|██████████| 22/22 [00:08<00:00,  2.55it/s]


Epoch 5/10, Loss: 0.4676, Accuracy: 0.7757


Epoch 6/10: 100%|██████████| 22/22 [00:08<00:00,  2.61it/s]


Epoch 6/10, Loss: 0.3827, Accuracy: 0.8157


Epoch 7/10: 100%|██████████| 22/22 [00:08<00:00,  2.47it/s]


Epoch 7/10, Loss: 0.2816, Accuracy: 0.8900


Epoch 8/10: 100%|██████████| 22/22 [00:08<00:00,  2.47it/s]


Epoch 8/10, Loss: 0.3199, Accuracy: 0.8529


Epoch 9/10: 100%|██████████| 22/22 [00:08<00:00,  2.55it/s]


Epoch 9/10, Loss: 0.1688, Accuracy: 0.9443


Epoch 10/10: 100%|██████████| 22/22 [00:08<00:00,  2.47it/s]

Epoch 10/10, Loss: 0.0674, Accuracy: 0.9757





In [7]:
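# Example of modifying the ResNet50 head (left commented out for reference).
# NOTE: `ResNet50(num_classes=None)` below would fail while building `nn.Linear(2048, None)`;
# pass any integer instead, since `fc` is replaced by `nn.Identity()` right afterwards.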

# class Resnet50Modified(nn.Module):
#   def __init__(self, num_classes):
#     super(Resnet50Modified, self).__init__()
#     self.backbone=ResNet50(num_classes=None)
#     self.backbone.fc=nn.Identity()

#     self.new_fc=nn.Sequential(
#         nn.Linear(2048, 256),
#         nn.ReLU(),
#         nn.Linear(256, 128),
#         nn.ReLU(),
#         nn.Linear(128, num_classes)
#     )

#   def forward(self, x):
#     x=self.backbone(x)
#     x=self.new_fc(x)
#     return x