Importing the libraries

In [196]:
import datetime
import os
import ssl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from torch import optim
from torch.utils.data import Dataset,DataLoader
from torchvision import datasets
from torchvision.io import read_image
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import ToTensor
from torchvision.transforms import functional as F
# WARNING: this replaces the default HTTPS context factory with the unverified
# one, so every later HTTPS request made through the default context in this
# process skips certificate verification.
# NOTE(review): presumably a workaround for certificate errors when downloading
# pretrained weights -- confirm, and prefer fixing the local certificate store.
ssl._create_default_https_context = ssl._create_unverified_context

In [197]:
from PIL import Image

class custom_dataset(Dataset):
    """Image dataset driven by an annotations CSV.

    The CSV is read with ``pandas.read_csv``. Column 0 is used as the image
    file name (relative to ``img_dir``) and column 1 as the label.
    NOTE(review): column 1 is assumed to hold an integer class label -- confirm
    against the CSV schema.

    Args:
        annotations_file: Path to the annotations CSV.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the image array.
        target_transform: Optional callable applied to the label.
        image_size: ``(width, height)`` every image is resized to.
    """

    def __init__(self, annotations_file, img_dir, transform=None,
                 target_transform=None, image_size=(256, 256)):
        self.img_labels = pd.read_csv(annotations_file)  # DataFrame, one row per annotation
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.image_size = tuple(image_size)

    def __len__(self):
        """Return the number of rows in the annotations table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` after the optional transforms."""
        row = self.img_labels.iloc[idx]
        img_path = os.path.join(self.img_dir, row.iloc[0])
        # The context manager closes the file handle deterministically.
        # convert("RGB") guarantees 3 channels so grayscale/RGBA files still
        # match the 3-channel models and can be batched together.
        with Image.open(img_path) as img:
            image = np.asarray(img.convert("RGB").resize(self.image_size))
        label = row.iloc[1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [198]:
def _one_hot_650(y):
    """Return a float vector of length 650 that is 1 at index ``y`` and 0 elsewhere."""
    # NOTE(review): 650 is hard-coded here while the models below are built
    # with 3 classes -- confirm the intended label width.
    encoded = torch.zeros(650, dtype=torch.float)
    encoded.scatter_(dim=0, index=torch.tensor(y), value=1)
    return encoded


target_transform = torchvision.transforms.Lambda(_one_hot_650)

Importing the dataset

In [199]:
# Single place to point the notebook at the dataset. Set the RPS_DATA_ROOT
# environment variable to run on another machine; the default is the original
# local path.
DATA_ROOT = os.environ.get('RPS_DATA_ROOT',
                           r'/Users/namanparuthi/Desktop/Rock Paper Scissors SXSW-3')
TRAIN_DIR = os.path.join(DATA_ROOT, 'train')
TEST_DIR = os.path.join(DATA_ROOT, 'test')

# Each split keeps its annotations CSV next to its images.
training_dataset = custom_dataset(os.path.join(TRAIN_DIR, '_annotations.csv'),
                                  TRAIN_DIR,
                                  ToTensor(),
                                  target_transform=target_transform)

testing_dataset = custom_dataset(os.path.join(TEST_DIR, '_annotations.csv'),
                                 TEST_DIR,
                                 ToTensor(),
                                 target_transform=target_transform)

In [200]:
# Shuffle only the training batches; keeping the test order fixed makes
# evaluation runs reproducible and lets predictions be matched back to rows.
train_loader = DataLoader(training_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(testing_dataset, batch_size=10, shuffle=False)

Defining the object detection model (Faster R-CNN)

In [201]:
def create_object_detection_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) with a fresh box-prediction head.

    The detector is created with ``pretrained=True`` and its
    ``roi_heads.box_predictor`` is replaced by a ``FastRCNNPredictor`` that
    outputs ``num_classes`` classes.
    NOTE(review): torchvision detection heads normally count the background as
    one of the classes -- confirm ``num_classes`` includes it.

    Args:
        num_classes: Number of output classes for the new box predictor.

    Returns:
        The detector with the replaced prediction head.
    """
    detector = fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads
    # Input width of the existing classification layer; the new head must match it.
    head_width = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_width, num_classes)
    return detector

Defining the classification CNN (custom CNN)

In [202]:
class ClassificationCNN(nn.Module):
    """Small two-convolution CNN that maps RGB image batches to class logits.

    Args:
        num_classes: Number of output logits.

    Input is a float tensor of shape ``(N, 3, H, W)``; output has shape
    ``(N, num_classes)``. After the two 2x2 max-pools the feature map is
    adaptively average-pooled to 8x8, so inputs are no longer restricted to
    32x32 (for which the adaptive pool is an identity).
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # Parameter-free; fixes the spatial size that fc1 expects.
        self.pool = nn.AdaptiveAvgPool2d((8, 8))
        self.fc1 = nn.Linear(128 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return unnormalised class scores for the image batch ``x``."""
        # Use torch.nn.functional explicitly: the notebook-level name ``F`` is
        # torchvision.transforms.functional, which has no relu/max_pool2d.
        fn = nn.functional
        x = fn.relu(fn.max_pool2d(self.conv1(x), 2))
        x = fn.relu(fn.max_pool2d(self.conv2(x), 2))
        x = self.pool(x)
        x = x.flatten(1)
        x = fn.relu(self.fc1(x))
        return self.fc2(x)
        


Combining the detection and classification models

In [203]:
class CombinedModel(nn.Module):
    """Faster R-CNN detector followed by a CNN that classifies every detected crop.

    Args:
        num_classes_detection: Class count passed to ``create_object_detection_model``.
        num_classes_classification: Number of logits produced by ``ClassificationCNN``.
        crop_size: ``(height, width)`` each detected region is resized to before
            classification. The default 32x32 matches the classifier's
            ``128 * 8 * 8`` fully connected input after two 2x2 poolings.
    """

    def __init__(self, num_classes_detection, num_classes_classification, crop_size=(32, 32)):
        super().__init__()
        self.object_detection_model = create_object_detection_model(num_classes_detection)
        self.classification_model = ClassificationCNN(num_classes_classification)
        self.num_classes_classification = num_classes_classification
        self.crop_size = tuple(crop_size)

    def forward(self, images, targets=None):
        """Run detection and, in eval mode, classify each detected box.

        Args:
            images: Batch of ``(C, H, W)`` image tensors (list or batched tensor).
            targets: Detection targets, forwarded to the detector. The detector
                asserts that they are provided while in training mode.

        Returns:
            Training mode: ``(detector_output, None)``. No crops are classified
            because the detector does not return boxes in this mode.
            NOTE(review): per the torchvision docs the detector output here is a
            dict of losses -- confirm before summing it into a training loss.

            Eval mode: ``(detections, logits)``, where ``logits`` has one row per
            detected box, ordered image by image in the order of
            ``detections[i]['boxes']``.
        """
        if self.training:
            return self.object_detection_model(images, targets), None

        detection_output = self.object_detection_model(images)

        crops = []
        # Pair every image with its own detections. The previous code took the
        # boxes of image 0 and indexed ``images`` by box number.
        for image, detection in zip(images, detection_output):
            for box in detection['boxes']:
                left, top, right, bottom = (int(v) for v in box.tolist())
                # Integer truncation can collapse a thin box; keep at least one
                # pixel so logits stay aligned one-to-one with the boxes.
                height = max(bottom - top, 1)
                width = max(right - left, 1)
                # Resize to a common size so the crops can be stacked.
                crops.append(F.resized_crop(image, top, left, height, width, list(self.crop_size)))

        if crops:
            classification_output = self.classification_model(torch.stack(crops))
        else:
            # Nothing detected: return an empty (0, num_classes) tensor instead
            # of failing inside torch.stack.
            classification_output = next(self.parameters()).new_zeros(
                (0, self.num_classes_classification))
        return detection_output, classification_output


Training the model

In [204]:
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Each batch is run through ``model``, scored with ``loss_fn(outputs, label)``
    and used for one optimizer step. The summed batch loss is printed with a
    timestamp on the first epoch and on every tenth epoch.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer that updates the parameters of ``model``.
        model: Module called as ``model(image)``; its output must be accepted by
            ``loss_fn`` together with the batch label.
        loss_fn: Callable returning a scalar loss tensor.
        train_loader: Iterable yielding ``(image, label)`` batches.
    """
    for epoch in range(1, n_epochs + 1):
        model.train()  # once per epoch is enough; nothing in the loop leaves train mode
        epoch_loss = 0
        for image, label in train_loader:
            optimizer.zero_grad()  # clear stale gradients before the new forward/backward
            outputs = model(image)
            loss_train = loss_fn(outputs, label)
            loss_train.backward()
            optimizer.step()

            epoch_loss += loss_train.item()  # sum of batch losses for this epoch

        if epoch == 1 or epoch % 10 == 0:
            # Needs the ``datetime`` module imported at the top of the notebook.
            print(f'{datetime.datetime.now()} Epoch {epoch}, train loss={epoch_loss}')

In [205]:
# Training configuration. The former self-assignments (``optimizer = optimizer``
# and the like) were no-ops and have been removed.
# NOTE(review): torchvision detection heads normally count the background as a
# class, so three object classes would need num_classes_detection=4 -- confirm.
model = CombinedModel(num_classes_detection=3, num_classes_classification=3)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
loss_fn = nn.CrossEntropyLoss()
n_epochs = 1





In [206]:
# Use the epoch count configured in the previous cell instead of a literal,
# so changing n_epochs there actually takes effect.
training_loop(n_epochs, optimizer, model, loss_fn, train_loader)

AssertionError: targets should not be none when in training mode