In [13]:
from torch import cuda,optim
from PIL import Image
import torch
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms,models,datasets
from torchvision.ops import nms
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd,numpy as np
import torchsummary
from torch import nn
from pathlib import Path
from pandas import DataFrame as df
from glob import glob
device = 'cuda' if cuda.is_available() else 'cpu'
import logging
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    handlers=[logging.FileHandler('app.log'), logging.StreamHandler()])
logger = logging.getLogger(__name__)
%matplotlib inline

DS_FOLDER = './data/open-images-bus-trucks/open-images-bus-trucks'
IMAGE_ROOT = DS_FOLDER + '/images'
df = pd.read_csv(DS_FOLDER+'/df.csv')
df.columns

2025-04-02 14:46:08,889 - DEBUG - Loaded backend module://matplotlib_inline.backend_inline version unknown.


Index(['ImageID', 'Source', 'LabelName', 'Confidence', 'XMin', 'XMax', 'YMin',
       'YMax', 'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction',
       'IsInside', 'XClick1X', 'XClick2X', 'XClick3X', 'XClick4X', 'XClick1Y',
       'XClick2Y', 'XClick3Y', 'XClick4Y'],
      dtype='object')

In [9]:
# Lookup tables between class names and integer targets.
# Real classes are numbered from 1 in order of first appearance in the
# annotation table; id 0 is reserved for the background class.
label2targets = {
    label: target
    for target, label in enumerate(df['LabelName'].unique(), start=1)
}
label2targets['background'] = 0
# Inverse mapping: integer target -> class name.
target2labels = {target: label for label, target in label2targets.items()}
background_class = label2targets['background']
# Number of classes including background.
num_classes = len(label2targets)

In [84]:
# Scratch check: the builtin sum() over a dict's values adds the values up.
sum({'hello':1,'one':2}.values())

3

In [10]:
def process_img(img):
    """Turn a 3-D image array into a float tensor on `device`.

    The last axis of `img` is moved to the front (axes reordered as 2, 0, 1),
    i.e. a channels-last array becomes channels-first.
    """
    channels_first = torch.tensor(img).permute(2, 0, 1)
    on_device = channels_first.to(device)
    return on_device.float()

In [71]:
class OpenDataset(Dataset):
    """Detection dataset built from an annotation table and an image folder.

    Each item is ``(image, target)``: the image is loaded as RGB, resized to
    224x224, divided by 255 and converted by ``process_img``; ``target`` is a
    dict with ``'boxes'`` (float tensor of XMin, YMin, XMax, YMax in resized
    pixel coordinates) and ``'labels'`` (long tensor of ``label2targets`` ids).

    Args:
        df: annotation table with at least the columns ``ImageID``,
            ``LabelName``, ``XMin``, ``YMin``, ``XMax``, ``YMax``. The box
            columns are multiplied by the target width/height, so they are
            presumably normalised to [0, 1] -- confirm against the CSV.
        images_path: directory that holds the image files.
    """
    def __init__(self,df,images_path=IMAGE_ROOT):
        self.w,self.h = 224,224
        self.images_path = images_path
        self.files = glob(images_path + '/*')
        # Index the files by stem once so that item lookup is a dict access
        # rather than a scan over every file on each __getitem__ call.
        # setdefault keeps the first file when two share a stem.
        self._file_by_stem = {}
        for file_path in self.files:
            self._file_by_stem.setdefault(Path(file_path).stem, file_path)
        self.df = df
        self.image_info = df['ImageID'].unique()

    def _find_image(self, image_id):
        """Return the path of the image file that belongs to `image_id`."""
        path = self._file_by_stem.get(image_id)
        if path is None:
            # Fall back to a substring match for files whose stem is not
            # exactly the image id.
            path = next((f for f in self.files if str(image_id) in f), None)
        if path is None:
            # A bare IndexError here would be read as "end of data" by the
            # sequence-iteration protocol, silently truncating the dataset.
            raise FileNotFoundError(
                f'no image file for ImageID {image_id!r} in {self.images_path}'
            )
        return path

    def __getitem__(self, index):
        image_id = self.image_info[index]
        img = Image.open(self._find_image(image_id)).convert("RGB")
        img = np.array(img.resize((self.w,self.h),resample = Image.BILINEAR))/255
        # Use the table this dataset was built with, not the notebook-global df.
        rows = self.df[self.df['ImageID'] == image_id]
        labels = rows['LabelName']
        # Scale into resized pixel coordinates. This builds a new array rather
        # than multiplying in place, because `.values`/`to_numpy()` may hand
        # back a read-only array.
        scale = np.array([self.w, self.h, self.w, self.h])
        coords = rows[['XMin','YMin','XMax','YMax']].to_numpy() * scale

        # Truncate to whole pixels.
        boxes = coords.astype(np.uint32).tolist()
        target = {}
        target['boxes'] = torch.Tensor(boxes).float()
        target['labels'] = torch.Tensor([label2targets[i] for i in labels]).long()
        img = process_img(img)
        return img,target

    def collate_fn(self,batch):
        """Regroup a list of (image, target) pairs into (images, targets) tuples."""
        return tuple(zip(*batch))

    def __len__(self):
        """Number of distinct image ids in the annotation table."""
        return len(self.image_info)

In [72]:
from sklearn.model_selection import train_test_split

# Split at the image level (not the row level) so that every box of an image
# lands on the same side. A fixed random_state makes the split, and therefore
# every downstream metric, reproducible across kernel restarts.
train_ids,test_ids = train_test_split(df.ImageID.unique(),test_size=.1,random_state=42)
train_df,test_df = df[df['ImageID'].isin(train_ids)],df[df['ImageID'].isin(test_ids)]
train_ds ,test_ds = OpenDataset(train_df),OpenDataset(test_df)

In [73]:
# Training batches are reshuffled every epoch so SGD does not see the images
# in the fixed order of the annotation table; the trailing partial batch is dropped.
train_loader = DataLoader(train_ds, batch_size=4,shuffle=True,collate_fn=train_ds.collate_fn,drop_last=True)
# Validation keeps a fixed order and keeps the trailing partial batch so that
# every held-out image contributes to the reported loss.
test_loader = DataLoader(test_ds, batch_size=4,shuffle=False,collate_fn=test_ds.collate_fn,drop_last=False)

In [74]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn,FasterRCNN_ResNet50_FPN_Weights
# Prefer the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def faster_model():
    """Build a Faster R-CNN (ResNet-50 FPN) with a box head sized for `num_classes`.

    The detector is created with the default pretrained weights and its box
    predictor is replaced by a new FastRCNNPredictor that has the same input
    width and `num_classes` outputs.
    """
    detector = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    new_head = FastRCNNPredictor(head_in_features, num_classes)
    detector.roi_heads.box_predictor = new_head
    return detector

In [75]:
# Build a model and let the notebook display its module tree as the cell output.
faster_model()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [95]:
def train_batch(inputs,model,optimizer):
    """Run one optimisation step on a batch.

    Args:
        inputs: ``(images, targets)`` where ``images`` is an iterable of image
            tensors and ``targets`` an iterable of dicts of tensors.
        model: detection model that returns a dict of losses when called with
            images and targets in training mode.
        optimizer: optimizer over the model's parameters.

    Returns:
        ``(loss, losses)``: the summed loss tensor and the per-component dict.
    """
    images,targets = inputs
    images = [image.to(device) for image in images]
    targets = [{k:v.to(device) for k,v in t.items()} for t in targets]
    # The loss dict is only produced in training mode; make sure a model left
    # in eval mode by earlier code does not return detections instead.
    model.train()
    optimizer.zero_grad()
    losses = model(images,targets)
    loss = sum(losses.values())
    loss.backward()
    optimizer.step()
    return loss,losses

@torch.no_grad()
def eval_batch(inputs,model):
    """Compute the losses for a batch without updating the model.

    Args:
        inputs: ``(images, targets)`` where ``images`` is an iterable of image
            tensors and ``targets`` an iterable of dicts of tensors.
        model: detection model that returns a dict of losses when called with
            images and targets in training mode.

    Returns:
        ``(loss, losses)``: the summed loss tensor and the per-component dict.
        No gradients are tracked, so neither can be back-propagated.
    """
    images,targets = inputs
    images = [image.to(device) for image in images]
    targets = [{k:v.to(device) for k,v in t.items()} for t in targets]
    # The loss dict is only returned in training mode, so switch to it for the
    # forward pass and restore whatever mode the caller had afterwards.
    was_training = model.training
    model.train()
    try:
        losses = model(images,targets)
    finally:
        model.train(was_training)
    loss = sum(losses.values())
    # No backward pass: gradients are disabled here and evaluation must not
    # influence the weights.
    return loss,losses


# Smoke test: push one batch of random images and targets through a fresh
# model to check that the training-mode forward pass runs end to end.
images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 12, 4)
# Turn (x1, y1, w, h)-style random numbers into (x1, y1, x2, y2) with x2 > x1, y2 > y1.
boxes[:, :, 2:4] = boxes[:, :, 0:2] + boxes[:, :, 2:4]
# Draw object labels from the real classes only: id 0 is the background class
# and the upper bound follows num_classes instead of a hard-coded 3.
labels = torch.randint(1, num_classes, (4, 12))
images = list(images)
model = faster_model()
targets = [{'boxes': boxes[i], 'labels': labels[i]} for i in range(len(images))]
output = model(images, targets)

In [None]:
model = faster_model().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.005,momentum=0.9,weight_decay=0.0005)
n_epochs = 4
# Per-epoch history. No accuracy metric is computed for detection in this
# loop, so the *_accs lists receive a NaN placeholder per epoch to stay
# aligned with the loss lists.
train_epoch_accs ,test_epoch_accs,train_epoch_loss,test_epoch_loss = [],[],[],[]
for ex in range(n_epochs):
    # ---- training pass ----
    model.train()
    train_losses = []
    for bx, data in enumerate(train_loader):
        loss, losses = train_batch(data,model, optimizer)
        regr_loss = losses['loss_box_reg']
        # Store a plain float: np.mean cannot consume tensors that require
        # grad or live on the GPU, and keeping tensors would pin memory.
        train_losses.append(loss.item())
        logger.debug(f'train loss = {loss},train regr_loss is {regr_loss}  on bx {bx}')

    train_epoch_accs.append(float('nan'))
    train_epoch_loss.append(np.mean(train_losses))

    # ---- validation pass ----
    # Forward only: no backward() and no optimizer step, so the held-out set
    # never influences the weights. The model stays in training mode because
    # that is the mode in which it returns the loss dict.
    test_losses = []
    with torch.no_grad():
        for bx, (val_images, val_targets) in enumerate(test_loader):
            val_images = [image.to(device) for image in val_images]
            val_targets = [{k:v.to(device) for k,v in t.items()} for t in val_targets]
            losses = model(val_images, val_targets)
            loss = sum(losses.values())
            regr_loss = losses['loss_box_reg']
            test_losses.append(loss.item())
            logger.debug(f'val loss = {loss},val regr_loss is {regr_loss}')

    test_epoch_accs.append(float('nan'))
    test_epoch_loss.append(np.mean(test_losses))

2025-04-02 16:07:48,785 - DEBUG - train loss = 1.5362147092819214,train regr_loss is 0.18507817387580872  on bx 0
2025-04-02 16:09:19,028 - DEBUG - train loss = 0.8163874745368958,train regr_loss is 0.21495622396469116  on bx 1
