In [2]:
from torch import cuda,optim
from PIL import Image
import torch
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms,models,datasets
from torchvision.ops import nms
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd,numpy as np
import torchsummary
from torch import nn
from pathlib import Path
from pandas import DataFrame as df
import glob
device = 'cuda' if cuda.is_available() else 'cpu'
import logging
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    handlers=[logging.FileHandler('app.log'), logging.StreamHandler()])
logger = logging.getLogger(__name__)
%matplotlib inline

DS_FOLDER = './data/open-images-bus-trucks/open-images-bus-trucks'
IMAGE_ROOT = DS_FOLDER + '/images'
df = pd.read_csv(DS_FOLDER+'/df.csv')
df.columns

2025-04-01 20:07:51,273 - DEBUG - Loaded backend module://matplotlib_inline.backend_inline version unknown.
2025-04-01 20:07:52,142 - DEBUG - Loaded backend module://matplotlib_inline.backend_inline version unknown.


Index(['ImageID', 'Source', 'LabelName', 'Confidence', 'XMin', 'XMax', 'YMin',
       'YMax', 'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction',
       'IsInside', 'XClick1X', 'XClick2X', 'XClick3X', 'XClick4X', 'XClick1Y',
       'XClick2Y', 'XClick3Y', 'XClick4Y'],
      dtype='object')

In [6]:
label2targets = {l: t+1 for t,l in enumerate(df['LabelName'].unique())}
label2targets['background'] = 0
target2labels = {i:t for t,i in label2targets.items()}
background_class = label2targets['background']
num_classes = len(label2targets)

In [10]:
def process_img(img):
    img = torch.tensor(img).permute(2,0,1)
    return img.to(device).float()

In [11]:
class OpenDataset(Dataset):
    def __init__(self,df,images_path=IMAGE_ROOT):
        self.images_path = images_path
        self.files = glob.glob(self.images_path,'/*')
        self.df = df
        self.image_info = df['ImageID'].unique()
    def __getitem__(self, index):
        image_id = self.image_info.iloc[index]
        img_path = list(filter(lambda path : path.find(image_id)!=-1,self.files))[0]
        img = Image.open(img_path).convert("RGB")
        img = np.array(img.resize((self.w,self.h),resample = Image.BITLINEAR)/255)
        data = df[df['ImageID'] == image_id]
        labels = data['LabelName']
        data = data[['XMin','YMin','XMax','YMax']].values()
        data[:,[0,2]] *= self.w
        data[:,[1,3]] *= self.h

        boxes = data.astype(np.uint32).tolist()
        target = {}
        target['boxes'] = torch.Tensor(boxes).float()
        target['label'] = torch.Tensor([label2targets[i] for i in labels]).long()
        img = process_img(img)
        return img,target

    def collate_fn(self,batch):
        return tuple(*zip(batch))
    def __len__(self):
        return len(self.image_info)

In [12]:
from sklearn.model_selection import train_test_split

train_ids,test_ids = train_test_split(df.ImageID.unique(),test_size=.1)
train_df,test_df = df[df['ImageID'].isin(train_ids)],df[df['ImageID'].isin(test_ids)]
train_ds ,test_ds = OpenDataset(train_df),OpenDataset(test_df)

TypeError: glob() takes 1 positional argument but 2 were given

In [None]:
train_loader = DataLoader(train_ds, batch_size=4,collate_fn=train_ds.collate_fn,drop_last=True)
test_loader = DataLoader(test_ds, batch_size=4,collate_fn=test_ds.collate_fn,drop_last=True)

NameError: name 'DataLoader' is not defined

In [4]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn,FasterRCNN_ResNet50_FPN_Weights
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def faster_model():
    model = fasterrcnn_resnet50_fpn(weights= FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [None]:
faster_model()

In [57]:
def train(inputs,model,optimizer):
    input,targets = inputs
    input = [image.to(device) for image in inputs ]
    targets = [{k:v.to(device) for k,v in t.items()} for t in targets]
    optimizer.zero_grad()
    losses = model(input,targets)
    loss = sum(losses)
    loss.backward()
    optimizer.step()
    return loss,losses

@torch.no_grad
def eval(inputs,model):
    input,targets = inputs
    input = [image.to(device) for image in inputs ]
    targets = [{k:v.to(device) for k,v in t.items()} for t in targets]
    losses = model(input,targets)
    loss = sum(losses)
    loss.backward()
    return loss,losses


In [7]:
images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 12, 4)
boxes[:, :, 2:4] = boxes[:, :, 0:2] + boxes[:, :, 2:4]
labels = torch.randint(0, 3, (4, 12))
images = list(image for image in images)
targets = []
model = faster_model()
for i in range(len(images)):
    d = {}
    d['boxes'] = boxes[i]
    d['labels'] = labels[i]
    targets.append(d)
output = model(images, targets)