In [1]:
import json
import math
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn as faster_rcnn
import torchvision.models.detection as d
from torchvision import transforms
from PIL import Image
import os
from torch.optim import Adam
import numpy as np
from torch.utils.data import Dataset, DataLoader
import pickle

In [2]:
with open ('labels_train.json', 'r', encoding='utf-8') as file:
    labels = json.load(file)
labels[0]

{'id': 28,
 'annotations': [{'id': 162,
   'completed_by': 1,
   'result': [{'original_width': 47,
     'original_height': 50,
     'image_rotation': 0,
     'value': {'x': 28.528270545319657,
      'y': 12.97576176416152,
      'width': 47.85387317279425,
      'height': 22.491320391213307,
      'rotation': 0,
      'rectanglelabels': ['Airplane']},
     'id': 'PPivq2L9kh',
     'from_name': 'label',
     'to_name': 'image',
     'type': 'rectanglelabels',
     'origin': 'manual'}],
   'was_cancelled': False,
   'ground_truth': False,
   'created_at': '2023-11-09T07:56:58.821955Z',
   'updated_at': '2023-11-09T07:56:58.822004Z',
   'draft_created_at': '2023-11-08T12:40:04.847593Z',
   'lead_time': 32.876,
   'prediction': {},
   'result_count': 0,
   'unique_id': '8a7257a6-d118-41fe-a31e-62abeed35a14',
   'import_id': None,
   'last_action': None,
   'task': 28,
   'project': 2,
   'updated_by': 1,
   'parent_prediction': None,
   'parent_annotation': None,
   'last_created_by': None

In [3]:
# sprawdamy max_widht i max_height iterujemy po zdjeciach, 
# robimy carny padding symetryccznie i przesuawmy dla danego zdjecia ospowiednio o tyle bb
max_height = 0
max_width = 0

for path in os.listdir('./photos/train/'):
    img = Image.open(os.path.join('./photos/train/', path))
    w, h = img.size
    max_width = max(w, max_width)
    max_height = max(h, max_height)

max_height, max_width



(606, 477)

In [4]:
def add_margin(pil_img, top, right, bottom, left, color):
    width, height = pil_img.size
    new_width = width + right + left
    new_height = height + top + bottom
    result = Image.new(pil_img.mode, (new_width, new_height), color)
    result.paste(pil_img, (left, top))
    return result

In [5]:
def find_dict_by_id(list_of_dicts, target_id):
    for dictionary in list_of_dicts:
        if 'id' in dictionary and dictionary['id'] == target_id:
            return dictionary
    return None

In [6]:
compose = transforms.Compose([
    transforms.PILToTensor(),
    transforms.ConvertImageDtype(torch.float)
])

tensors_image = []
boxes_list = []
labels_list = []

for path in os.listdir('./photos/train/'):
    img = Image.open(os.path.join('./photos/train/', path))
    w, h = img.size

    img = add_margin(img, (max_height - h)//2, (max_width - w)//2, math.ceil((max_height - h)/2), math.ceil((max_width - w)/2), (0,0,0))
    tensors_image.append(compose(img))

    id_ = int(path.split('.')[0])
    # id_ = 28
    # print(id_)
    label = find_dict_by_id(labels, id_)
    coordinates = label['annotations'][0]['result'][0]['value']
    # print(coordinates)
    boxes_list.append([coordinates['x'] + (max_width - w)//2, 
                        coordinates['y'] + (max_height - h)//2, 
                        coordinates['x'] + (max_width - w)//2 + coordinates['width'],
                        coordinates['y'] + (max_height - h)//2 + coordinates['height']])
    labels_list.append(1)

    # img.show()
    # break
# print(type(boxes_list))

In [7]:
y_train_boxes = torch.tensor(boxes_list, dtype=torch.float)
y_train_labels = torch.tensor(labels_list, dtype=torch.int64)

y_train = {'boxes': y_train_boxes, 'labels': y_train_labels}
x_train = tensors_image
# type(y_train)
# y_train.shape
# iteruje po wszystkich sciezkach image.open(obraz) i potem compose(image) arr z tych tensowrow powstalych z compose
# torch stack czy jakos tak

In [8]:
model = faster_rcnn(weights=d.FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to C:\Users\olcia/.cache\torch\hub\checkpoints\fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100.0%


In [57]:
class ImageDataset(Dataset):
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels

    def __len__(self):
      return len(self.images)
    
    def __getitem__(self, idx):
      return self.images[idx], {'boxes': self.labels['boxes'][idx], 'labels': self.labels['labels'][idx]}

In [58]:
dataLoader = DataLoader(ImageDataset(x_train, y_train), batch_size=10)

In [59]:
y_train['boxes'][0]

tensor([ 24.7658, 135.6987,  41.7767, 146.1655])

In [72]:
optimiser = Adam(model.parameters())
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = model.to(device)

# torch.DataLoader - zwraca w bachach zdjecia i bb 

def train(model, optimiser, data, epochs=20):
    loss_epoch_mean = []
    for epoch in range(epochs):
        loss_list = np.array([])
        for x, y in data:
            x = x.to(device)
            optimiser.zero_grad()
            y_list = []
            a, _ = y['boxes'].shape
            for i in range(a):
                y_list.append({'boxes': y['boxes'][i].reshape(1, -1), 'labels': torch.tensor([1])})

            print(y_list[0]['labels'].shape)
            loss_dict = model(x, y_list)
            losses = sum(loss for loss in loss_dict.values())
            losses.backward()
            optimiser.step()
            loss_list = np.append(loss_list, losses.item())
        epoch_mean = np.mean(loss_list)
        loss_epoch_mean.append(epoch_mean)
    return model, loss_epoch_mean

In [73]:
model, loss_epoch_mean = train(model, optimiser, dataLoader, 20)

torch.Size([10])


In [None]:
type(model)

torchvision.models.detection.faster_rcnn.FasterRCNN

In [None]:
with open('model.p', 'wb') as f:
  pickle.dump(model, f)