In [1]:
import json

with open('/Users/kriskasira/Documents/RiProject/archive/Batch 3/import-videos/ann/instances.json', 'r') as file:
    podaci = json.load(file)

In [6]:
podaci['images']

[{'license': 'None',
  'file_name': 'Football batch 3.mp4_00000.jpg',
  'url': 'None',
  'height': 1080,
  'width': 1920,
  'date_captured': '2023-04-15T08:43:47.612Z',
  'id': 16640924},
 {'license': 'None',
  'file_name': 'Football batch 3.mp4_00001.jpg',
  'url': 'None',
  'height': 1080,
  'width': 1920,
  'date_captured': '2023-04-15T08:43:47.612Z',
  'id': 16640925},
 {'license': 'None',
  'file_name': 'Football batch 3.mp4_00002.jpg',
  'url': 'None',
  'height': 1080,
  'width': 1920,
  'date_captured': '2023-04-15T08:43:47.612Z',
  'id': 16640926},
 {'license': 'None',
  'file_name': 'Football batch 3.mp4_00003.jpg',
  'url': 'None',
  'height': 1080,
  'width': 1920,
  'date_captured': '2023-04-15T08:43:47.612Z',
  'id': 16640927},
 {'license': 'None',
  'file_name': 'Football batch 3.mp4_00004.jpg',
  'url': 'None',
  'height': 1080,
  'width': 1920,
  'date_captured': '2023-04-15T08:43:47.612Z',
  'id': 16640928},
 {'license': 'None',
  'file_name': 'Football batch 3.mp4_00

In [1]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os

class CustomDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = os.listdir(image_dir)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.images[idx])
        mask_name = os.path.join(self.mask_dir, self.images[idx].replace('.jpg', '.png'))
        image = Image.open(img_name).convert("RGB")
        mask = Image.open(mask_name)

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)
        
        return image, mask

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

train_dataset = CustomDataset("path_to_train_images", "path_to_train_masks", transform)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)

FileNotFoundError: [Errno 2] No such file or directory: 'path_to_train_images'

In [3]:
from torchvision import models
import torch.nn as nn

class Backbone(nn.Module):
    def __init__(self):
        super(Backbone, self).__init__()
        self.backbone = models.resnet50(pretrained=True)
        self.backbone = nn.Sequential(*list(self.backbone.children())[:-2])
    
    def forward(self, x):
        x = self.backbone(x)
        return x

backbone = Backbone()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/korisnik/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████████████████████████████████| 97.8M/97.8M [00:37<00:00, 2.71MB/s]


In [None]:
class SemanticSegmentationHead(nn.Module):
    def __init__(self, num_classes):
        super(SemanticSegmentationHead, self).__init__()
        self.conv1 = nn.Conv2d(2048, 512, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(512)
        self.conv2 = nn.Conv2d(512, num_classes, kernel_size=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = nn.ReLU()(x)
        x = self.conv2(x)
        x = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)(x)
        return x

semantic_head = SemanticSegmentationHead(num_classes=21)  # Broj klasa u semantičkoj segmentaciji

In [None]:
class InstanceSegmentationHead(nn.Module):
    def __init__(self):
        super(InstanceSegmentationHead, self).__init__()
        self.conv1 = nn.Conv2d(2048, 256, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(256)
        self.conv2 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 1, kernel_size=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = nn.ReLU()(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = nn.ReLU()(x)
        x = self.conv3(x)
        x = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)(x)
        return x

instance_head = InstanceSegmentationHead()

In [None]:
class PanopticSegmentationModel(nn.Module):
    def __init__(self, backbone, semantic_head, instance_head):
        super(PanopticSegmentationModel, self).__init__()
        self.backbone = backbone
        self.semantic_head = semantic_head
        self.instance_head = instance_head

    def forward(self, x):
        features = self.backbone(x)
        semantic_output = self.semantic_head(features)
        instance_output = self.instance_head(features)
        return semantic_output, instance_output

model = PanopticSegmentationModel(backbone, semantic_head, instance_head).to(device)

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, dataloader, criterion, optimizer, num_epochs=25):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, masks in dataloader:
            inputs = inputs.to(device)
            masks = masks.to(device)
            
            optimizer.zero_grad()

            semantic_outputs, instance_outputs = model(inputs)
            semantic_loss = criterion(semantic_outputs, masks)
            instance_loss = criterion(instance_outputs, masks)
            loss = semantic_loss + instance_loss
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(dataloader.dataset)
        print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss:.4f}')

    return model

model = train_model(model, train_loader, criterion, optimizer, num_epochs=25)