In [6]:
%cd c:/Users/realc/OneDrive/Documents/UGR/kpr_nn
%pwd

c:\Users\realc\OneDrive\Documents\UGR\kpr_nn


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


'c:\\Users\\realc\\OneDrive\\Documents\\UGR\\kpr_nn'

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms

from utils import data_prep

## Load the data

In [8]:
transform = transforms.Compose([
    transforms.Resize((80, 80)),
    transforms.ToTensor(),
])

dataset = data_prep.ConeDataset("data/cone_img", "data/annotations", transform=transform)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
print(len(dataset))

19490


In [9]:
class ResNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResNetBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        
    def forward(self, x):
        identity = self.shortcut(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += identity
        out = self.relu(out)
        return out

class KPR_NN(nn.Module):
    def __init__(self, num_keypoints=6):
        super(KPR_NN, self).__init__()
        self.initial_conv = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=6, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        
        self.res_block1 = ResNetBlock(64, 64)
        self.res_block2 = ResNetBlock(64, 128, stride=2)
        self.res_block3 = ResNetBlock(128, 256, stride=2)
        self.res_block4 = ResNetBlock(256, 512, stride=2)
        
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_keypoints * 2)
        
    def forward(self, x):
        x = self.initial_conv(x)
        x = self.res_block1(x)
        x = self.res_block2(x)
        x = self.res_block3(x)
        x = self.res_block4(x)
        x = self.global_avg_pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

model = KPR_NN()
print(model)

KPR_NN(
  (initial_conv): Sequential(
    (0): Conv2d(3, 64, kernel_size=(6, 6), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (res_block1): ResNetBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (shortcut): Sequential()
  )
  (res_block2): ResNetBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stri

In [10]:
def train_model(model, dataloader, num_epochs=20, lr=1e-3, device="cuda"):
    model.to(device)
    # Try MSE to begin with, then implement the proposed cross-ratio loss
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for images, keypoints in dataloader:
            images, keypoints = images.to(device), keypoints.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, keypoints.view(outputs.shape))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        avg_loss = running_loss / len(dataloader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

    return model

device = "cuda" if torch.cuda.is_available() else "cpu"
trained_model = train_model(model, data_loader, num_epochs=20, lr=1e-3, device=device)


RuntimeError: stack expects each tensor to be equal size, but got [6, 2] at entry 0 and [7, 2] at entry 21