In [1]:
%pylab inline

import os
import numpy as np

from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F

Populating the interactive namespace from numpy and matplotlib


In [2]:
from torchvision.models import resnet34

class SomeNet(torch.nn.Module):
    """Single-scale detector: ResNet-34 feature extractor plus a small conv head.

    The torchvision ``resnet34`` is used only up to ``layer4`` (its average
    pool and fully connected layer are never called). Two stride-2 3x3
    convolutions and one 1x1 convolution follow, and a final 1x1 convolution
    emits ``(5 + num_classes) * len(anchors)`` channels per output cell.

    Args:
        num_classes: number of object classes predicted per anchor.
        anchors: sequence of anchor boxes. Only its length is used here, to
            size the output layer. Defaults to an empty (immutable) tuple.
    """

    def __init__(self, num_classes=1, anchors=()):
        super().__init__()

        self.encoder = resnet34(pretrained=True)

        # One shared activation module, reused after every head convolution.
        self.relu = nn.ReLU(inplace=True)

        # TODO more channels
        self.conv1 = nn.Conv2d(512, 512, 3, padding=1, stride=2)
        self.bn1 = nn.BatchNorm2d(512)
        self.conv2 = nn.Conv2d(512, 512, 3, padding=1, stride=2)
        self.bn2 = nn.BatchNorm2d(512)
        self.conv3 = nn.Conv2d(512, 1024, 1)
        self.bn3 = nn.BatchNorm2d(1024)

        # Per anchor: 5 box/confidence values followed by num_classes scores.
        self.last = nn.Conv2d(1024, (5+num_classes)*len(anchors), 1)

        # TODO: multi-head later
        #self.conv_reg1 = nn.Conv2d(256, 5, 3, padding=1)
        #self.conv_class1 = nn.Conv2d(256, bottleneck, 1)
        #self.bn_class = nn.BatchNorm2d(bottleneck)
        #self.conv_class2 = nn.Conv2d(bottleneck, num_classes, 1)

    def forward(self, x):
        """Return the raw (un-activated) prediction map for an image batch.

        The shape comments below are for a 3x640x640 input.
        """
        #-> 3 640*640

        # ResNet stem.
        x = self.encoder.conv1(x)
        x = self.encoder.bn1(x)
        x = self.encoder.relu(x)
        x = self.encoder.maxpool(x)

        # ResNet residual stages.
        x = self.encoder.layer1(x)
        x = self.encoder.layer2(x)
        x = self.encoder.layer3(x)
        x = self.encoder.layer4(x)
        #-> 512 20*20

        # Detection head: two stride-2 convs, then a 1x1 channel expansion.
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.relu(self.bn3(self.conv3(x)))
        #-> 1024 5*5

        return self.last(x)

In [3]:
import json

def _load_json(path):
    """Open ``path`` for reading and return its parsed JSON content."""
    with open(path, 'r') as handle:
        return json.load(handle)


# The metadata files carry a .txt extension but are parsed as JSON.
train_meta = _load_json('./data/train/labels.txt')
test_meta = _load_json('./data/test/labels.txt')
subj_meta = _load_json('./data/subjects.txt')

In [4]:
from torch.utils.data import DataLoader
from torchvision import transforms

from FaceDataset import FaceDataset
from region_loss import RegionLoss

# Ask PyTorch to release cached GPU memory it is not currently using.
torch.cuda.empty_cache()

# The second FaceDataset argument is passed as None in both cases
# (presumably "no transform" -- confirm against FaceDataset).
trainset = FaceDataset(train_meta, None)
testset = FaceDataset(test_meta, None)

# Only the training loader shuffles; both use two images per batch.
trainloader = DataLoader(
    trainset,
    batch_size=2,
    shuffle=True,
)
testloader = DataLoader(
    testset,
    batch_size=2,
    shuffle=False,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = SomeNet(
    num_classes=285,
    anchors=[(60, 60), (160, 160), (240, 240)],
)
net = net.to(device)

In [5]:
# Pull one batch from the training loader to inspect the image tensor shape.
# next(iter(...)) states the intent directly; the previous for/break form
# left `batch` unbound (NameError on the next line) if the loader was empty.
batch = next(iter(trainloader))
batch['img'].shape

torch.Size([2, 3, 640, 640])

In [6]:
# Learning rate handed to Adam below.
lr = 1e-4

# Region loss configured with the class count and anchor boxes that were
# also passed to SomeNet when the network was built.
criterion = RegionLoss(
    num_classes=285,
    anchors=[(60, 60), (160, 160), (240, 240)],
    num_anchors=3,
)

# Adam over every parameter of the network.
optimizer = torch.optim.Adam(params=net.parameters(), lr=lr)

In [9]:
def train(verbose=True, info_step=100):
    """Run one training epoch over ``trainloader``.

    Relies on notebook-level state: ``net``, ``trainloader``, ``trainset``,
    ``criterion``, ``optimizer`` and ``device``. When ``verbose`` is true it
    also reads the notebook-level ``epoch`` to label the log lines.

    Args:
        verbose: print progress and the epoch summary when true. When false,
            nothing is printed (this flag was previously ignored).
        info_step: number of batches between two progress reports.

    Returns:
        ``(train_loss, losses)``: the per-batch loss values summed over the
        epoch and divided by ``len(trainset)``, and a list with the four
        entries of the criterion's ``info`` (reported as coord loss, obj
        loss, class loss, acc), accumulated and normalised the same way.
    """
    parts_fmt = ' coord loss: {:.5} \tobj loss: {:.5} \tclass loss: {:.5} \tacc: {:.5}'

    net.train()
    running_loss = 0.0
    train_loss = 0.0

    # Epoch-wide and since-last-report accumulators for the four info values.
    losses = [0] * 4
    running_losses = [0] * 4

    for i, batch in enumerate(trainloader):
        optimizer.zero_grad()

        output = net(batch['img'].to(device))
        loss, info = criterion(output, batch['target'].to(device))
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        train_loss += batch_loss

        for j in range(len(losses)):
            losses[j] += info[j]
            running_losses[j] += info[j]

        if (i+1) % info_step == 0:
            if verbose:
                print(' [{} - {}],\ttrain loss: {:.5}'.format(epoch+1, i+1, running_loss/info_step))
                print(parts_fmt.format(*[value / info_step for value in running_losses]))
            # Start a fresh reporting window.
            running_loss = 0.0
            running_losses = [0] * 4

    # NOTE(review): progress lines average per batch (info_step) while the
    # epoch summary divides by the number of samples; confirm which one
    # matches the reduction used inside RegionLoss.
    train_loss /= len(trainset)
    losses = [value / len(trainset) for value in losses]

    if verbose:
        print('\n [{}], \ttrain loss: {:.5}'.format(epoch+1, train_loss))
        print(parts_fmt.format(*losses))
        print()
    return train_loss, losses
  

def validate(verbose=True):
    """Evaluate the network on ``testloader`` without computing gradients.

    Relies on notebook-level state: ``net``, ``testloader``, ``testset``,
    ``criterion`` and ``device``. When ``verbose`` is true it also reads the
    notebook-level ``epoch`` to label the log lines.

    Args:
        verbose: print the validation summary when true (the default, which
            matches the previous behaviour).

    Returns:
        ``(val_loss, losses)``: the per-batch loss values summed and divided
        by ``len(testset)``, and the four accumulated criterion ``info``
        entries normalised the same way. Previously the second element was
        the raw ``info`` of the last batch only, which did not match what
        ``train`` returns.
    """
    net.eval()
    losses = [0] * 4
    val_loss = 0.0

    with torch.no_grad():
        for batch in testloader:
            output = net(batch['img'].to(device))
            loss, info = criterion(output, batch['target'].to(device))

            val_loss += loss.item()
            for j in range(len(losses)):
                losses[j] += info[j]

    val_loss /= len(testset)
    losses = [value / len(testset) for value in losses]

    if verbose:
        print(' [{}], \tval loss: {:.5}\n'.format(epoch+1, val_loss))
        print(' coord loss: {:.5} \tobj loss: {:.5} \tclass loss: {:.5} \tacc: {:.5}'.format(*losses))
        print()
    return val_loss, losses

In [10]:
import time

num_epoch = 1

# history: one (train_loss, val_loss) pair per epoch.
# detailed_history: one (train_info, test_info) pair per epoch.
history, detailed_history = [], []

# `epoch` has to remain a notebook-level name: train() and validate() read
# it when formatting their log lines.
for epoch in range(num_epoch):
    train_loss, train_info = train(info_step=80)
    val_loss, test_info = validate()

    history.append((train_loss, val_loss))
    detailed_history.append((train_info, test_info))

 [1 - 80],	train loss: 57.135
 coord loss: 0.39881 	obj loss: 0.36042 	class loss: 56.376 	acc: 0.0125
 [1 - 160],	train loss: 56.848
 coord loss: 0.19964 	obj loss: 0.24367 	class loss: 56.404 	acc: 0.0375
 [1 - 240],	train loss: 56.543
 coord loss: 0.11432 	obj loss: 0.18228 	class loss: 56.246 	acc: 0.075


IndexError: too many indices for array

In [None]:
#Test
from utils import non_max_suppression

# NOTE: this rebinds the notebook-level `testloader` to one image per batch.
testloader = DataLoader(testset, batch_size=1, shuffle=False)

# Decoding settings are the same for every batch, so they are set once.
nC = 285   # number of classes
nA = 3     # anchors per grid cell
anchors = criterion.anchors
# NOTE(review): with stride = 1 the anchors are used unscaled -- confirm
# that criterion.anchors is already expressed in grid-cell units.
stride = 1

net.eval()
acc = 0
with torch.no_grad():
    for batch in testloader:
        output = net(batch['img'].to(device))

        nB, _, nH, nW = output.size()

        # Split the channel axis into (anchor, attribute) and move the
        # attribute axis last: (nB, nA, nH, nW, 5 + nC).
        prediction = output.view(nB, nA, (4+1+nC), nH, nW)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Get attributes from output tensor
        prediction[..., 0] = torch.sigmoid(prediction[..., 0])  # Center x
        prediction[..., 1] = torch.sigmoid(prediction[..., 1])  # Center y
        prediction[..., 4] = torch.sigmoid(prediction[..., 4])  # Conf
        prediction[..., 5:] = torch.sigmoid(prediction[..., 5:])  # Cls distribution

        # Per-cell offsets. grid_x[..., r, c] == c and grid_y[..., r, c] == r.
        # The column index is repeated over nH rows and the row index over
        # nW columns, so this also holds when nH != nW (the previous repeat
        # counts were swapped and only worked for square maps).
        grid_x = torch.arange(nW, dtype=torch.float32).repeat(nH, 1).view([1, 1, nH, nW]).to(device)
        grid_y = torch.arange(nH, dtype=torch.float32).view(nH, 1).repeat(1, nW).view([1, 1, nH, nW]).to(device)
        scaled_anchors = torch.FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in anchors]).to(device)
        anchor_w = scaled_anchors[:, 0].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1].view((1, nA, 1, 1))

        # Add offset and scale with anchors
        prediction[..., 0] = prediction[..., 0] + grid_x
        prediction[..., 1] = prediction[..., 1] + grid_y
        prediction[..., 2] = torch.exp(prediction[..., 2]) * anchor_w
        prediction[..., 3] = torch.exp(prediction[..., 3]) * anchor_h

        # Flatten to one row per (anchor, cell) candidate.
        prediction = prediction.view(nB, nA*nH*nW, 4+1+nC)

        # Last attribute of the last detection returned for the image
        # (presumably the class id -- confirm against utils.non_max_suppression).
        label = non_max_suppression(prediction, nC)[0][-1][-1].cpu().item()
        # Last value of the last target row of the first (only) sample.
        true_label = batch['target'][0][-1][-1].item()

        if label == true_label:
            acc += 1
acc/len(testset)

In [None]:
net.eval()

# Centre-crop to 640x640, convert to a tensor, then normalise each of the
# three channels with the mean/std values given below.
test_transforms = transforms.Compose([
    transforms.CenterCrop(640),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

filepath = './data/test/1/27-02.jpg'
# Force three channels: Normalize above is given 3-element mean/std, so a
# grayscale, CMYK or RGBA file would otherwise fail in the transform.
img = Image.open(filepath).convert('RGB')
# Add the batch dimension the network expects.
tensor = test_transforms(img).unsqueeze(0)

with torch.no_grad():
    output = net(tensor.to(device))

In [None]:
nB, _, nH, nW = output.size()
nC = 285   # number of classes
nA = 3     # anchors per grid cell

anchors = criterion.anchors
# NOTE(review): with stride = 1 the anchors are used unscaled -- confirm
# that criterion.anchors is already expressed in grid-cell units.
stride = 1

# Split the channel axis into (anchor, attribute) and move the attribute
# axis last: (nB, nA, nH, nW, 5 + nC).
prediction = output.view(nB, nA, (4+1+nC), nH, nW)
prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

# Get attributes from output tensor
prediction[..., 0] = torch.sigmoid(prediction[..., 0])  # Center x
prediction[..., 1] = torch.sigmoid(prediction[..., 1])  # Center y
prediction[..., 4] = torch.sigmoid(prediction[..., 4])  # Conf
prediction[..., 5:] = torch.sigmoid(prediction[..., 5:])  # Cls distribution

# Per-cell offsets. grid_x[..., r, c] == c and grid_y[..., r, c] == r.
# The column index is repeated over nH rows and the row index over nW
# columns, so this also holds when nH != nW (the previous repeat counts were
# swapped and only worked for square maps).
grid_x = torch.arange(nW, dtype=torch.float32).repeat(nH, 1).view([1, 1, nH, nW]).to(device)
grid_y = torch.arange(nH, dtype=torch.float32).view(nH, 1).repeat(1, nW).view([1, 1, nH, nW]).to(device)
scaled_anchors = torch.FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in anchors]).to(device)
anchor_w = scaled_anchors[:, 0].view((1, nA, 1, 1))
anchor_h = scaled_anchors[:, 1].view((1, nA, 1, 1))

# Add offset and scale with anchors
prediction[..., 0] = prediction[..., 0] + grid_x
prediction[..., 1] = prediction[..., 1] + grid_y
prediction[..., 2] = torch.exp(prediction[..., 2]) * anchor_w
prediction[..., 3] = torch.exp(prediction[..., 3]) * anchor_h

# Flatten to one row per (anchor, cell) candidate.
prediction = prediction.view(nB, nA*nH*nW, 4+1+nC)

In [None]:
# Only the detections of the first image are processed here; the per-image
# loop was left disabled in the original cell.
pred = non_max_suppression(prediction, nC)[0]

# Columns 0-3 are taken as the box, column 4 as the score and the last
# column as the label.
pred_boxes = pred[:, :4].cpu().numpy()
scores = pred[:, 4].cpu().numpy()
pred_labels = pred[:, -1].cpu().numpy()

# Reorder boxes and labels by ascending score, so the highest-scoring
# detection ends up last. `scores` itself is left in its original order.
sort_i = np.argsort(scores)
pred_boxes, pred_labels = pred_boxes[sort_i], pred_labels[sort_i]

# One entry per image; each entry holds one box array per class, starting
# out empty and then filled with the boxes whose label equals the class.
all_detections = [[np.array([]) for _ in range(nC)]]
for label in range(nC):
    all_detections[-1][label] = pred_boxes[pred_labels == label]

pred_labels

In [None]:
from PIL import ImageDraw

# Same 640x640 centre crop that test_transforms applies before inference.
draw_img = transforms.CenterCrop(640)(img)

# Last row of pred_boxes (highest score after the ascending sort), divided
# by 5 and multiplied by 640 -- presumably grid cells to pixels; confirm the
# box format expected by ImageDraw against utils.non_max_suppression.
box = pred_boxes[-1]/5*640

# Draw straight onto the cropped image without keeping the drawing handle.
ImageDraw.Draw(draw_img).rectangle(box)

draw_img