In [1]:
import warnings
warnings.filterwarnings('ignore')

import torch
from torch import nn
from torchvision.models import resnet18
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict
from skimage.io import imread
import numpy as np
import os
import csv
import xml.etree.ElementTree as ET

In [None]:
class Identity(nn.Module):
    """Pass-through module: ``forward`` hands back its argument untouched.

    Handy for disabling a layer of an existing network by assigning an
    instance over it, so the attribute still exists but does nothing.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` unchanged (same object, no copy)."""
        return x

class Model(nn.Module):
    """Encoder-decoder heatmap network built on a pretrained ResNet-18.

    Encoder: a torchvision ResNet-18 whose ``avgpool`` and ``fc`` are replaced
    by ``Identity``; ``forward`` only calls its stem (``conv1``, ``bn1``,
    ``relu``, ``maxpool``) and ``layer1`` .. ``layer4``.

    Decoder: three stride-2 transposed convolutions, each followed by a
    concatenation with a 1x1-projected encoder feature map (skip connection),
    and a final stride-1 transposed convolution that emits 4 channels.

    Output: a tensor with 4 channels whose spatial size equals that of
    ``layer1``'s output. The decoder only undoes the down-sampling of
    ``layer2`` .. ``layer4``, so the result is NOT at input resolution
    (in torchvision's ResNet-18 the stem reduces each side by a factor of 4).
    """

    def __init__(self):
        super(Model, self).__init__()
        resnet = resnet18(pretrained=True)
        # Disable the classification head; only the convolutional trunk is used.
        resnet.avgpool = Identity()
        resnet.fc = Identity()

        self.downsampling = resnet

        # 1x1 projections that bring each skip feature map to 128 channels so
        # that every concatenation below yields 256 channels.
        self.cat1 = nn.Conv2d(64, 128, 1)
        self.cat2 = nn.Conv2d(128, 128, 1)
        self.cat3 = nn.Conv2d(256, 128, 1)

        # kernel 3, stride 2, padding 1, output_padding 1 -> exactly 2x the input size.
        self.upsampling_1 = nn.Sequential(nn.ReLU(),
                                          nn.ConvTranspose2d(512, 128, 3, 2, 1, 1)
                                         )

        self.upsampling_2 = nn.Sequential(nn.ReLU(),
                                          nn.BatchNorm2d(256),
                                          nn.ConvTranspose2d(256, 128, 3, 2, 1, 1)
                                         )

        self.upsampling_3 = nn.Sequential(nn.ReLU(),
                                          nn.BatchNorm2d(256),
                                          nn.ConvTranspose2d(256, 128, 3, 2, 1, 1)
                                         )

        # kernel 3, stride 1, padding 1 -> spatial size preserved, 4 output channels.
        self.upsampling_4 = nn.Sequential(nn.ReLU(),
                                         nn.BatchNorm2d(256),
                                         nn.ConvTranspose2d(256, 4, 3, 1, 1)
                                        )

    @staticmethod
    def _crop_like(x, ref):
        """Crop the two trailing (spatial) dims of ``x`` to those of ``ref``.

        A stride-2 encoder stage maps a side of length ``s`` to ``ceil(s / 2)``
        while the matching transposed convolution doubles it, so the upsampled
        map can be one pixel larger than the skip feature when ``s`` is odd.
        When the sizes already agree the slice is a no-op.
        """
        return x[..., :ref.shape[-2], :ref.shape[-1]]

    def forward(self, x):
        """Run the encoder, then decode with skip connections.

        Args:
            x: image batch in the layout expected by the ResNet stem
               (channels-first, 3 channels).

        Returns:
            Tensor with 4 channels at the spatial size of ``layer1``'s output.
        """
        x = self.downsampling.conv1(x)
        x = self.downsampling.bn1(x)
        x = self.downsampling.relu(x)
        x = self.downsampling.maxpool(x)

        down1 = self.downsampling.layer1(x)
        cat1 = self.cat1(down1)
        down2 = self.downsampling.layer2(down1)
        cat2 = self.cat2(down2)
        down3 = self.downsampling.layer3(down2)
        cat3 = self.cat3(down3)
        down4 = self.downsampling.layer4(down3)

        # Crop every upsampled map to its skip partner before concatenating so
        # that inputs whose sides are not multiples of 32 do not break torch.cat.
        up1 = self._crop_like(self.upsampling_1(down4), cat3)
        up2 = self._crop_like(self.upsampling_2(torch.cat((up1, cat3), 1)), cat2)
        up3 = self._crop_like(self.upsampling_3(torch.cat((up2, cat2), 1)), cat1)
        up4 = self.upsampling_4(torch.cat((up3, cat1), 1))

        return up4

### Convert XML annotations to CSV

In [None]:
xml_folder_path = 'dataset/drive-download-20190813T171752Z-001/xml/'
csv_path = 'dataset/drive-download-20190813T171752Z-001/labels.csv'
img_path = 'dataset/drive-download-20190813T171752Z-001/xml/images/'

# Flatten every annotation file into one CSV row per annotated object:
#   image_name, part_name, center_x, center_y
# where the centre is the integer midpoint of the object's bounding box.
#
# newline='' is what the csv module asks for when opening files; without it
# the writer's '\r\n' terminator is translated again on Windows and every
# other row comes out blank.
with open(csv_path, 'w', newline='') as csvfile:
    fieldnames = ['image_name', 'part_name', 'center_x', 'center_y']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    # No header row is written on purpose: the dataset class in this notebook
    # treats every CSV row as data.

    # sorted() makes the output order reproducible (os.listdir order is arbitrary).
    for xml_name in sorted(os.listdir(xml_folder_path)):
        # Skip anything that is not an annotation file (sub-folders, editor
        # checkpoints, ...) instead of letting ET.parse fail on it.
        if not xml_name.lower().endswith('.xml'):
            continue

        xml_path = os.path.join(xml_folder_path, xml_name)
        root = ET.parse(xml_path).getroot()

        img_name = root.find('filename').text

        for part in root.findall('object'):
            name = part.find('name').text
            object_bb = part.find('bndbox')
            xmin = int(object_bb.find('xmin').text)
            xmax = int(object_bb.find('xmax').text)
            ymin = int(object_bb.find('ymin').text)
            ymax = int(object_bb.find('ymax').text)

            center_x = (xmin + xmax) // 2
            center_y = (ymin + ymax) // 2

            writer.writerow({'image_name': img_name, 'part_name': name,
                             'center_x': center_x, 'center_y': center_y})


In [19]:
class detection_dataset(Dataset):
    """Images paired with per-part Gaussian heatmaps built from a keypoint CSV.

    Args:
        img_size: ``(height, width)`` of the heatmaps to allocate.
        img_folder_path: folder whose entries are the images of the dataset.
        csv_path: header-less CSV with rows
            ``image_name, part_name, center_x, center_y``.
        transform: when truthy (default) samples are returned as float32
            torch tensors; otherwise as float32 numpy arrays.

    Each sample is ``{'image': ..., 'annotation': ...}`` where ``annotation``
    has shape ``(4, height, width)`` with one channel per part
    (Head=0, Foot=1, Hand=2, Trunk=3). The image is returned in whatever
    layout ``imread`` produced; it is not rearranged or rescaled here.
    """

    def __init__(self, img_size, img_folder_path, csv_path, transform=True):
        self.height = img_size[0]
        self.width = img_size[1]
        self.folder_path = img_folder_path
        # Sorted so that index -> file is stable across runs and machines.
        self.img_files = sorted(os.listdir(img_folder_path))
        self.csv_path = csv_path
        self.transform = transform
        self.get_annotations()

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, index):
        img_name = self.img_files[index]
        # os.path.join works whether or not folder_path ends with a separator.
        img = imread(os.path.join(self.folder_path, img_name))
        # Images without any CSV row get an all-zero heatmap (defaultdict).
        annotation = self.annotations[img_name]

        if self.transform:
            img = torch.from_numpy(img).type(torch.FloatTensor)
            annotation = torch.from_numpy(annotation).type(torch.FloatTensor)
        else:
            # Previously this path called ``.type`` on numpy arrays and raised
            # AttributeError; return float32 arrays instead.
            img = np.asarray(img, dtype=np.float32)
            annotation = np.asarray(annotation, dtype=np.float32)

        return {'image': img, 'annotation': annotation}

    def get_annotations(self):
        """Read ``self.csv_path`` and populate ``self.annotations``.

        ``self.annotations`` maps image name -> float array of shape
        ``(4, height, width)``. For every CSV row a 16x16 Gaussian blob
        (``makeGaussian(16, 8)``) is stamped into the part's channel with its
        peak at ``[center_y, center_x]``.

        Raises:
            KeyError: if a part name (after ``str.capitalize``) is not one of
                Head, Foot, Hand, Trunk.
        """
        self.annotations = defaultdict(lambda: np.zeros([4, self.height, self.width]))
        part_mapping = {'Head':0, 'Foot':1, 'Hand':2, 'Trunk':3}

        half = 8
        # Loop-invariant: the same kernel is stamped for every keypoint.
        kernel = makeGaussian(2 * half, 8)

        with open(self.csv_path, newline='') as csvfile:
            for row in csv.reader(csvfile):
                if not row:
                    # Tolerate blank lines (e.g. a CSV written without newline='').
                    continue

                img_name = row[0]
                heatmap = self.annotations[img_name]
                channel = part_mapping[row[1].capitalize()]

                col_c = int(row[2])   # center_x -> column index
                row_c = int(row[3])   # center_y -> row index

                top, left = row_c - half, col_c - half
                # Clip the 16x16 window to the heatmap. Un-clipped, a negative
                # start wraps around and an end past the edge is truncated,
                # both of which break the fixed-size assignment.
                r0, r1 = max(top, 0), min(row_c + half, self.height)
                c0, c1 = max(left, 0), min(col_c + half, self.width)
                if r0 >= r1 or c0 >= c1:
                    continue  # window lies completely outside the heatmap

                patch = kernel[r0 - top:r1 - top, c0 - left:c1 - left]
                region = heatmap[channel, r0:r1, c0:c1]
                # Element-wise max so that nearby keypoints of the same part
                # (e.g. two hands) do not erase each other's peaks.
                np.maximum(region, patch, out=region)

def makeGaussian(size, fwhm = 3, center=None):
    """Make a square 2-D Gaussian kernel with peak value 1.

    Args:
        size: side length of the square kernel, in pixels.
        fwhm: full-width-half-maximum, which can be thought of as an
            effective radius.
        center: optional ``(x0, y0)`` position of the peak, where ``x0`` is
            the column index and ``y0`` the row index. Defaults to
            ``size // 2`` on both axes.

    Returns:
        ``(size, size)`` float array; element ``[row, col]`` equals
        ``exp(-4 ln2 * ((col - x0)**2 + (row - y0)**2) / fwhm**2)``.

    Reference: https://stackoverflow.com/a/14525830
    """

    x = np.arange(0, size, 1, float)   # column coordinates, shape (size,)
    y = x[:,np.newaxis]                # row coordinates, shape (size, 1)

    # ``center`` used to be accepted but silently ignored.
    if center is None:
        x0 = y0 = size // 2
    else:
        x0 = center[0]
        y0 = center[1]

    return np.exp(-4*np.log(2) * ((x-x0)**2 + (y-y0)**2) / fwhm**2)

In [20]:
img_path = 'dataset/drive-download-20190813T171752Z-001/images/'
csv_path = 'dataset/drive-download-20190813T171752Z-001/labels.csv'

# Build the dataset with 700x700 (height, width) heatmaps and wrap it in a
# shuffling loader that yields batches of 20 samples.
dg = detection_dataset(
    img_size=(700, 700),
    img_folder_path=img_path,
    csv_path=csv_path,
)
dataloader = DataLoader(dataset=dg, batch_size=20, shuffle=True)

In [None]:
# Train on GPU when one is present, otherwise fall back to CPU instead of
# crashing on an unconditional .cuda().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

criterion = nn.MSELoss()
model = Model()

model.to(device)
criterion.to(device)

# `Adam` is not imported anywhere in the notebook; reach it through torch.optim.
optimizer = torch.optim.Adam(model.parameters())

epochs = 10

model.train()
for epoch in range(epochs):
    for data in dataloader:
        img, bb = data['image'], data['annotation']
        img, bb = img.to(device), bb.to(device)

        # The dataset returns images exactly as read from disk, i.e.
        # channels-last for RGB files, while the ResNet stem expects
        # (batch, channels, height, width).
        if img.dim() == 4 and img.shape[-1] == 3:
            img = img.permute(0, 3, 1, 2)

        pred = model(img)

        # The network predicts at a reduced resolution; resize to the target
        # heatmap size so the element-wise MSE is well defined.
        if pred.shape[-2:] != bb.shape[-2:]:
            pred = nn.functional.interpolate(pred, size=bb.shape[-2:],
                                             mode='bilinear', align_corners=False)

        loss = criterion(pred, bb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # TODO: calculate precision and false error percentage


In [37]:
# precision
# false error rate

# Scratch check: an 8-pixel half-window around index 10 selects a 16x16 patch
# from one channel of a (4, 100, 100) array.
center_idx, half_win = 10, 8
window = slice(center_idx - half_win, center_idx + half_win)
annotation = np.zeros((4, 100, 100))
annotation[0, window, window].shape

(16, 16)

In [40]:

def makeGaussian(size, fwhm = 3, center=None):
    """Build a ``size`` x ``size`` Gaussian bump whose peak value is 1.

    size   -- side length of the square output
    fwhm   -- full-width-half-maximum (roughly an effective radius)
    center -- optional (x0, y0) peak position; x0 indexes columns and
              y0 indexes rows. Falls back to size // 2 on both axes.

    Taken from https://stackoverflow.com/a/14525830
    """
    cols = np.arange(0, size, 1, float)
    rows = cols[:, np.newaxis]

    if center is not None:
        x0, y0 = center[0], center[1]
    else:
        x0 = y0 = size // 2

    # Squared distance of every cell from the peak, broadcast to (size, size).
    sq_dist = (cols - x0)**2 + (rows - y0)**2
    return np.exp(-4*np.log(2) * sq_dist / fwhm**2)

# Preview a 16x16 kernel whose FWHM equals its side length, rounded to two
# decimals for display.
out = makeGaussian(size=16, fwhm=16)
np.around(out, decimals=2)

array([[0.25, 0.29, 0.34, 0.38, 0.42, 0.45, 0.48, 0.49, 0.5 , 0.49, 0.48,
        0.45, 0.42, 0.38, 0.34, 0.29],
       [0.29, 0.35, 0.4 , 0.45, 0.49, 0.53, 0.56, 0.58, 0.59, 0.58, 0.56,
        0.53, 0.49, 0.45, 0.4 , 0.35],
       [0.34, 0.4 , 0.46, 0.52, 0.57, 0.61, 0.65, 0.67, 0.68, 0.67, 0.65,
        0.61, 0.57, 0.52, 0.46, 0.4 ],
       [0.38, 0.45, 0.52, 0.58, 0.64, 0.69, 0.73, 0.75, 0.76, 0.75, 0.73,
        0.69, 0.64, 0.58, 0.52, 0.45],
       [0.42, 0.49, 0.57, 0.64, 0.71, 0.76, 0.81, 0.83, 0.84, 0.83, 0.81,
        0.76, 0.71, 0.64, 0.57, 0.49],
       [0.45, 0.53, 0.61, 0.69, 0.76, 0.82, 0.87, 0.9 , 0.91, 0.9 , 0.87,
        0.82, 0.76, 0.69, 0.61, 0.53],
       [0.48, 0.56, 0.65, 0.73, 0.81, 0.87, 0.92, 0.95, 0.96, 0.95, 0.92,
        0.87, 0.81, 0.73, 0.65, 0.56],
       [0.49, 0.58, 0.67, 0.75, 0.83, 0.9 , 0.95, 0.98, 0.99, 0.98, 0.95,
        0.9 , 0.83, 0.75, 0.67, 0.58],
       [0.5 , 0.59, 0.68, 0.76, 0.84, 0.91, 0.96, 0.99, 1.  , 0.99, 0.96,
        0.91, 0.84, 0.

array([[0.25, 0.34, 0.42, 0.48, 0.5 , 0.48, 0.42, 0.34],
       [0.34, 0.46, 0.57, 0.65, 0.68, 0.65, 0.57, 0.46],
       [0.42, 0.57, 0.71, 0.81, 0.84, 0.81, 0.71, 0.57],
       [0.48, 0.65, 0.81, 0.92, 0.96, 0.92, 0.81, 0.65],
       [0.5 , 0.68, 0.84, 0.96, 1.  , 0.96, 0.84, 0.68],
       [0.48, 0.65, 0.81, 0.92, 0.96, 0.92, 0.81, 0.65],
       [0.42, 0.57, 0.71, 0.81, 0.84, 0.81, 0.71, 0.57],
       [0.34, 0.46, 0.57, 0.65, 0.68, 0.65, 0.57, 0.46]])