In [1]:
import os
from pycocotools.coco import COCO
import numpy as np
import torch.utils.data as data
import torch
from heatmap import heatmaps_from_keypoints
from imageio import imread
from skimage.transform import resize
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from torch.nn import init
from torch.autograd.variable import Variable
import matplotlib.pyplot as plt
import pickle

In [2]:
# Root folder of the MS-COCO 2017 data. It can be overridden without editing
# the notebook by setting the MSCOCO_DATA_DIR environment variable; the
# default is the location that was hard-coded originally.
MAIN_FOLDER = os.environ.get("MSCOCO_DATA_DIR", "/Volumes/TOSHIBA EXT/data/")
# Images used for training / evaluation and for testing
IMAGES_FOLDER = os.path.join(MAIN_FOLDER, "train2017")
IMAGES_FOLDER_TEST = os.path.join(MAIN_FOLDER, "val2017")
# Person-keypoints annotation files matching the two image folders
ANNOTATION_FILE = os.path.join(MAIN_FOLDER, "annotations/person_keypoints_train2017.json")
ANNOTATION_FILE_TEST = os.path.join(MAIN_FOLDER, "annotations/person_keypoints_val2017.json")
# NOTE(review): not referenced by the code visible in this notebook
# (checkpoints are written under MAIN_FOLDER) - confirm whether it is still needed
CHECKPOINTS_FOLDER = "./cktp/"

### Heatmap

In [3]:
def gaussian_heatmap(shape, keypoint_coordinates, std = 1.5):
    """
        Computes a square gaussian heatmap centred on a keypoint

        :param shape: Side length of the square output heatmap
        :param keypoint_coordinates: (x, y) location of the keypoint, in heatmap cells
        :param std: Standard deviation of the gaussian, in heatmap cells

        :return: Heatmap of shape (1,shape,shape), indexed [0, y, x], rescaled so
                 that its maximum is 1 (all zeros when the gaussian underflows to
                 0 on the whole grid)
    """

    # Get the coordinates
    x = keypoint_coordinates[0]
    y = keypoint_coordinates[1]

    # Column coordinates as a row vector, row coordinates as a column vector:
    # broadcasting them against each other yields the full (shape, shape) grid
    cols = np.arange(0, shape, 1, float)
    rows = cols[:,np.newaxis]

    # Generate the heatmap
    heatmap = np.exp(-(((cols-x)**2)/(2*std**2) + ((rows-y)**2)/(2*std**2)))

    # Normalize so that the peak is exactly 1. The guard avoids a 0/0 -> NaN
    # heatmap when the keypoint is so far away that every cell underflows to 0.
    peak = np.amax(heatmap) if heatmap.size else 0.
    if peak > 0:
        heatmap = heatmap/peak

    # Add the leading axis so that heatmaps can be stacked joint by joint
    return np.expand_dims(heatmap, axis=0)

def gaussian_heatmaps(xs, ys, vs, shape=32, image_height=512, image_width=640, std=1.):
    """
        Computes heatmaps from the keypoints
        :param xs: Array of x coordinates for the keypoints
        :param ys: Array of y coordinates for the keypoints
        :param vs: Array of visibility flags; a joint whose flag is 0 gets an
                   all-zero heatmap
        :param shape: shape of the heatmaps
        :param image_height: Height of the images the keypoints are for
        :param image_width: Width of the images the keypoints are for
        :param std: Standard deviation of the gaussian function used

        :return: Heatmaps as a numpy array of shape (n_keypoints, shape, shape)
    """

    # Rescale keypoints coordinates to the heatmaps scale
    ys = np.asarray(ys)*(shape/image_height)
    xs = np.asarray(xs)*(shape/image_width)

    # Render one heatmap per joint. Every joint, including the first one, gets
    # zeros when it is flagged 0, and `std` is forwarded so that the parameter
    # actually controls the rendered gaussians.
    heatmaps = [
        gaussian_heatmap(shape, (x, y), std) if v != 0 else np.zeros((1, shape, shape))
        for x, y, v in zip(xs, ys, vs)
    ]

    # No joint at all: return an empty stack rather than failing
    if not heatmaps:
        return np.zeros((0, shape, shape))

    # Each rendered heatmap already has a leading axis of size 1, so a single
    # concatenation builds the stack (no repeated np.append copies)
    return np.concatenate(heatmaps, axis=0)

def keypoints_from_heatmap(heatmap):
    """
        Get the coordinates of the max value of a heatmap - it is the keypoint

        :param heatmap: Array of shape (rows, cols) or (n, rows, cols)

        :return: Tuple (x, y, max_value) where x is the column index and y the
                 row index of the first maximum in row-major order
        :raises ValueError: if the heatmap is empty or is neither 2-D nor 3-D
    """
    heatmap = np.asarray(heatmap)
    # Fail loudly instead of implicitly returning None, which only surfaced
    # later as an unpacking error in the caller
    if heatmap.ndim not in (2, 3):
        raise ValueError('Expected a 2-D or 3-D heatmap, got {} dimension(s)'.format(heatmap.ndim))
    if heatmap.size == 0:
        raise ValueError('Cannot locate a keypoint in an empty heatmap')

    # argmax returns the first maximum of the flattened array; unravel_index
    # turns it back into one index per axis
    peak_index = np.unravel_index(np.argmax(heatmap), heatmap.shape)
    # The last two axes are (row, col) for both supported ranks
    return peak_index[-1], peak_index[-2], heatmap[peak_index]

def keypoints_from_heatmaps(heatmaps, shape=32, image_height=512, image_width=640):
    """
        Converts a stack of heatmaps into a flat [x0, y0, v0, x1, y1, v1, ...] list

        :param heatmaps: Iterable of heatmaps, one per joint
        :param shape: Side length of the heatmaps
        :param image_height: Height of the image the coordinates are mapped to
        :param image_width: Width of the image the coordinates are mapped to

        :return: List with three entries per heatmap: [0, 0, 0] when the heatmap
                 maximum is 0, otherwise [x, y, 2] with x and y in image scale
    """
    flat_keypoints = []
    for joint_heatmap in heatmaps:
        col, row, peak = keypoints_from_heatmap(joint_heatmap)
        if peak != 0:
            # Map heatmap cells back to image coordinates
            flat_keypoints.extend([col*image_width/shape, row*image_height/shape, 2])
        else:
            # A heatmap whose maximum is 0 carries no keypoint
            flat_keypoints.extend([0, 0, 0])
    return flat_keypoints

def get_xs_ys_vs(keypoints):
    """
        Splits a flat [x0, y0, v0, x1, y1, v1, ...] keypoints list into three arrays

        :param keypoints: Flat sequence holding at least 17 (x, y, v) triplets

        :return: Tuple (xs, ys, vs) of arrays with 17 entries each
    """
    flat = np.asarray(keypoints)
    # Position of the first component of each of the 17 triplets
    triplet_starts = 3 * np.arange(17)
    # Offset 0 selects the xs, 1 the ys and 2 the vs
    return tuple(np.take(flat, triplet_starts + offset) for offset in range(3))

def heatmaps_from_keypoints(keypoints):
    """
        Renders the heatmaps of a flat [x0, y0, v0, ...] keypoints list

        :param keypoints: Flat keypoints list, as accepted by get_xs_ys_vs

        :return: Heatmaps computed by gaussian_heatmaps with its default settings
    """
    # get_xs_ys_vs returns (xs, ys, vs), exactly the leading positional
    # arguments gaussian_heatmaps expects
    return gaussian_heatmaps(*get_xs_ys_vs(keypoints))

### Dataset

In [4]:
class MSCOCO(data.Dataset):
    """ Represents a MSCOCO Keypoints dataset """

    # Number of joints per annotation; a keypoints list holds 3 values per joint
    N_KEYPOINTS = 17
    # Image served in place of any sample that cannot be loaded
    # NOTE(review): this id must exist in the annotation file in use - confirm
    # for the test annotations
    FALLBACK_IMG_ID = 391895

    def __init__(self, images_folder, annotations_json, train=False, evalu=False, input_type=0):
        """
            Instantiate a MSCOCO dataset

            :param images_folder: Folder containing the image files
            :param annotations_json: Path of the keypoints annotation file
            :param train: Keep only the first two thirds of the images
            :param evalu: Keep only the remaining third (ignored when train is set)
            :param input_type: Which images are stacked to build the input, see below
        """
        super().__init__()

        self.images_folder = images_folder
        #Input type indicates if the input is the original image or a combination of original image with filtered image
        #0 : original image
        #1 : original image + skin filtered
        #2 : original image + edge filter
        #3 : original image + clustering filter
        #4 : original image + skin filter + edge filter
        #5 : original image + skin filter + clustering filter
        self.input_type = input_type

        # Load the annotations
        self.annotations = COCO(annotations_json)
        self.img_ids = MSCOCO._split_img_ids(self.annotations.getImgIds(), train, evalu)

    @staticmethod
    def _split_img_ids(imgs_id, train=False, evalu=False):
        """ Returns the train part, the eval part or the whole list of image ids """
        split = int(len(imgs_id)*2/3)
        if train:
            return imgs_id[:split]
        if evalu:
            # Start exactly where the train part stops, so that no image is
            # left out of both parts
            return imgs_id[split:]
        return imgs_id

    def __len__(self):
        return len(self.img_ids)

    def __getitem__(self, index):
        """
            Returns the index-th image with its keypoints heatmaps, both as tensors

            When the sample cannot be loaded, a warning is printed and the
            sample of FALLBACK_IMG_ID is returned instead.
        """
        # Looked up outside of the try block: an out-of-range index must raise
        # IndexError instead of being replaced by the fallback image
        img_id = self.img_ids[index]
        try:
            return self._load_sample(img_id)
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still stop the data loading workers
        except Exception as error:
            print('Warning: could not load image {} ({!r}), using image {} instead'.format(
                img_id, error, self.FALLBACK_IMG_ID))
            return self._load_sample(self.FALLBACK_IMG_ID)

    def _load_sample(self, img_id):
        """ Builds the (images tensor, heatmaps tensor) pair of one image id """
        # Get the image informations
        img = self.annotations.loadImgs(img_id)[0]
        img_path = os.path.join(self.images_folder, img['file_name'])

        # Paths of all the images stacked to build the input
        input_paths = [img_path]
        #Need to adapt it depending on the path of the filtered image
        if self.input_type in (1, 4, 5):
            input_paths.append(img_path) #Need to change with skin filtered image
        if self.input_type in (2, 4):
            input_paths.append(img_path) #Need to change with edge filtered image
        if self.input_type in (3, 5):
            input_paths.append(img_path) #Need to change with clustering filtered image

        input_imgs = []
        for path in input_paths:
            img_array = MSCOCO.transformGreyImage(load_image(path))
            input_imgs.append(torch.from_numpy(img_array).float()) # Pytorch needs a float tensor

        # Get the keypoints
        annIds = self.annotations.getAnnIds(imgIds=img['id'])
        anns = self.annotations.loadAnns(annIds)
        expected_length = 3*self.N_KEYPOINTS
        if len(anns)>0:
            # Only the first annotation of the image is used
            keypoints = anns[0]['keypoints']
        else:
            # Some images do not contain any annotation: no joint is visible
            keypoints = [0 for i in range(expected_length)]

        # Check to avoid errors
        if len(keypoints)!=expected_length:
            print('Warning: Keypoints list for image {} has length {} instead of {}'.format(
                img_id, len(keypoints), expected_length))

        # Generate the heatmaps
        heatmaps_array = heatmaps_from_keypoints(keypoints)
        keypoints_tensor = torch.from_numpy(heatmaps_array).float() # Pytorch needs a float tensor

        # Stack all the input images along the channel axis
        img_tensor = torch.cat(input_imgs,0)

        return img_tensor, keypoints_tensor

    @staticmethod
    def transformGreyImage(img_array):
        """
            Converts a (height, width[, channels]) image to channel-first layout

            Black and white images (2-D arrays) are replicated on 3 channels.

            :return: Array of shape (channels, height, width)
        """
        # Black and white images
        if len(img_array.shape)==2:
            # Add a channel axis
            img_array = np.expand_dims(img_array, axis=2)
            # Fill all the axes with the black&white image
            img_array = np.concatenate((img_array, img_array, img_array), axis=2)
        # Channel first while keeping rows before columns: the heatmaps used as
        # targets are indexed [joint, y, x], so the image must be [channel, y, x]
        # (a (2,1,0) transpose would swap the two spatial axes)
        img_array = np.transpose(img_array, (2,0,1))
        return img_array


# Homemade image loader
def load_image(image_path):
    """ Reads the image stored at image_path and resizes it to 256x256 """
    return resize(imread(image_path), (256, 256))

### Model

In [5]:
class ConvRelu(nn.Module):
    """ Convolution followed by a ReLU and, optionally, a batch normalization """

    def __init__(self, in_channels, out_channels, kernel_size, training=True, padding=1, stride=1):
        """
            :param in_channels: Number of channels of the input
            :param out_channels: Number of channels produced by the convolution
            :param kernel_size: Size of the convolution kernel
            :param training: Whether the batch normalization is applied
            :param padding: Padding of the convolution
            :param stride: Stride of the convolution
        """
        super().__init__()
        self.conv = nn.Conv2d(in_channels,
                            out_channels,
                            kernel_size,
                            padding=padding,
                            stride=stride)

        self.relu = nn.ReLU()
        self.batch_norm = nn.BatchNorm2d(out_channels)
        # Stored under its own name: `self.training` is the flag nn.Module
        # flips in train()/eval(). Reusing it here made eval() drop the batch
        # normalization altogether instead of switching it to its running
        # statistics.
        self.use_batch_norm = training

    def forward(self, x):
        x = self.relu(self.conv(x))
        if self.use_batch_norm:
            # BatchNorm2d picks batch or running statistics by itself,
            # depending on the train/eval mode of the module
            x = self.batch_norm(x)
        return x


class Model(nn.Module):
    """ Convolutional network turning stacked input images into 17 joint heatmaps """

    # Number of input channels for each input_type: 3 channels per stacked image
    # (0: 1 image, 1-3: 2 images, 4-5: 3 images)
    _INPUT_CHANNELS = {0: 3, 1: 6, 2: 6, 3: 6, 4: 9, 5: 9}

    def __init__(self, input_type=0):
        """
            :param input_type: Integer in 0..5 selecting how many images are stacked
            :raises ValueError: if input_type is not one of the supported values
        """
        super().__init__()
        self.pool = nn.MaxPool2d(2)

        # Explicit error instead of an unbound `input_size` further down
        if input_type not in Model._INPUT_CHANNELS:
            raise ValueError('Unsupported input_type {!r}, expected one of {}'.format(
                input_type, sorted(Model._INPUT_CHANNELS)))
        input_size = Model._INPUT_CHANNELS[input_type]

        # The three poolings divide the spatial size by 8; the 3x3 convolutions
        # use a padding of 1 and therefore preserve it
        self.feature_extraction = nn.Sequential(
                ConvRelu(input_size, 64, 3),
                ConvRelu(64, 64, 3),
                self.pool,
                ConvRelu(64, 128, 3),
                #ConvRelu(128, 128, 3),
                self.pool,
                ConvRelu(128, 128, 3),
                #ConvRelu(128, 128, 3),
                self.pool,
                ConvRelu(128, 512, 3),
                #ConvRelu(512, 512, 3),
                )

        self.features_to_heatmaps = nn.Conv2d(512, 17, 1) # 17 kind of joints, 17 heatmaps

    def forward(self, x):
        """ Returns the 17 heatmaps predicted for the batch of inputs x """
        x = self.feature_extraction(x)
        heatmaps = self.features_to_heatmaps(x)
        return heatmaps

def plotKeypointsOverOutputModel(index,dataset,model,img_folder):
    """Run the model on one image of the dataset and plot the predicted
       keypoints on top of that image, to follow the training visually.

       index is the position of the image in dataset, img_folder the folder
       the image file is read from."""
    # Sample to display (the target heatmaps of the sample are not needed)
    image_id = dataset.img_ids[index]
    image_tensor, _ = dataset[index]

    # Add the batch axis and run the forward pass
    prediction = model(Variable(image_tensor.unsqueeze(0)))

    # Keypoints coordinates decoded from the predicted heatmaps
    predicted_keypoints = keypoints_from_heatmaps(prediction[0].data.numpy())

    # Reload the picture from disk and bring it to the 512x640 frame used by
    # the default arguments of keypoints_from_heatmaps
    image_info = dataset.annotations.loadImgs(image_id)[0]
    picture = load_image(os.path.join(img_folder, image_info['file_name']))
    picture = resize(picture, (512, 640))

    # Draw the picture, then the keypoints over it
    plt.figure()
    plt.title('Original image')
    plt.imshow(picture)
    xs, ys, _ = get_xs_ys_vs(predicted_keypoints)
    plt.plot(xs,ys,'ro',color='c')
    plt.show()

### Configuration of the training

In [6]:
def conf_training(resuming=False, input_type=0, *args):
    """Function that initiates the configuration of the model depending if a last model
       is loaded or if it's the beginning of a new model

       :param resuming: When true, the model is restored from MAIN_FOLDER/model_<args[0]>
       :param input_type: Input type forwarded to the datasets and to the model
       :param args: When resuming, args[0] is the suffix of the checkpoint file

       :return: Tuple (epochs, trainloader, evaloader, optimizer, net, current_epoch,
                criterion, evalset_length, evalset); current_epoch is -1 for a new model
       :raises ValueError: if resuming is requested without a checkpoint suffix
    """
    # Checked first so that the mistake is reported before the annotations
    # are loaded by the datasets
    if resuming and not args:
        raise ValueError('Resuming a training requires the suffix of the checkpoint '
                         '(file "model_<suffix>") as an extra positional argument')

    #Data
    trainset = MSCOCO(IMAGES_FOLDER, ANNOTATION_FILE, train=True, input_type=input_type)
    evalset = MSCOCO(IMAGES_FOLDER, ANNOTATION_FILE, evalu=True, input_type=input_type)

    # Loss
    criterion = nn.MSELoss()
    #criterion = nn.CrossEntropyLoss()

    # Number of epochs
    epochs = 10

    # Batch sizes
    batch_size_train = 1
    batch_size_val = 1

    # Model
    net = Model(input_type=input_type)

    #First epoch
    current_epoch = -1

    if resuming:
        #Load the last saved model with its configurations
        # str() so that an integer epoch is accepted as well as a string suffix
        # NOTE(review): torch.load unpickles the file - only load trusted checkpoints
        checkpoint = torch.load(os.path.join(MAIN_FOLDER,"model_"+str(args[0])))
        net.load_state_dict(checkpoint['state_dict'])

        #Current_epoch
        current_epoch = checkpoint['epoch']

    # Optimizer. It always starts from a fresh state: only 'state_dict' and
    # 'epoch' are read from the checkpoint
    optimizer = torch.optim.Adam(net.parameters())

    #Data loaders
    trainloader = torch.utils.data.DataLoader(trainset,
                                         batch_size=batch_size_train,
                                         shuffle=True,
                                         num_workers=4
                                        )

    evaloader = torch.utils.data.DataLoader(evalset,
                                         batch_size=batch_size_val,
                                         shuffle=True,
                                         num_workers=4
                                        )

    evalset_length = len(evalset)

    return epochs, trainloader, evaloader, optimizer, net, current_epoch, criterion, evalset_length, evalset

### Running the model

In [7]:
def training(epochs, trainloader, evaloader, optimizer, net, current_epoch, criterion, evalset_length, evalset):
    """Train the model, then evaluate it and save a checkpoint after every epoch.

       The loss of every batch is stored in the pickle file MAIN_FOLDER/loss as
       {"loss_train": {epoch: [losses]}, "loss_val": {epoch: [losses]}}. The file
       is rewritten after each batch, so it stays up to date if the run is
       interrupted. The checkpoint of an epoch is saved as MAIN_FOLDER/model_<epoch>.

       :param epochs: Training stops before this epoch number
       :param trainloader: Iterable of (inputs, labels) batches used for training
       :param evaloader: Iterable of (inputs, labels) batches used for evaluation
       :param optimizer: Optimizer updating the parameters of net
       :param net: Model to train
       :param current_epoch: Last completed epoch, -1 for a new model (the loss
                             file is then created from scratch)
       :param criterion: Loss function called as criterion(outputs, labels)
       :param evalset_length: Divisor used to average the evaluation loss
       :param evalset: Only referenced by a commented-out plotting call
    """
    loss_path = os.path.join(MAIN_FOLDER,"loss")

    def loss_value(loss):
        # `loss.data[0]` only works on the PyTorch versions where a loss is a
        # 1-element tensor; newer versions return a 0-dim tensor and need item()
        return loss.item() if hasattr(loss, 'item') else loss.data[0]

    def dump_losses(losses):
        # Context manager: the file is closed even if pickling fails
        with open(loss_path,'wb') as lossFile:
            pickle.dump(losses,lossFile)

    plt.ion()
    if current_epoch == -1:
        #If not resuming a model, creating the loss file
        dump_losses({"loss_train":{}, "loss_val":{}})

    # The losses are read once and then kept in memory: every later access to
    # the file is a write of this dictionary
    # NOTE(review): pickle.load executes arbitrary code from the file - it must
    # only ever be a file written by this function
    with open(loss_path,'rb') as lossFile:
        loss_dic = pickle.load(lossFile)

    start_epoch = current_epoch + 1
    for epoch in range(start_epoch, epochs):  # loop over the dataset multiple times
        print("Epoch number {}".format(epoch))
        #plotKeypointsOverOutputModel(0,evalset,net,IMAGES_FOLDER)#Displaying the result over the first element of the evalset
        running_loss = 0.0

        #For each epoch, we keep the loss under a dictionnary with epoch_nb as key and list of loss as value
        loss_dic['loss_train'][epoch] = []
        loss_dic['loss_val'][epoch] = []
        dump_losses(loss_dic)

        for i, batch in enumerate(trainloader, 0):
            print("Batch number {}".format(i))
            # get the inputs
            inputs, labels = batch

            # wrap them in Variable
            inputs, labels = Variable(inputs), Variable(labels)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            batch_loss = loss_value(loss)
            running_loss += batch_loss
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('Trainset loss[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

            #Save the loss_train in disk for each batch
            loss_dic['loss_train'][epoch] += [batch_loss]
            dump_losses(loss_dic)

        #Save the model
        state = {
            'epoch': epoch,
            'state_dict': net.state_dict()
        }
        torch.save(state, os.path.join(MAIN_FOLDER,"model_"+str(epoch))) #Save the torch model after each epoch

        running_loss_eval = 0.0
        print("Starting Eval for Epoch {}".format(epoch))
        for i, batch in enumerate(evaloader, 0):
            # get the inputs
            inputs, labels = batch

            # wrap them in Variable
            inputs, labels = Variable(inputs), Variable(labels)

            # forward
            outputs = net(inputs)
            loss = criterion(outputs, labels)

            # print statistics
            batch_loss = loss_value(loss)
            running_loss_eval += batch_loss

            #Save the loss_val in disk for each batch
            loss_dic['loss_val'][epoch] += [batch_loss]
            dump_losses(loss_dic)

        print("Evalset Loss for Epoch {0} : {1}".format(epoch,running_loss_eval/evalset_length))

    print('Finished Training')

def launch_training(resuming=False, input_type=0, *args):
    """Function that configurates the model from init or a last model ; and then it trains the model

       :param resuming: Whether the training resumes from a saved model
       :param input_type: Input type forwarded to conf_training
       :param args: Extra positional arguments forwarded to conf_training
    """
    # Everything is forwarded positionally: combining `resuming=...` with
    # `*args` binds args[0] to `resuming` as well and raises a TypeError
    # ("multiple values for argument") as soon as args is not empty
    configuration = conf_training(resuming, input_type, *args)
    # conf_training returns its values in the order of the parameters of training
    training(*configuration)

def launch_testing(model_epoch, input_type=0):
    """Function that launches a model over the test dataset

       :param model_epoch: Name of the checkpoint file, relative to MAIN_FOLDER
       :param input_type: Input type the model was trained with

       Prints the running average of the loss every 500 batches, then the
       average loss over the test dataset.
    """
    testset = MSCOCO(IMAGES_FOLDER_TEST, ANNOTATION_FILE_TEST,input_type=input_type)

    #Load the training model
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints
    checkpoint = torch.load(os.path.join(MAIN_FOLDER, model_epoch))
    net = Model(input_type=input_type)
    net.load_state_dict(checkpoint['state_dict'])

    # Loss
    criterion = nn.MSELoss()

    # Batch sizes
    batch_size_test = 1

    #TestLoader
    evaloader = torch.utils.data.DataLoader(testset,
                                            batch_size=batch_size_test,
                                            shuffle=True,
                                            num_workers=4
                                            )

    loss_test = 0.0
    for i, batch in enumerate(evaloader):
        inputs, labels = batch[0], batch[1]
        inputs, labels = Variable(inputs), Variable(labels)
        outputs = net(inputs)
        # Compare the predictions with the labels (the name `y` used here
        # before was never defined)
        loss = criterion(outputs, labels)
        # `loss.data[0]` only works on the PyTorch versions where a loss is a
        # 1-element tensor; newer versions return a 0-dim tensor and need item()
        loss_test += loss.item() if hasattr(loss, 'item') else loss.data[0]
        if i % 500 ==0:
            print("Current loss over the test dataset: {0} after {1}ème iteration".format(loss_test/(i+1),i+1))

    loss_test = loss_test/len(testset)
    print("Average loss over the test dataset: {}".format(loss_test))

In [8]:
#Train a brand-new model on the original image only (input_type = 0)
launch_training(resuming=False, input_type=0)

loading annotations into memory...
Done (t=21.31s)
creating index...
index created!
loading annotations into memory...
Done (t=38.47s)
creating index...
index created!
Epoch number 0


  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("The default mode, 'constant', will be changed to 'reflect' in "


not visible
not visible
not visible
Batch number 0
not visible
Batch number 1
not visible
Batch number 2
Batch number 3
not visible
Batch number 4
not visible
Batch number 5
Batch number 6
not visible
Batch number 7
Batch number 8
Batch number 9
not visible
Batch number 10
Batch number 11
Batch number 12
not visible
Batch number 13
not visible
Batch number 14
Batch number 15
not visible
Batch number 16
not visible
Batch number 17
Batch number 18
Batch number 19
not visible
not visible
Batch number 20
Batch number 21
Batch number 22
Batch number 23
Batch number 24
Batch number 25
Batch number 26
not visible
Batch number 27
not visible
Batch number 28
not visible
Batch number 29
Batch number 30
Batch number 31
not visible
Batch number 32
not visible
Batch number 33
not visible
Batch number 34
not visible
Batch number 35
not visible
Batch number 36
Batch number 37
Batch number 38
Batch number 39
Batch number 40
Batch number 41
Batch number 42
Batch number 43


Process Process-4:
Process Process-3:
Process Process-1:
Process Process-2:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/alexandresioufi/anaconda3/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/alexandresioufi/anaconda3/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/alexandresioufi/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/alexandresioufi/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/alexandresioufi/anaconda3/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/alexandresioufi/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Use

KeyboardInterrupt: 

In [None]:
#Resume the training of a previously saved model, with input_type = 0
#launch_training(True,0,path_model)

In [None]:
#Evaluate a trained model on the test dataset, with input_type = 0
#launch_testing(path_model,0)

In [13]:
%cd cocoapi
!ls

/Users/alexandresioufi/Documents/Projets infos/deeplearning/dl_project/cocoapi
[34mLuaAPI[m[m      [34mPythonAPI[m[m   [34mcommon[m[m      [34mresults[m[m
[34mMatlabAPI[m[m   README.txt  license.txt
