In [None]:
from __future__ import print_function

In [None]:
import torch

import cv2

import torch.optim as optim
import torch.nn as nn

import sys

import torchvision.transforms as transforms
from torch.autograd import Variable
import random

from torchvision import models
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [None]:
# Export the model notebook to a plain .py file (shell escape, needs the
# `jupyter` command line tool) so that it can be imported as a module below.
!jupyter nbconvert --to python models/MultiFrameCNN.ipynb
from models import MultiFrameCNN
# Re-execute the module so that a freshly exported version replaces the one
# already loaded in this kernel.
# NOTE(review): bare `reload` is a Python 2 builtin; on Python 3 it has to be
# imported from `importlib` first.
reload(MultiFrameCNN)

In [None]:
# Export the dataset notebook to a plain .py file (shell escape, needs the
# `jupyter` command line tool) so that it can be imported as a module below.
!jupyter nbconvert --to python dataset/VideoSequenceDataset.ipynb
from dataset import VideoSequenceDataset
# Re-execute the module so that a freshly exported version replaces the one
# already loaded in this kernel.
# NOTE(review): bare `reload` is a Python 2 builtin; on Python 3 it has to be
# imported from `importlib` first.
reload(VideoSequenceDataset)

In [None]:
# Export the model notebook to a plain .py file (shell escape, needs the
# `jupyter` command line tool) so that it can be imported as a module below.
!jupyter nbconvert --to python models/SuperMobile.ipynb
from models import SuperMobile
# Re-execute the module so that a freshly exported version replaces the one
# already loaded in this kernel.
# NOTE(review): bare `reload` is a Python 2 builtin; on Python 3 it has to be
# imported from `importlib` first.
reload(SuperMobile)

In [None]:
# Export the model notebook to a plain .py file (shell escape, needs the
# `jupyter` command line tool) so that it can be imported as a module below.
!jupyter nbconvert --to python models/ShuffleNet.ipynb
from models import ShuffleNet
# Re-execute the module so that a freshly exported version replaces the one
# already loaded in this kernel.
# NOTE(review): bare `reload` is a Python 2 builtin; on Python 3 it has to be
# imported from `importlib` first.
# NOTE(review): a later cell of this notebook defines a class that is also
# called `ShuffleNet`; after that cell has run, the name no longer refers to
# this module.
reload(ShuffleNet)

In [None]:
def test(model, testDataset, cuda=True):
    model = model.eval()
    tot = 0
    cor = 0
    for i, (batch, labels) in enumerate(testDataset):
        if cuda:
            batch = batch.cuda()
            labels = labels.cuda()
        outputs=model(Variable(batch))
        _, pred = torch.max(outputs.data, 1)
        tot += labels.size(0)
        cor += (pred == labels).sum()
        
    print(cor, "/", tot, " : ", cor*1.0/tot*100, "%")
    

In [None]:
def train(model, optimizer, trainDataset, valDataset, trans, nbepoch=5, cuda=True):
    """Train ``model`` with a cross-entropy loss.

    At the start of every epoch the accuracy on ``valDataset`` is printed
    through ``test``; the model obtained after the last epoch is therefore
    not evaluated by this function.

    Arguments:
        model (nn.Module): network to train; moved to the GPU when ``cuda``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called once
            per batch.
        trainDataset (iterable): yields ``(batch, labels)`` training pairs.
        valDataset (iterable): forwarded to ``test`` before each epoch.
        trans: unused, kept so that existing callers keep working.
        nbepoch (int, optional): number of passes over ``trainDataset``.
            Default is 5.
        cuda (bool, optional): run on the GPU. Default is True.
    """
    # Class-weighted variant tried earlier, kept for reference:
    #criterion = nn.CrossEntropyLoss(Variable(torch.Tensor([1/5.2,1/5.2,1/5.2,1/5.2,1/5.2,0.2/5.2])))
    criterion = nn.CrossEntropyLoss()
    if cuda:
        model = model.cuda()
    for epoch in range(nbepoch):
        print("Epoch ", epoch, "precision : ")
        test(model, valDataset, cuda)

        model = model.train()
        running_loss = 0.0
        pending = 0  # batches accumulated since the last report
        for i, (batch, labels) in enumerate(trainDataset):
            if cuda:
                batch = batch.cuda()
                labels = labels.cuda()

            outputs = model(Variable(batch))
            loss = criterion(outputs, Variable(labels))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # `loss.data[0]` raises IndexError once losses are 0-dim tensors;
            # `.item()` is used when the installed PyTorch provides it.
            running_loss += loss.item() if hasattr(loss, "item") else loss.data[0]
            pending += 1
            if i % 10 == 0:
                # Average over the batches really accumulated: the report at
                # i == 0 covers a single batch, not ten.
                print(i, ' : ', running_loss / pending)
                running_loss = 0.0
                pending = 0

In [None]:
# Model selection: only the last line is active. The commented-out lines are
# alternative models tried earlier, kept for reference.
#model = MultiFrameCNN.MultiFrameResNet(MultiFrameCNN.BasicBlock, [2,2,2,2], sequence=1, num_classes=27, groups=1)
#with 3 frame, and 3 groups and skip 1: accuracy 100%
#model = MultiFrameCNN.DummyMultiFrame()
#model = MultiFrameCNN.MultiFrameCNN(nbClasse=6,nbFrame=3)
#model = MultiFrameCNN.MultiFrameCNN(3) -> 58,333%
#model.copyParameters(models.alexnet(pretrained=True))
#model = models.AlexNet(num_classes=6)
#model = model.cuda()
#model=SuperMobile.SuperMobile()
#model=SuperMobile.DenseMobile(in_channel=3, num_classes=27, nb_frames=3)
#model=SuperMobile.DenseMobile(nb_frames=3)
# NOTE(review): `ShuffleNet` must still be the module imported from `models`
# when this line runs. A later cell defines a class that is also named
# `ShuffleNet`; once that cell has been executed, re-running this line looks
# up `.ShuffleNet` on the class and fails with AttributeError. It also means
# the model built here comes from models/ShuffleNet, not from the class
# defined further down in this notebook.
model = ShuffleNet.ShuffleNet(num_classes=6)

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
from collections import OrderedDict
from torch.nn import init
import torch.nn.functional as F

def conv3x3(in_channels, out_channels, stride=1,
            padding=1, bias=True, groups=1):
    """Build a ``nn.Conv2d`` with a 3x3 kernel.

    ``stride``, ``padding``, ``bias`` and ``groups`` are forwarded unchanged
    to ``nn.Conv2d``; with the default ``padding=1`` and ``stride=1`` the
    spatial size of the input is preserved.
    """
    options = dict(
        kernel_size=3,
        stride=stride,
        padding=padding,
        groups=groups,
        bias=bias,
    )
    return nn.Conv2d(in_channels, out_channels, **options)


def conv1x1(in_channels, out_channels, groups=1):
    """Build a ``nn.Conv2d`` with a 1x1 kernel and stride 1 (no padding).

    - ``groups == 1``: ordinary pointwise convolution.
    - ``groups > 1``: grouped pointwise convolution.
    """
    pointwise = nn.Conv2d(in_channels, out_channels,
                          kernel_size=1, stride=1, groups=groups)
    return pointwise

class ShuffleUnit(nn.Module):
    """Residual bottleneck unit used to build the ShuffleNet stages.

    Main branch: 1x1 convolution (grouped or not) + batch norm + ReLU,
    channel shuffle, 3x3 depthwise convolution + batch norm, 1x1 grouped
    convolution + batch norm. The result is merged with the shortcut branch
    and passed through a ReLU.

    Arguments:
        in_channels (int): channels of the input tensor.
        out_channels (int): channels of the tensor returned by ``forward``.
        groups (int, optional): number of groups of the grouped 1x1
            convolutions and of the channel shuffle. Default is 3.
        grouped_conv (bool, optional): when False, the first 1x1 convolution
            uses a single group. Default is True.
        combine (str, optional): ``'add'`` sums shortcut and main branch
            (depthwise stride 1); ``'concat'`` average-pools the shortcut
            with stride 2, uses a depthwise stride of 2 and concatenates both
            branches along the channel axis. Default is ``'add'``.

    Raises:
        ValueError: if ``combine`` is neither ``'add'`` nor ``'concat'``.
    """

    def __init__(self, in_channels, out_channels, groups=3,
                 grouped_conv=True, combine='add'):

        super(ShuffleUnit, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.grouped_conv = grouped_conv
        self.combine = combine
        self.groups = groups
        # computed from the requested output width, before the 'concat'
        # adjustment of self.out_channels below
        self.bottleneck_channels = self.out_channels // 4

        # define the type of ShuffleUnit
        if self.combine == 'add':
            # ShuffleUnit Figure 2b
            self.depthwise_stride = 1
            self._combine_func = self._add
        elif self.combine == 'concat':
            # ShuffleUnit Figure 2c
            self.depthwise_stride = 2
            self._combine_func = self._concat

            # The shortcut contributes in_channels channels to the
            # concatenation, so the main branch only has to produce the rest
            # for the unit to output the requested number of channels.
            self.out_channels -= self.in_channels
        else:
            # The original message was glued together without separators
            # ('"x"Only ... aresupported').
            raise ValueError(
                'Cannot combine tensors with "{}". '
                'Only "add" and "concat" are supported'.format(self.combine))

        # Use a 1x1 grouped or non-grouped convolution to reduce input channels
        # to bottleneck channels, as in a ResNet bottleneck module.
        # NOTE: Do not use group convolution for the first conv1x1 in Stage 2.
        self.first_1x1_groups = self.groups if grouped_conv else 1

        self.g_conv_1x1_compress = self._make_grouped_conv1x1(
            self.in_channels,
            self.bottleneck_channels,
            self.first_1x1_groups,
            batch_norm=True,
            relu=True
            )

        # 3x3 depthwise convolution (one group per channel) followed by
        # batch normalization
        self.depthwise_conv3x3 = conv3x3(
            self.bottleneck_channels, self.bottleneck_channels,
            stride=self.depthwise_stride, groups=self.bottleneck_channels)
        self.bn_after_depthwise = nn.BatchNorm2d(self.bottleneck_channels)

        # Use 1x1 grouped convolution to expand from
        # bottleneck_channels to out_channels
        self.g_conv_1x1_expand = self._make_grouped_conv1x1(
            self.bottleneck_channels,
            self.out_channels,
            self.groups,
            batch_norm=True,
            relu=False
            )

    @staticmethod
    def _add(x, out):
        """Residual connection: element-wise sum of shortcut and branch."""
        return x + out

    def _channel_shuffle(self, x, groups):
        """Interleave the channels of ``x`` across ``groups``.

        The channel axis is split into (groups, channels_per_group), the two
        axes are swapped and the result is flattened back, so consecutive
        output channels come from different groups.
        """
        batchsize, num_channels, height, width = x.size()

        channels_per_group = num_channels // groups
        # add a new dimension: (N, groups, channels_per_group, H, W)
        x = x.view(batchsize, groups, channels_per_group, height, width)

        # transpose returns a non-contiguous view; .contiguous() is required
        # before the final .view()
        x = torch.transpose(x, 1, 2).contiguous()
        return x.view(batchsize, -1, height, width)

    @staticmethod
    def _concat(x, out):
        """Concatenate shortcut and branch along the channel axis."""
        return torch.cat((x, out), 1)

    def _make_grouped_conv1x1(self, in_channels, out_channels, groups,
        batch_norm=True, relu=False):
        """Build a 1x1 convolution optionally followed by BN and ReLU.

        Returns a ``nn.Sequential`` with the keys ``conv1x1``, ``batch_norm``
        and ``relu`` (the last two only when requested), or the bare
        convolution when neither is requested.
        """
        modules = OrderedDict()

        conv = conv1x1(in_channels, out_channels, groups=groups)
        modules['conv1x1'] = conv

        if batch_norm:
            modules['batch_norm'] = nn.BatchNorm2d(out_channels)
        if relu:
            modules['relu'] = nn.ReLU()
        if len(modules) > 1:
            return nn.Sequential(modules)
        else:
            return conv

    def forward(self, x):
        """Apply the unit to ``x`` (a 4-D tensor, as unpacked by the shuffle)."""
        # save for combining later with output
        residual = x

        if self.combine == 'concat':
            # match the spatial size of the stride-2 main branch
            residual = F.avg_pool2d(residual, kernel_size=3,
                stride=2, padding=1)

        out = self.g_conv_1x1_compress(x)
        out = self._channel_shuffle(out, self.groups)
        out = self.depthwise_conv3x3(out)
        out = self.bn_after_depthwise(out)
        out = self.g_conv_1x1_expand(out)

        out = self._combine_func(residual, out)
        return F.relu(out)


class ShuffleNet(nn.Module):
    """ShuffleNet implementation.

    Layout: 3x3 stride-2 convolution, 3x3 stride-2 max pooling, three stages
    of ShuffleUnits, global average pooling and a fully-connected classifier.
    ``forward`` returns log-probabilities (``log_softmax``).
    """

    # Output channels per stage for every supported number of groups.
    # Index 0 is invalid and should never be used; it only keeps the list
    # indexable by stage number. Stage 1 always has 24 output channels.
    # The last entry for groups == 1 is 576 as in the ShuffleNet paper; the
    # previous value 567 was a transposition of the digits.
    _STAGE_OUT_CHANNELS = {
        1: [-1, 24, 144, 288, 576],
        2: [-1, 24, 200, 400, 800],
        3: [-1, 24, 240, 480, 960],
        4: [-1, 24, 272, 544, 1088],
        8: [-1, 24, 384, 768, 1536],
    }

    def __init__(self, groups=3, in_channels=3, num_classes=1000):
        """ShuffleNet constructor.
        Arguments:
            groups (int, optional): number of groups to be used in grouped
                1x1 convolutions in each ShuffleUnit. Default is 3 for best
                performance according to original paper.
            in_channels (int, optional): number of channels in the input tensor.
                Default is 3 for RGB image inputs.
            num_classes (int, optional): number of classes to predict. Default
                is 1000 for ImageNet.
        Raises:
            ValueError: if ``groups`` is not one of 1, 2, 3, 4 or 8.
        """
        super(ShuffleNet, self).__init__()

        self.groups = groups
        # number of 'add' units following the first 'concat' unit of
        # stages 2, 3 and 4
        self.stage_repeats = [3, 7, 3]
        self.in_channels = in_channels
        self.num_classes = num_classes

        if groups not in self._STAGE_OUT_CHANNELS:
            # The original code formatted an undefined name here and raised
            # NameError instead of the intended ValueError.
            raise ValueError(
                "{} groups is not supported for "
                "1x1 Grouped Convolutions".format(groups))
        # copy so that instances never share (or mutate) the class table
        self.stage_out_channels = list(self._STAGE_OUT_CHANNELS[groups])

        # Stage 1 always has 24 output channels
        self.conv1 = conv3x3(self.in_channels,
                             self.stage_out_channels[1], # stage 1
                             stride=2)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stage 2
        self.stage2 = self._make_stage(2)
        # Stage 3
        self.stage3 = self._make_stage(3)
        # Stage 4
        self.stage4 = self._make_stage(4)

        # Global pooling:
        # Undefined as PyTorch's functional API can be used for on-the-fly
        # shape inference if input size is not ImageNet's 224x224

        # Fully-connected classification layer
        num_inputs = self.stage_out_channels[-1]
        self.fc = nn.Linear(num_inputs, self.num_classes)
        self.init_params()

    def init_params(self):
        """Initialise every convolution, batch-norm and linear layer.

        Convolutions: Kaiming-normal weights (fan-out), zero bias.
        Batch norm: weight 1, bias 0. Linear: normal(std=0.001), zero bias.
        """
        # NOTE(review): the non-underscore init functions are the spelling of
        # the PyTorch version this notebook was written for; recent releases
        # keep them as deprecated aliases of kaiming_normal_/constant_/normal_.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant(m.weight, 1)
                init.constant(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant(m.bias, 0)

    def _make_stage(self, stage):
        """Build stage ``stage`` (2, 3 or 4) as a ``nn.Sequential``.

        The stage starts with one 'concat' ShuffleUnit that changes the
        number of channels, followed by ``stage_repeats[stage-2]`` 'add'
        units that keep it.
        """
        modules = OrderedDict()
        stage_name = "ShuffleUnit_Stage{}".format(stage)

        # First ShuffleUnit in the stage
        # 1. non-grouped 1x1 convolution (i.e. pointwise convolution)
        #   is used in Stage 2. Group convolutions used everywhere else.
        grouped_conv = stage > 2

        # 2. concatenation unit is always used.
        first_module = ShuffleUnit(
            self.stage_out_channels[stage-1],
            self.stage_out_channels[stage],
            groups=self.groups,
            grouped_conv=grouped_conv,
            combine='concat'
            )
        modules[stage_name+"_0"] = first_module

        # add more ShuffleUnits depending on pre-defined number of repeats
        for i in range(self.stage_repeats[stage-2]):
            name = stage_name + "_{}".format(i+1)
            module = ShuffleUnit(
                self.stage_out_channels[stage],
                self.stage_out_channels[stage],
                groups=self.groups,
                grouped_conv=True,
                combine='add'
                )
            modules[name] = module

        return nn.Sequential(modules)

    def forward(self, x):
        """Return per-class log-probabilities for the input batch ``x``.

        Because the output is already a ``log_softmax``, ``nn.NLLLoss`` is
        the matching criterion; ``nn.CrossEntropyLoss`` gives the same value
        since applying ``log_softmax`` to log-probabilities leaves them
        unchanged.
        """
        x = self.conv1(x)
        x = self.maxpool(x)

        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)

        # global average pooling layer: the kernel covers the whole
        # remaining feature map, whatever the input resolution was
        x = F.avg_pool2d(x, x.size()[-2:])

        # flatten for input to fully-connected layer
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return F.log_softmax(x, dim=1)

In [None]:
def nbParameters(net):
    """Return the total number of scalar values held by ``net.parameters()``."""
    return sum(param.data.nelement() for param in net.parameters())

In [None]:
if __name__ == '__main__':
    # Training script: builds the input pipeline and the two datasets, trains
    # the globally defined `model`, reports its final accuracy and saves it.
    #model=SuperMobile.DenseMobile(first_group=True)
    print(nbParameters(model))

    # Fall back to the CPU instead of crashing inside `.cuda()` when no CUDA
    # device is usable.
    use_cuda = torch.cuda.is_available()
    if not use_cuda:
        print("CUDA is not available: training on CPU")

    # Per-channel statistics, repeated three times (9 values) exactly as in
    # the original literal lists.
    # NOTE(review): these look like the usual ImageNet RGB statistics
    # replicated for 3 stacked frames, while SequenceSize is 1 below —
    # confirm the channel count VideoDataset really produces.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    trans = transforms.Compose(
        (
            transforms.ToPILImage(),
            transforms.Resize(224),
            transforms.RandomCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=channel_mean * 3, std=channel_std * 3),
        )
    )

    trainDataset = VideoSequenceDataset.VideoDataset(
        rep="/video/GestureSequence/", SequenceSize=1, batchSize=32,
        transform=trans, concat=True, dropFrame=1)
    testDataset = VideoSequenceDataset.VideoDataset(
        rep="/video/GestureTest/", SequenceSize=1, batchSize=16,
        transform=trans, concat=True, dropFrame=1)

    optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9, weight_decay=0.0005)
    train(model, optimizer, trainDataset, testDataset, trans, nbepoch=30, cuda=use_cuda)

    # `train` only evaluates at the start of each epoch, so measure the model
    # that is actually saved.
    print("Final precision : ")
    test(model, testDataset, use_cuda)

    # Saves the whole module object (not only its state_dict), as before.
    torch.save(model, "gesture_best_1.pytorch")

In [None]:
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
import numpy as np
import random

%matplotlib inline
def show(img):
    """Draw the tensor ``img`` with matplotlib.

    The tensor is converted to a NumPy array and its first axis is moved to
    the end before being handed to ``plt.imshow`` (nearest-neighbour
    interpolation).
    """
    channels_last = np.transpose(img.numpy(), (1, 2, 0))
    plt.imshow(channels_last, interpolation='nearest')

In [None]:
# Display three 3-channel slices (channels 0-2, 3-5 and 6-8) of `a[0][4]`
# side by side in one grid.
# NOTE(review): `a` is not defined in any cell visible here, so this cell only
# works against leftover kernel state — confirm where `a` comes from.
# Presumably each slice is one RGB frame of a 9-channel stacked sample —
# TODO confirm.
show(make_grid([a[0][4][:3], a[0][4][3:6], a[0][4][6:9]], padding=1, normalize=False))