In [None]:
# boilerplate imports, trim later
import math
from random import sample
from random import randint

!pip install comet-ml &> /dev/null
import comet_ml

!pip install pytorch-lightning &> /dev/null
import pytorch_lightning as pl
from pytorch_lightning.loggers import CometLogger
import tensorboard
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import cv2

!pip install pytorch-msssim &> /dev/null
from pytorch_msssim import ssim, ms_ssim, SSIM, MS_SSIM

import matplotlib.pyplot as plt
from livelossplot import PlotLosses
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing  (1.1)

In [None]:
def load_video(filepath, start_frame=0, end_frame=-1, scale_percent=50):
    """Decode a contiguous range of frames from a video into one RGB array.

    Parameters
    ----------
    filepath : str
        Path passed to ``cv2.VideoCapture``.
    start_frame : int, optional
        Index of the first frame to decode (default 0). Negative values are
        treated as 0.
    end_frame : int, optional
        Index one past the last frame to decode. ``-1`` (the default), or any
        value larger than the reported frame count, means "up to the end".
    scale_percent : int or float, optional
        Size of the returned frames as a percentage of the source resolution
        (default 50). Frames are shrunk to reduce memory use downstream.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n, height, width, 3)`` in RGB channel
        order. ``n`` is the number of frames actually decoded, which is smaller
        than the requested range when the decoder stops early.
    """
    video = cv2.VideoCapture(filepath)
    try:
        # Source geometry and length as reported by the container.
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

        # cv2.resize takes its target size as (width, height).
        width = int(frame_width * scale_percent / 100)
        height = int(frame_height * scale_percent / 100)
        dim = (width, height)

        if end_frame == -1 or end_frame > frame_count:
            end_frame = frame_count
        start_frame = max(0, start_frame)
        num_frames = max(0, end_frame - start_frame)

        frames = np.empty((num_frames, height, width, 3), np.dtype('uint8'))

        # Position the decoder on the first requested frame; without this the
        # reads below would always start from frame 0.
        if start_frame > 0 and num_frames > 0:
            video.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        loaded = 0
        for _ in range(num_frames):
            success, img = video.read()
            if not success:
                break
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Index by position in the output array, not by source frame number.
            frames[loaded] = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
            loaded += 1
    finally:
        # Free the decoder even if reading or resizing raised.
        video.release()

    # Drop the uninitialised tail left behind when decoding stopped early.
    return frames[:loaded]

def load_video_random_frames(filepath, number_of_frames, scale_percent=10):
    """Decode ``number_of_frames`` distinct, randomly chosen frames from a video.

    Parameters
    ----------
    filepath : str
        Path passed to ``cv2.VideoCapture``.
    number_of_frames : int
        How many distinct frames to draw.
    scale_percent : int or float, optional
        Size of the returned frames as a percentage of the source resolution
        (default 10; the original note gives the result as 120x72 for the
        footage used in this notebook).

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n, height, width, 3)`` in RGB channel
        order, in the order the frames were drawn. ``n`` equals
        ``number_of_frames`` unless a frame could not be decoded, in which case
        only the frames read before the failure are returned.

    Raises
    ------
    ValueError
        If more frames are requested than the video reports having. Distinct
        frames cannot be drawn in that case.
    """
    video = cv2.VideoCapture(filepath)
    try:
        # Source geometry and length as reported by the container.
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

        if number_of_frames > frame_count:
            raise ValueError(
                f"requested {number_of_frames} distinct frames but the video "
                f"only reports {frame_count}"
            )

        # cv2.resize takes its target size as (width, height).
        width = int(frame_width * scale_percent / 100)
        height = int(frame_height * scale_percent / 100)
        dim = (width, height)

        frames = np.empty((number_of_frames, height, width, 3), np.dtype('uint8'))

        loaded = 0
        # sample() draws without replacement, so every index is unique and no
        # retry loop is needed.
        for frame_index in sample(range(frame_count), number_of_frames):
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            success, img = video.read()
            if not success:
                break
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Shrink the frame to keep the model's memory footprint small.
            frames[loaded] = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
            loaded += 1
    finally:
        # Free the decoder even if validation, reading or resizing raised.
        video.release()

    # Drop the uninitialised tail left behind when decoding stopped early.
    return frames[:loaded]

In [None]:
# Locations of the source videos, written as relative paths.
MAFIA_FILEPATH = "datasets/game/MafiaVideogame.mp4"
GODFATHER_FILEPATH = "datasets/movie/TheGodfather.mp4"
IRISHMAN_FILEPATH = "datasets/movie/TheIrishman.mp4"
SOPRANOS_FILEPATH = "datasets/movie/TheSopranos.mp4"

# Draw 10 distinct random frames from the game footage.
MAFIA_FRAMES = load_video_random_frames(MAFIA_FILEPATH,10)
# The film datasets are currently disabled.
# NOTE(review): as written these calls omit the required number_of_frames
# argument, so they would raise TypeError if simply uncommented.
#GODFATHER_FRAMES = load_video_random_frames(GODFATHER_FILEPATH)
#IRISHMAN_FRAMES = load_video_random_frames(IRISHMAN_FILEPATH)
#SOPRANOS_FRAMES = load_video_random_frames(SOPRANOS_FILEPATH)

# Disabled frame viewer, kept as a bare string literal so none of it executes.
# NOTE(review): being the last expression of the cell, the string itself is
# presumably echoed as the cell's output - consider deleting it.
"""
# view frames
count = 0
for frame in MAFIA_FRAMES:
    cv2.namedWindow(f'frame {count}')
    cv2.imshow(f'frame {count}', frame)
    cv2.waitKey(0)
    cv2.destroyWindow(f'frame {count}')
    count+=1
"""

# Frame-to-Frame Model (2.1)

## Generator

In [None]:
# used pytorch lightning to make the model neater, code layout from here: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#minimal-example
class Generator(pl.LightningModule):
    """Convolutional encoder-decoder that maps an image batch to an image batch.

    The network halves the spatial size three times (``conv1``,
    ``downsample1``, ``downsample2``) and doubles it three times
    (``upsample1``, ``upsample2``, ``conv2``), so the output has the input's
    height and width whenever both are multiples of 8. A two-convolution
    residual branch is added after ``downsample1``, ``downsample2`` and
    ``upsample1``. The final ``Tanh`` bounds the output to ``[-1, 1]``.

    Parameters
    ----------
    learning_rate : float, optional
        Step size handed to Adam in ``configure_optimizers`` (default 1e-2).
    loss_fn : callable, optional
        ``loss_fn(prediction, target)`` used by the validation and test steps.
        Defaults to ``nn.L1Loss()``.
        NOTE(review): no loss was ever defined for this model; L1 is a
        placeholder reconstruction loss - confirm the intended objective.
    """

    def __init__(self, learning_rate=1e-2, loss_fn=None):
        super().__init__()
        channels = 3
        nf = 64
        self.learning_rate = learning_rate
        # Record the learning rate with the run's hyperparameters. It cannot be
        # sent through self.log() from configure_optimizers().
        self.save_hyperparameters("learning_rate")
        self.loss = loss_fn if loss_fn is not None else nn.L1Loss()

        def downsample_convolution(in_features, out_features):
            # Stride-2 convolution: halves height and width.
            return nn.Sequential(
                nn.Conv2d(in_features, out_features, 4, 2, 1),
                nn.InstanceNorm2d(out_features),
                nn.LeakyReLU(0.2))

        def residual_convolution(in_features):
            # Shape-preserving branch whose output is added back to its input.
            return nn.Sequential(
                nn.Conv2d(in_features, in_features, 3, 1, 1),
                nn.Conv2d(in_features, in_features, 3, 1, 1))

        def upsample_convolution(in_features, out_features):
            # Stride-2 transposed convolution: doubles height and width.
            # InstanceNorm2d's second positional argument is eps, so no extra
            # positional value is passed here.
            return nn.Sequential(
                nn.ConvTranspose2d(in_features, out_features, 4, 2, 1),
                nn.InstanceNorm2d(out_features),
                nn.ReLU())

        self.conv1 = nn.Sequential(
            nn.Conv2d(channels, nf, 4, 2, 1),
            nn.LeakyReLU())
        self.downsample1 = downsample_convolution(nf, nf * 2)
        self.residual1 = residual_convolution(nf * 2)

        self.downsample2 = downsample_convolution(nf * 2, nf * 4)
        self.residual2 = residual_convolution(nf * 4)
        self.upsample1 = upsample_convolution(nf * 4, nf * 2)

        self.residual3 = residual_convolution(nf * 2)
        self.upsample2 = upsample_convolution(nf * 2, nf)
        self.conv2 = nn.Sequential(
            nn.ConvTranspose2d(nf, channels, 4, 2, 1),
            nn.Tanh())

    def forward(self, x):
        """Run the encoder-decoder on ``x`` and return the generated batch.

        ``x`` must have 3 channels (``conv1`` is built for ``channels = 3``).
        """
        x = self.conv1(x)

        # NOTE(review): the residual branches are summed with their input.
        # Concatenating them along the channel axis doubles the channel count,
        # which does not match the input width declared for the next layer.
        # Confirm that additive residuals are the intended design.
        pre_residual1_x = self.downsample1(x)
        x = pre_residual1_x + self.residual1(pre_residual1_x)

        pre_residual2_x = self.downsample2(x)
        x = pre_residual2_x + self.residual2(pre_residual2_x)

        pre_residual3_x = self.upsample1(x)
        x = pre_residual3_x + self.residual3(pre_residual3_x)

        x = self.upsample2(x)
        x = self.conv2(x)
        return x

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters at ``self.learning_rate``."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

    def training_step(self, train_batch, batch_idx):
        """Placeholder: no standalone training objective is implemented yet.

        Returns ``None``.
        TODO: implement once the generator's training loss is decided.
        """
        return None

    def _evaluate(self, batch):
        """Run ``forward`` on an ``(x, y)`` batch; return ``(x_hat, y, loss)``."""
        x, y = batch
        y = y.float()
        x_hat = self(x)
        return x_hat, y, self.loss(x_hat, y)

    def validation_step(self, val_batch, batch_idx):
        """Log the loss for one validation batch as ``val_loss`` and return it."""
        _, _, loss = self._evaluate(val_batch)
        self.log('val_loss', loss)
        return loss

    def test_step(self, batch, idx):
        """Log ``test_loss`` and ``test_acc`` for one test batch; return the loss."""
        x_hat, y, loss = self._evaluate(batch)
        self.log('test_loss', loss)
        # Fraction of output elements that equal the target after rounding.
        # NOTE(review): exact-match accuracy looks carried over from a
        # classifier template; confirm it is a useful metric for images.
        accuracy = (torch.round(x_hat) == y).float().mean()
        self.log('test_acc', accuracy)
        return loss

## Discriminator

In [None]:
# used pytorch lightning to make the model neater, code layout from here: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#minimal-example
class Discriminator(pl.LightningModule):
    """Convolutional discriminator producing a one-channel score map.

    Two stride-2 convolutions reduce the input to a quarter of its height and
    width; every later layer preserves that size. The outputs of ``layer3``
    and of the second dilated convolution (``layer5``) are concatenated along
    the channel axis before ``layer7``. The last layer is a plain convolution
    with no activation, so the scores are unbounded logits.

    Parameters
    ----------
    learning_rate : float, optional
        Step size handed to Adam in ``configure_optimizers`` (default 1e-2).
    loss_fn : callable, optional
        ``loss_fn(scores, target)`` used by the training, validation and test
        steps. Defaults to ``nn.BCEWithLogitsLoss()``.
        NOTE(review): no loss was ever defined for this model; binary
        cross-entropy on logits is an assumption - confirm the objective.
    """

    def __init__(self, learning_rate=1e-2, loss_fn=None):
        super().__init__()
        nf = 64
        channels = 3
        self.learning_rate = learning_rate
        # Record the learning rate with the run's hyperparameters. It cannot be
        # sent through self.log() from configure_optimizers().
        self.save_hyperparameters("learning_rate")
        self.loss = loss_fn if loss_fn is not None else nn.BCEWithLogitsLoss()

        def dilated_convolution(in_features, out_features, dilation):
            kernel_size = 4
            # A stride-1 convolution keeps its input size only when
            # 2 * padding == dilation * (kernel_size - 1). The integer division
            # is exact for the even dilations used below (2 and 4). With
            # padding == dilation the map shrinks by `dilation` pixels and can
            # no longer be concatenated with layer3 in forward().
            padding = dilation * (kernel_size - 1) // 2
            return nn.Sequential(
                nn.Conv2d(in_features, out_features, kernel_size, 1, padding, dilation),
                nn.InstanceNorm2d(out_features),
                nn.LeakyReLU(0.2, inplace=True))

        def convolution(in_features, out_features, kernal_size=4, stride=2, padding=1):
            return nn.Sequential(
                nn.Conv2d(in_features, out_features, kernal_size, stride, padding),
                nn.InstanceNorm2d(out_features),
                nn.LeakyReLU(0.2, inplace=True))

        self.layer1 = nn.Sequential(
            nn.Conv2d(channels, nf, 4, 2, 1),
            nn.LeakyReLU(0.2, inplace=True))
        self.layer2 = convolution(nf, nf * 2)
        self.layer3 = convolution(nf * 2, nf * 4, 3, 1, 1)
        self.layer4 = dilated_convolution(nf * 4, nf * 4, 2)
        self.layer5 = dilated_convolution(nf * 4, nf * 4, 4)
        # There is no self.layer6: "layer6" is the concatenation built in
        # forward(), which is why layer7 takes nf*8 input channels.
        self.layer7 = convolution(nf * 8, nf * 4, 3, 1, 1)
        self.layer8 = nn.Conv2d(nf * 4, 1, 3, 1, 1)

    def forward(self, x):
        """Score ``x`` and expose the intermediate feature maps.

        Returns
        -------
        tuple
            ``(scores, features)`` where ``scores`` is the one-channel output
            of ``layer8`` and ``features`` is the tuple
            ``(layer2, layer3, layer4, layer5, layer7)`` of activations.
        """
        layer1 = self.layer1(x)
        layer2 = self.layer2(layer1)
        layer3 = self.layer3(layer2)
        layer4 = self.layer4(layer3)
        layer5 = self.layer5(layer4)
        layer6 = torch.cat([layer3, layer5], dim=1)
        layer7 = self.layer7(layer6)
        layer8 = self.layer8(layer7)
        return layer8, (layer2, layer3, layer4, layer5, layer7)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters at ``self.learning_rate``."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

    def _evaluate(self, batch):
        """Score an ``(x, y)`` batch; return ``(scores, target, loss)``."""
        x, y = batch
        # forward() returns (scores, features); only the scores enter the loss.
        scores, _ = self(x)
        target = y.float()
        if target.shape != scores.shape:
            # assumes y holds one label per sample, spread here over the whole
            # score map - TODO confirm against the data loader
            target = target.reshape(-1, 1, 1, 1).expand_as(scores)
        return scores, target, self.loss(scores, target)

    def training_step(self, train_batch, batch_idx):
        """Log the loss for one training batch as ``train_loss`` and return it."""
        _, _, loss = self._evaluate(train_batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Log the loss for one validation batch as ``val_loss`` and return it."""
        _, _, loss = self._evaluate(val_batch)
        self.log('val_loss', loss)
        return loss

    def test_step(self, batch, idx):
        """Log ``test_loss`` and ``test_acc`` for one test batch; return the loss."""
        scores, target, loss = self._evaluate(batch)
        self.log('test_loss', loss)
        # Fraction of score-map entries whose rounded sigmoid equals the target.
        # assumes targets are 0/1 labels - TODO confirm
        accuracy = (torch.round(torch.sigmoid(scores)) == target).float().mean()
        self.log('test_acc', accuracy)
        return loss