In [1]:
import numpy as np
import torch
from torchvision import models, transforms, datasets
import torch.nn.functional as F
from torch import nn, optim
import face_detector
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import cv2
import time

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
torch.cuda.is_available()

True

In [4]:
model_1 = models.resnet18(pretrained=True)

In [5]:
class Network(nn.Module):
    
    '''Predicts the probabilty that the player playing white is going to win.'''
    
    def __init__(self):
        super(Network, self).__init__()
        
        
        self.layer1 = nn.Linear(224,100)
        self.layer2 = nn.Linear(100, 50)
        self.layer3 = nn.Linear(50, 2)

    def forward(self, x):
        
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.softmax(self.layer3(x), dim=1)
        
        return x

In [6]:
# for param in model_1.parameters():
#     param.requires_grad = True

model_1.classifier = Network()
model_1.share_memory()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
data_transforms = transforms.Compose([face_detector.FaceCropper(),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485,0.456,0.406],
                                                           std=[0.229,0.224,0.225])])

In [8]:
def random_frame_selector(video_source):
    
    video = cv2.VideoCapture(video_source)

    video_length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

    random_frame = np.random.randint(0, video_length)

    video.set(1, random_frame)

    _, frame = video.read()
    
    return frame

In [9]:
extensions = '.mp4'

data_dir = './videos'

train_dir = data_dir + '/train_sample_videos'
test_dir = data_dir + '/test_videos'

In [10]:
train_data = datasets.DatasetFolder(train_dir,
                                    loader=random_frame_selector,
                                    extensions=extensions,
                                    transform=data_transforms)

In [11]:
trainloader = torch.utils.data.DataLoader(train_data,
                                          batch_size=32,
                                          shuffle=True,
                                          num_workers=0,
                                          pin_memory=True)

In [12]:
criterion = nn.NLLLoss()
optimizer = optim.Adam(model_1.classifier.parameters(), lr=0.001)
model_1.cuda()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
start = time.time()
for data in trainloader:
    images, labels = data
    end = time.time()
    break
    
print(f'Elapsed time : {end - start}')

Elapsed time : 10.162174701690674


In [14]:

epoch = 100
model_1.train()
print('set model in train mode')

start = time.time()
for e in range(epoch):
    train_losses = []
    running_loss = 0
    train_start = time.time()
    
    for images, labels in trainloader:
        
        train_end = time.time()
        print(train_end - train_start)
        
        images = images.to(device)
        labels = labels.to(device)
        
        log_ps = model_1.forward(images)
        loss = criterion(log_ps, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        train_losses.append(running_loss/len(trainloader))

    print(f"Training loss: {running_loss}")

end = time.time()

print(f'Time taken for {epoch} epochs : {end - start}')

set model in train mode
9.622166872024536


KeyboardInterrupt: 