In [1]:
import numpy as np
import torch
from torchvision import models, transforms, datasets
import torch.nn.functional as F
from torch import nn, optim
import face_detector
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import cv2
import time
from PIL import Image

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
device

device(type='cuda')

In [4]:
model_1 = models.vgg19(pretrained=True)
model_2 = models.vgg19(pretrained=True)
model_3 = models.vgg19(pretrained=True)
model_4 = models.vgg19(pretrained=True)
model_5 = models.vgg19(pretrained=True)

model_list = [model_1, model_2, model_3, model_4, model_5]

In [5]:
class Network1(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        self.layer1 = nn.Linear(25088,5000)
        self.layer2 = nn.Linear(5000, 500)
        self.layer3 = nn.Linear(500, 2)

    def forward(self, x):
        
        #network 1
        x = x.view(x.shape[0], -1)
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.softmax(self.layer3(x), dim=1)
        
        return x

class Network2(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        #network2
        self.layer1 = nn.Linear(25088,5000)
        self.layer2 = nn.Linear(5000, 500)
        self.layer3 = nn.Linear(500, 2)

    def forward(self, x):
        
        #network2
        x = x.view(x.shape[0], -1)
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.softmax(self.layer3(x), dim=1)
        
        return x

class Network3(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        #network3
        self.layer1 = nn.Linear(25088,5000)
        self.layer2 = nn.Linear(5000, 500)
        self.layer3 = nn.Linear(500, 2)

    def forward(self, x):
        
        #network3
        x = x.view(x.shape[0], -1)
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.softmax(self.layer3(x), dim=1)
        
        return x
    
class Network4(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        #network4
        self.layer1 = nn.Linear(25088,5000)
        self.layer2 = nn.Linear(5000, 500)
        self.layer3 = nn.Linear(500, 2)

    def forward(self, x):
        
        #network4
        x = x.view(x.shape[0], -1)
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.softmax(self.layer3(x), dim=1)
        
        return x
    
class Network5(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        #network5
        self.layer1 = nn.Linear(25088,5000)
        self.layer2 = nn.Linear(5000, 500)
        self.layer3 = nn.Linear(500, 2)
    def forward(self, x):
        
        #network5
        x = x.view(x.shape[0], -1)
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.softmax(self.layer3(x), dim=1)
        
        return x
    
network_list = [Network1(), Network2(), Network3(), Network4(), Network5()]

In [6]:
class EnsembleNetwork(nn.Module):
    
    def __init__(self, model_list):
        super().__init__()
        
        self.models = model_list
        self.layer1 = nn.Linear(10,2)
        
    def forward(self, x):
        
        output = []
        
        
        for model, img in zip(self.models, x):
            output.append(model(img))
        
        output = torch.cat(output, dim=1)
        x = F.softmax(self.layer1(output), dim=1)
        
        return x

In [7]:
for model in model_list:

    for param in model.parameters():
        param.requires_grad = False

for model, network in zip(model_list, network_list):
    
    model.classifier = network


In [8]:
data_transforms = transforms.Compose([face_detector.FaceCropper(),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485,0.456,0.406],
                                                           std=[0.229,0.224,0.225])])

In [9]:
extensions = '.mp4'

data_dir = './videos'

train_dir = data_dir + '/train_sample_videos'
test_dir = data_dir + '/test_videos'

In [10]:
# train_data = datasets.DatasetFolder(train_dir,
#                                     loader=face_detector.random_frame_selector,
#                                     extensions=extensions,
#                                     transform=data_transforms)

In [11]:
# trainloader = torch.utils.data.DataLoader(train_data,
#                                           batch_size=16,
#                                           shuffle=True,
#                                           num_workers=4)

In [12]:
ensemble_network = EnsembleNetwork(model_list)

In [13]:
criterion = nn.NLLLoss()
optimizer = optim.Adam(ensemble_network.parameters(), lr=0.001)

In [14]:
def image_split(x):
    
    '''Splits a 224x224 image into 4 equal quarters.
    
    Returns: 
    Whole image, Top left corner, Bottom left corner,
    Top right corner, Bottom right corner.'''
    
    return x, x[:,:,0:112,0:112], x[:,:,0:112,112:224],\
           x[:,:,112:224,0:112], x[:,:,112:224,112:224]

In [16]:
ensemble_network.to(device)
for model in model_list:
    model.to(device)

epoch = 1

start = time.time()
for e in range(epoch):

    train_data = datasets.DatasetFolder(train_dir,
                                    loader=face_detector.random_frame_selector,
                                    extensions=extensions,
                                    transform=data_transforms)

    trainloader = torch.utils.data.DataLoader(train_data,
                                          batch_size=16,
                                          shuffle=True,
                                          num_workers=0)


    train_losses = []
    running_loss = 0

    for images, labels in trainloader:

        images = images.to(device)
        labels = labels.to(device)
        splits = image_split(images)
        output = ensemble_network(splits)

        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        running_loss += loss.item()
        train_losses.append(running_loss/len(trainloader))

    print(f"Training loss: {running_loss}")

end = time.time()

print(f'Time taken for {epoch} epochs : {end - start}')

Training loss: -15.254588603973389
Time taken for 1 epochs : 137.82240414619446


In [17]:
accuracy = 0

with torch.no_grad():
    trained_model.eval()
    for images, labels in trainloader:

        images = images.to(device)
        labels = labels.to(device)
        
        splits = image_split(images)

        log_ps = ensemble_network(splits)
        
        top_p, top_class = log_ps.topk(1, dim=1)
        equals = top_class == labels.view(*top_class.shape)
        
        accuracy += torch.mean(equals.type(torch.FloatTensor))
        
print(f'Accuracy : {accuracy / len(trainloader)}')

NameError: name 'trained_model' is not defined