# MIE1517 Project
## Facial Expression Classification (CNN)
### 2021-11-17
Code from Katie, based on code from Alex
Current setup: using all labels and all expressions. Loading data from individual files not .tar files

## Package Imports

In [31]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.models
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
import matplotlib.cbook as cbook
import matplotlib.image as image
import matplotlib.pyplot as plt
import os
import re

In [32]:
import argparse
import math
import time
from torch.autograd import Variable

## Data Loading

Helper function to load images from data folders. Code from Alex to unpack tar files not used since I don't store imgs in tar files here.

### Helper loading fcn

In [33]:
def load_images_from_folder(folder,s,cnn = False):
    '''
    Read data images and their expression label (integer) in from folder.
    '''
    # get list of image filenames
    images = []
    labels = []
    imagefolder = folder+'/images'
    k = os.listdir(imagefolder)[:s]
    
    # read in all images and labels from folder
    for filename in k:
        # add image to list
        img = plt.imread(os.path.join(folder+'/images',filename))
        if img is not None:
            images.append(img)
        
        # get filename for label for this image ('exp' files give expression label as int)
        number = int(re.search(r'\d+',  filename)[0])
        label_filename = folder+'/annotations/'+str(number)+'_exp.npy'
        
        # get expression label and add to list, or label as "not face" and warn if label not found
        if os.path.isfile(label_filename):
            label = np.load(label_filename)
        else:
            label = 10
            print("Label for image %s not found"%filename)
        labels.append(int(label))
        
    images = np.array(images)
    labels = np.array(labels)
    
    if cnn:
      images = np.transpose(torch.tensor(images),[0,3,2,1])
      trainfeature= resnet(images/255)
      trainfeature = [x.clone().detach() for x in trainfeature]
      data = zip(trainfeature, labels)
      data = list(data)
    
    else:
      images = np.transpose(torch.tensor(images),[0,3,2,1])
      images = images/255
      images = [x.clone().detach() for x in images]
      data = zip(images, labels)
      data =  list(data)
    
    return data,images

### Google Colab & Tar Import

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import tarfile
import os
zip_ref = tarfile.TarFile('/content/drive/MyDrive/val_set.tar', 'r') 
zip_ref.extractall('/content') 
zip_ref.close()

zip_ref = tarfile.TarFile('/content/drive/MyDrive/train_set.tar', 'r') 
zip_ref.extractall('/content') 
zip_ref.close()

### Loading Training and Validation Data

In [47]:
traindata = load_images_from_folder('mie1517/data/train',4000,cnn = False)
print("Shape of training data: ")
print(np.shape(traindata))

Shape of training data: 
(2, 4000)


In [48]:
valdata,imgs = load_images_from_folder('mie1517/data/valid',400,cnn = False)
#print("Shape of validation data: ")
#print(np.shape(valdata))

## Model Setup
### ANN and Pretrained Models

In [36]:
alexnet = torchvision.models.alexnet(pretrained=True)
resnet = torchvision.models.resnet.resnet18(pretrained=False)

# features = ... load precomputed alexnet.features(img) ...
class classANN(nn.Module):
    
    def __init__(self):
        super(classANN, self).__init__()
        self.name = "class"
        
        self.fc1 = nn.Linear(256*6*6, 100)
        self.fc2 = nn.Linear(100, 20)
        self.fc3 = nn.Linear(20, 9)

    def forward(self, x):
        x = x.view(-1,256*6*6)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        x = x.squeeze(1) # Flatten to [batch_size]
        return x

### Accuracy and Training Helpers

In [37]:
def get_accuracy2(model, batch_size,dataname,train=False,):
    if train:
        data = dataname
    else:
        data = valdata

    correct = 0
    total = 0
    
    for imgs, labels in torch.utils.data.DataLoader(data, batch_size=32):
        if use_cuda and torch.cuda.is_available():
          imgs =  imgs.float().cuda()
          labels = labels.cuda()
            
        #select index with maximum prediction score
        model.cuda()
        output = model(imgs)
        pred = output.max(1, keepdim=True)[1]
        correct += pred.eq(labels.view_as(pred)).sum().item()
        total += imgs.shape[0]
        
    return correct / total

def train2(model, data, batch_size=64, learnrate=0.01,num_epochs=1):
    train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learnrate, momentum=0.9)

    iters, losses, train_acc, val_acc = [], [], [], []

    # training
    n = 0 # the number of iterations
    for epoch in range(num_epochs):
        print(epoch)
        for imgs, labels in iter(train_loader):
            if use_cuda and torch.cuda.is_available():
               imgs =  imgs.float().cuda()
               labels = labels.cuda()
            model.cuda()

            out = model(imgs)             # forward pass

            loss = criterion(out, labels) # compute the total loss
            loss.backward()               # backward pass (compute parameter updates)
            optimizer.step()              # make the updates for each parameter
            optimizer.zero_grad()         # a clean up step for PyTorch

            # save the current training information
            iters.append(n)
            losses.append(float(loss)/batch_size)             # compute *average* loss
            train_acc.append(get_accuracy2(model, batch_size,data, train=True,)) # compute training accuracy 
            val_acc.append(get_accuracy2(model, batch_size, data,train=False,))  # compute validation accuracy
            n += 1
     
    # plotting
    plt.title("Training Curve")
    plt.plot(iters, losses, label="Train")
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.show()

    plt.title("Training Curve")
    plt.plot(iters, train_acc, label="Train")
    plt.plot(iters, val_acc, label="Validation")
    plt.xlabel("Iterations")
    plt.ylabel("Training Accuracy")
    plt.legend(loc='best')
    plt.show()


    print("Final Training Accuracy: {}".format(train_acc[-1]))
    print("Final Validation Accuracy: {}".format(val_acc[-1]))

## Model Testing

In [42]:
use_cuda = False
classnet = classANN()
net = resnet
num_ftrs = net.fc.in_features
net.fc = nn.Linear(num_ftrs, 9)
train2(net, traindata, batch_size=100, learnrate=0.01,num_epochs=10)

0


RuntimeError: each element in list of batch should be of equal size

In [41]:
#net = net.cuda()
num_ftrs = net.fc.in_features
net.fc = nn.Linear(num_ftrs, 9)
#net.fc = net.fc.cuda() 

In [None]:
feature

512

In [None]:
valdata[0][0][None,...].float().shape

torch.Size([1, 3, 224, 224])