In [None]:
import os
import numpy  as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import cv2

import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim

from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.preprocessing import OneHotEncoder
from torchvision import transforms
from tqdm import tqdm


## Load the dataset with Plain_Dataset class 

In [None]:
class Plain_Dataset(Dataset):
    def __init__(self,csv_file,img_dir,datatype,transform=None):
        '''
        Dataset of on-disk images paired with integer class labels.

        Labels are returned as integer class indices (NO OneHot encoding).

        Args:
            csv_file: path of a CSV file that contains an 'emotion' column;
                row i holds the label of image i.
            img_dir: directory prefix of the images. It is concatenated
                verbatim with the file name, so include the trailing
                path separator (e.g. 'test/').
            datatype: file-name prefix of the images (e.g. 'test'); image i
                is read from img_dir + datatype + str(i) + '.jpg'.
            transform: optional callable applied to the PIL image. When it is
                omitted (or falsy) the PIL image is returned unchanged.
        '''
        self.csv_file = pd.read_csv(csv_file)
        self.lables = self.csv_file['emotion']
        self.img_dir = img_dir
        self.transform = transform
        self.datatype = datatype

    def __len__(self):
        '''Number of samples, i.e. number of rows in the CSV file.'''
        return len(self.csv_file)

    def __getitem__(self,idx):
        '''
        Return the (image, label) pair of sample idx.

        The label is a 64-bit integer tensor; the image is whatever
        the transform returns (or the PIL image when no transform is set).
        '''
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img = Image.open(self.img_dir+self.datatype+str(idx)+'.jpg')
        lables = np.array(self.lables[idx])
        lables = torch.from_numpy(lables).long()

        if self.transform:
            img = self.transform(img)

        return img,lables


In [None]:
# Convert each image to a tensor, then normalize it with mean 0.5 and std 0.5.
transformation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Test split: labels come from the CSV, images from 'test/test<idx>.jpg'.
dataset = Plain_Dataset(
    csv_file='Dataset/Kaggle/test.csv',
    img_dir='test/',
    datatype='test',
    transform=transformation,
)

# Batches of 64, loaded in the main process (no worker subprocesses).
test_loader = DataLoader(dataset, batch_size=64, num_workers=0)

#### Test the dataset (display one sample)

In [None]:
# Fetch sample 250 once; indexing goes through Plain_Dataset.__getitem__,
# so the image is read from disk a single time instead of twice.
imgg, lable = dataset[250]

print(lable)
#(0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral)
imgnumpy = imgg.numpy()
# Drop the singleton channel axis so imshow receives a 2-D array.
imgt = imgnumpy.squeeze()
# Single-channel image: render it in grayscale instead of the default colormap.
plt.imshow(imgt, cmap='gray')
plt.show()

## Load the model

In [None]:
class Deep_Emotion(nn.Module):
    def __init__(self):
        '''
        Small CNN emotion classifier without a spatial transformer.

        Two blocks of (3x3 conv, 3x3 conv, 2x2 max-pool) with 10 channels
        each, followed by two linear layers producing 7 class scores.
        For a 1x48x48 input the flattened feature map has
        10 * 9 * 9 = 810 values, which is the input size of fc1.
        '''
        super(Deep_Emotion,self).__init__()
        self.conv1 = nn.Conv2d(1,10,3)
        self.conv2 = nn.Conv2d(10,10,3)
        self.pool2 = nn.MaxPool2d(2,2)

        self.conv3 = nn.Conv2d(10,10,3)
        self.conv4 = nn.Conv2d(10,10,3)
        self.pool4 = nn.MaxPool2d(2,2)

        self.fc1 = nn.Linear(810,50)
        self.fc2 = nn.Linear(50,7)

    def forward(self,input):
        '''
        Compute the 7 raw class scores (logits) for a batch of images.

        Args:
            input: single-channel image tensor whose conv/pool output
                flattens to 810 features per sample (e.g. N x 1 x 48 x 48).

        Returns:
            Tensor of shape (N, 7) with unnormalized class scores.
        '''
        out = self.conv1(input)
        out = F.relu(out)

        out = self.conv2(out)
        out = self.pool2(out)
        out = F.relu(out)

        out = self.conv3(out)
        out = F.relu(out)

        out = self.conv4(out)
        out = self.pool4(out)
        out = F.relu(out)

        # F.dropout defaults to training=True; tie it to the module mode so
        # that model.eval() really disables dropout and inference is
        # deterministic. Behavior in train mode is unchanged.
        out = F.dropout(out, training=self.training)
        out = out.view(-1, 810)
        out = self.fc1(out)
        out = self.fc2(out)

        return out

In [None]:
# File holding the serialized model object.
MODEL_PATH = "model_noSTN-20-128-0.005.pt"

# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. Recent PyTorch releases may default to weights_only=True and
# reject a fully pickled model -- confirm against the installed version.
model = torch.load(MODEL_PATH)

## Model Evaluation 

In [None]:
model.eval()

# Run on whichever device the model's parameters live on.
device = next(model.parameters()).device

correct = 0
total = 0
with torch.no_grad():
    for data, lables in test_loader:
        data, lables = data.to(device), lables.to(device)
        outputs = model(data)
        # argmax over the logits equals argmax over softmax(logits),
        # so the softmax is not needed to pick the class.
        predicted = torch.argmax(outputs, 1)
        # Count samples instead of averaging per-batch accuracies: the last
        # batch may hold fewer than batch_size samples.
        total += lables.size(0)
        correct += (predicted == lables).sum().item()

accuracy = 100 * correct / total if total else 0.0
print('Accuracy of the network on the %d test images: %d %%' % (total, accuracy))

In [None]:
classes = ('Angry', 'Disgust', 'Fear', 'Happy',
           'Sad', 'Surprise', 'Neutral')
# Per-class counters, indexed by the integer class label.
class_correct = [0.] * len(classes)
class_total = [0.] * len(classes)

model.eval()
# Run on whichever device the model's parameters live on.
device = next(model.parameters()).device

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        hits = (predicted == labels)
        # Score every sample of the batch (not only the first four); going
        # through tolist() also works when the batch holds a single sample.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += hit
            class_total[label] += 1


for i in range(len(classes)):
    if class_total[i] == 0:
        # No sample of this class was seen: avoid dividing by zero.
        print('Accuracy of %5s : N/A (no samples)' % classes[i])
        continue
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

## Test Single image

In [None]:
# Same preprocessing as the test set: to tensor, then normalize (mean 0.5, std 0.5).
loader = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])
def load_img(path):
    '''
    Load an image file as a model-ready batch of one on the GPU.

    Args:
        path: path of the image file to open with PIL.

    Returns:
        Float CUDA tensor with a leading batch axis of size 1. Gradient
        tracking is enabled on the image tensor, as in the original code.
    '''
    # The transform materializes the pixel data, so the file can be closed
    # as soon as the tensor has been built.
    with Image.open(path) as pil_img:
        img = loader(pil_img).float()
    # In-place flag replaces the deprecated torch.autograd.Variable wrapper.
    img.requires_grad_(True)
    img = img.unsqueeze(0)
    return img.cuda()

def test_img(path,save_name):
    '''
    Classify the emotion of a single image file and print the class name.

    The image at path is read as grayscale, resized to 48x48 and written
    to save_name; that saved file is then loaded and fed to the model.

    Args:
        path: path of the input image.
        save_name: path where the resized grayscale copy is written. If it
            equals path, the input image is overwritten.

    Raises:
        FileNotFoundError: if the input image cannot be read.
        IOError: if the resized image cannot be written.
    '''
    #scale and convert to grayscale then save the image to import it with PIL.Image
    img = cv2.imread(path,0)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError('could not read image: %s' % path)
    img = cv2.resize(img,(48,48))
    if not cv2.imwrite(save_name,img):
        raise IOError('could not write image: %s' % save_name)

    #load the saved (resized) image with PIL, not the original input file
    PIL_img = load_img(save_name)
    with torch.no_grad():
        out = model(PIL_img)
    # argmax over the logits equals argmax over softmax(logits).
    classs = torch.argmax(out,1)
    prediction = classes[classs.item()]
    print(prediction)

In [None]:
# The same path is passed as input and as save target, so 'test2.jpg' is
# overwritten by the 48x48 grayscale copy that test_img writes.
test_img('test2.jpg','test2.jpg')

## Real-time test with a webcam

In [None]:
# Load the cascade
face_cascade = cv2.CascadeClassifier('facedetection-master/haarcascade_frontalface_default.xml')
if face_cascade.empty():
    raise IOError('could not load the face cascade file')

# To capture video from webcam.
cap = cv2.VideoCapture(0)

# Text overlay settings (constant for every frame)
font = cv2.FONT_HERSHEY_SIMPLEX
org = (50, 50)
fontScale = 1
color = (255, 0, 0)
thickness = 2

try:
    while True:
        # Read the frame; stop when the camera delivers nothing
        ok, img = cap.read()
        if not ok:
            break

        # Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Detect the faces
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)

        # Save the 48x48 grayscale crop and draw a rectangle for each face;
        # "roi.jpg" ends up holding the last detected face.
        for (x, y, w, h) in faces:
            roi = cv2.resize(gray[y:y+h, x:x+w], (48, 48))
            cv2.imwrite("roi.jpg", roi)
            cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)

        # Classify only when a face was found in this frame; otherwise
        # "roi.jpg" would be missing or left over from an earlier frame.
        if len(faces) > 0:
            with torch.no_grad():
                out = model(load_img("roi.jpg"))
            # argmax over the logits equals argmax over softmax(logits).
            classs = torch.argmax(out, 1)
            prediction = classes[classs.item()]
            img = cv2.putText(img, prediction, org, font,
                              fontScale, color, thickness, cv2.LINE_AA)

        cv2.imshow('img', img)
        # Stop if (Q) key is pressed; mask to the low byte of the key code
        k = cv2.waitKey(30) & 0xFF
        if k == ord("q"):
            break
finally:
    # Release the VideoCapture object and close the preview window,
    # even when the loop ends with an error.
    cap.release()
    cv2.destroyAllWindows()

#### Helper function to count the correct predictions in a batch (supervised learning)

In [None]:
def get_correct_pred(preds,labels):
    '''
    Count how many rows of preds have their highest score at the labelled class.

    Args:
        preds: 2-D tensor of per-class scores, one row per sample.
        labels: tensor of class indices, one per sample.

    Returns:
        The number of matching predictions as a Python int.
    '''
    predicted_classes = torch.argmax(preds, dim=1)
    matches = torch.eq(predicted_classes, labels)
    n_correct = matches.sum()
    return n_correct.item()
    