In [17]:
import cv2
import os

# Frontal-face Haar cascade, loaded from the cascade directory that
# cv2.data.haarcascades points at. get_frames uses it to find faces.
_CASCADE_FILENAME = 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(f"{cv2.data.haarcascades}{_CASCADE_FILENAME}")
# TODO: try reducing the size of the video before running face detection

def get_emotion_label(filename):
    """Return the emotion name encoded in a dash-separated video filename.

    The third dash-separated field of the name (directory and extension
    removed) is read as an integer emotion code and looked up in the table
    below.

    Args:
        filename: File name or path such as "01-02-03-01-01-01-01.mp4".

    Returns:
        The emotion name, or "unknown" when the name has fewer than three
        fields, the third field is not an integer, or the code is not in
        the table.
    """
    emotions = {
        1: "neutral", 2: "calm", 3: "happy", 4: "sad",
        5: "angry", 6: "fearful", 7: "disgust", 8: "surprised"
    }

    # Drop directories (which may themselves contain dashes) and the extension
    # so that only the identifier fields are split.
    stem = os.path.splitext(os.path.basename(filename))[0]
    parts = stem.split('-')

    try:
        emotion_code = int(parts[2])
    except (IndexError, ValueError):
        # Not a name in the expected format; use the same fallback as an
        # unrecognised code instead of crashing the whole batch.
        return "unknown"

    return emotions.get(emotion_code, "unknown")

def get_frames(video_path, output_dir, frame_rate=1, network_size=(48,48)):
    """Sample frames from a video and save a cropped grayscale face per sample.

    The emotion label is derived from the video's filename and used as the
    name of the sub-directory of ``output_dir`` that receives the images.
    Every ``fps / frame_rate``-th frame is converted to grayscale and passed
    to the face detector; the first detected face is cropped, resized to
    ``network_size`` and written as a JPEG. Sampled frames without a detected
    face are reported and skipped, so no image is written for them.

    Args:
        video_path: Path of the video file to read.
        output_dir: Root directory; images go to ``output_dir/<emotion>``.
        frame_rate: Number of frames to save per second of video
            (e.g. ``1/3`` saves one frame every three seconds).
        network_size: ``(width, height)`` passed to ``cv2.resize``.

    Raises:
        ValueError: If ``frame_rate`` is not positive.
    """
    if frame_rate <= 0:
        raise ValueError(f"frame_rate must be positive, got {frame_rate}")

    filename = os.path.basename(video_path)
    emotion_label = get_emotion_label(filename)

    # Create the directory for this emotion
    emotion_dir = os.path.join(output_dir, emotion_label)
    os.makedirs(emotion_dir, exist_ok=True)

    # splitext keeps any dots that are part of the name itself
    video_stem = os.path.splitext(filename)[0]

    # Read the video
    capture = cv2.VideoCapture(video_path)
    try:
        if not capture.isOpened():
            print(f"Could not open video {video_path}")
            return

        # Frame interval from the reported fps. Clamp to 1 so that a reported
        # fps of 0, or a frame_rate above the fps, cannot produce a zero
        # interval (which would make the modulo below divide by zero).
        fps = int(capture.get(cv2.CAP_PROP_FPS))
        frame_interval = max(1, int(fps / frame_rate))

        frame_count = 0
        while True:
            success, frame = capture.read()
            if not success:
                break

            # Only run the detector on frames that will actually be saved.
            if frame_count % frame_interval == 0:
                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(
                    gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
                )
                if len(faces) > 0:
                    x, y, w, h = faces[0]  # Use the first detected face
                    face = gray_frame[y:y+h, x:x+w]  # Crop the face region
                    # Resize to the input size expected by the network
                    face_resized = cv2.resize(face, network_size)
                    frame_filename = f"{video_stem}_frame{frame_count}.jpg"
                    frame_path = os.path.join(emotion_dir, frame_filename)
                    cv2.imwrite(frame_path, face_resized)
                else:
                    # Skip this sample rather than saving a stale or undefined crop.
                    print("No face detected in this frame.")

            frame_count += 1
    finally:
        # Release the capture even if detection or writing raises.
        capture.release()

    print(f"Extracted frames from {filename} into {emotion_dir}")

In [3]:
# Project root on this machine; the raw strings keep the Windows backslashes literal.
_project_root = r"C:\Users\Lewis\OneDrive - University of Glasgow\Year 4\Facial Emotion Recognition Project"
video_folder = _project_root + r"\Actor_01"   # source videos
output_folder = _project_root + r"\dataset"   # extracted face images, one sub-folder per emotion

In [None]:
from multiprocessing import Pool
import os

def process_video(video_file):
    """Extract face frames from one video in ``video_folder``.

    ``video_file`` is a file name relative to ``video_folder``. Frames are
    written under ``output_folder`` at one saved frame every three seconds
    (frame_rate=1/3); adjust the rate as needed.
    """
    get_frames(os.path.join(video_folder, video_file), output_folder, frame_rate=1/3)

# Number of worker processes to run simultaneously
num_processes = 4

# NOTE(review): on Windows, worker processes may be unable to import functions
# that were defined in a notebook cell - confirm this cell actually runs there.
mp4_files = [f for f in os.listdir(video_folder) if f.endswith(".mp4")]

# Hand each video to a pool of worker processes
with Pool(num_processes) as pool:
    pool.map(process_video, mp4_files)


In [20]:
# Extract face crops from every .mp4 in the source folder, one video at a time.
# get_frames derives the emotion label from the file name itself, so the label
# is not computed here. Sorting makes the processing order (and the log)
# reproducible, since os.listdir returns entries in arbitrary order.
for video_file in sorted(os.listdir(video_folder)):
    if video_file.endswith(".mp4"):
        video_path = os.path.join(video_folder, video_file)
        get_frames(video_path, output_folder)

Extracted frames from 01-02-01-01-01-01-01.mp4 into C:\Users\Lewis\OneDrive - University of Glasgow\Year 4\Facial Emotion Recognition Project\output_folder\neutral
Extracted frames from 01-02-01-01-01-02-01.mp4 into C:\Users\Lewis\OneDrive - University of Glasgow\Year 4\Facial Emotion Recognition Project\output_folder\neutral
Extracted frames from 01-02-01-01-02-01-01.mp4 into C:\Users\Lewis\OneDrive - University of Glasgow\Year 4\Facial Emotion Recognition Project\output_folder\neutral
Extracted frames from 01-02-01-01-02-02-01.mp4 into C:\Users\Lewis\OneDrive - University of Glasgow\Year 4\Facial Emotion Recognition Project\output_folder\neutral
Extracted frames from 01-02-02-01-01-01-01.mp4 into C:\Users\Lewis\OneDrive - University of Glasgow\Year 4\Facial Emotion Recognition Project\output_folder\calm
Extracted frames from 01-02-02-01-01-02-01.mp4 into C:\Users\Lewis\OneDrive - University of Glasgow\Year 4\Facial Emotion Recognition Project\output_folder\calm
Extracted frames from 

In [10]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Preprocessing applied to every image loaded from disk.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # safeguard so every image is single-channel
    transforms.ToTensor(),  # converts the image to a PyTorch tensor
    transforms.Normalize((0.5,), (0.5,))  # per-channel normalisation with mean 0.5, std 0.5
])

data_dir = "C:\\Users\\paulb\\OneDrive - University of Glasgow\\Year 4\\Facial Emotion Recognition Project\\dataset"

# One class per sub-folder of data_dir.
full_dataset = datasets.ImageFolder(data_dir, transform=transform)

# 70% train, 15% test; validation takes whatever is left so the three sizes
# always sum to len(full_dataset).
train_size = int(0.7*len(full_dataset))
test_size = int(0.15*len(full_dataset))
val_size = len(full_dataset) - train_size - test_size

# Seed the split so the same images land in train/val/test on every run;
# otherwise results are not comparable between kernel restarts.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
# NOTE(review): the split is over individual images, so frames taken from the
# same video can end up in different splits - consider splitting by video.
train_data, val_data, test_data = random_split(
    full_dataset, [train_size, val_size, test_size], generator=split_generator
)

# Only the training data is shuffled; evaluation order does not matter.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

In [11]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleEmotionNN(nn.Module):
    """Two-layer fully connected classifier over flattened images.

    Each image is flattened to ``height * width`` values, passed through one
    hidden ReLU layer, and mapped to one raw score (logit) per class.

    Args:
        num_classes: Number of output classes. Defaults to 7. Note that
            ``get_emotion_label`` in this notebook defines 8 emotion names,
            so pass the number of classes actually present in the dataset.
        image_size: ``(height, width)`` of the input images. Defaults to
            ``(48, 48)``.
        hidden_size: Width of the hidden layer. Defaults to 128.
    """

    def __init__(self, num_classes=7, image_size=(48, 48), hidden_size=128):
        super().__init__()

        height, width = image_size

        # All pixels -> hidden layer -> one score per emotion class
        self.fullyConnected1 = nn.Linear(height * width, hidden_size)
        self.fullyConnected2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(N, num_classes)`` for input ``x``."""
        # reshape (unlike view) also accepts non-contiguous tensors.
        x = x.reshape(-1, self.fullyConnected1.in_features)
        x = F.relu(self.fullyConnected1(x))
        x = self.fullyConnected2(x)
        return x

In [12]:
# Instantiate the network with its default layer sizes and print its layer
# summary. This `model` object is the one the later cells train and evaluate.
model = SimpleEmotionNN()
print(model)

SimpleEmotionNN(
  (fullyConnected1): Linear(in_features=2304, out_features=128, bias=True)
  (fullyConnected2): Linear(in_features=128, out_features=7, bias=True)
)


In [14]:
# Sanity check: push a random batch shaped like real input through the model.
# Shape is (batch_size, channels, height, width) = 32 grayscale 48x48 images.
batch_shape = (32, 1, 48, 48)
sample_input = torch.randn(*batch_shape)

sample_output = model(sample_input)
print("Output shape:", sample_output.shape)

# As I understand it, each image's 2304 pixel values are passed down through
# the network and mapped to 7 output scores.

Output shape: torch.Size([32, 7])


In [15]:
import torch.optim as optim

# Step size for plain stochastic gradient descent
LEARNING_RATE = 0.01

# Cross-entropy over the raw class scores produced by the model
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=LEARNING_RATE)

In [19]:
epochs = 100
device = "cpu"  # laptop; use "cuda" on the PC

for epoch in range(epochs):
    # ---- training pass: one optimiser step per batch ----
    model.train()
    train_loss_total = 0.0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()

        batch_loss = loss_func(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()

        train_loss_total += batch_loss.item()

    avg_train_loss = train_loss_total / len(train_loader)

    # ---- validation pass: forward only, no gradients ----
    model.eval()
    val_loss_total = 0.0
    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            val_loss_total += loss_func(model(batch_images), batch_labels).item()

    # Mean of the per-batch losses for this epoch
    avg_val_loss = val_loss_total / len(val_loader)
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")

    # TODO: plot the training and validation loss

Epoch 1/100, Training Loss: 1.0631, Validation Loss: 1.0468
Epoch 2/100, Training Loss: 1.0248, Validation Loss: 0.9893
Epoch 3/100, Training Loss: 0.9871, Validation Loss: 0.9561
Epoch 4/100, Training Loss: 0.9505, Validation Loss: 0.8957
Epoch 5/100, Training Loss: 0.9164, Validation Loss: 0.8796
Epoch 6/100, Training Loss: 0.8760, Validation Loss: 0.8871
Epoch 7/100, Training Loss: 0.8516, Validation Loss: 0.8577
Epoch 8/100, Training Loss: 0.8190, Validation Loss: 0.8235
Epoch 9/100, Training Loss: 0.7929, Validation Loss: 0.8254
Epoch 10/100, Training Loss: 0.7605, Validation Loss: 0.8055
Epoch 11/100, Training Loss: 0.7345, Validation Loss: 0.7687
Epoch 12/100, Training Loss: 0.7137, Validation Loss: 0.7481
Epoch 13/100, Training Loss: 0.6883, Validation Loss: 0.7325
Epoch 14/100, Training Loss: 0.6701, Validation Loss: 0.7351
Epoch 15/100, Training Loss: 0.6410, Validation Loss: 0.7313
Epoch 16/100, Training Loss: 0.6213, Validation Loss: 0.7141
Epoch 17/100, Training Loss: 0.61

In [30]:
# Evaluate the trained model on the held-out test split.
model.eval()
device = "cpu"  # use "cuda" on the PC

loss_total = 0.0
num_correct = 0
num_seen = 0

with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        logits = model(batch_images)
        loss_total += loss_func(logits, batch_labels).item()

        # Predicted class = index of the largest score in each row
        predicted = torch.max(logits, 1).indices

        # Per-batch peek at raw scores and the first few predictions
        print("Outputs: ", logits[0])
        print("Predicted: ", predicted[:5])
        print("Actual: ", batch_labels[:5])

        num_correct += (predicted == batch_labels).sum().item()
        num_seen += batch_labels.size(0)

# Loss is averaged over batches, accuracy over individual samples
avg_test_loss = loss_total / len(test_loader)
test_accuracy = num_correct / num_seen

print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Outputs:  tensor([ 7.6915, -3.9909, -2.4412,  1.3584, -2.1371,  0.3975, -2.2771])
Predicted:  tensor([0, 1, 3, 3, 2])
Actual:  tensor([0, 1, 3, 3, 2])
Outputs:  tensor([ 2.7887, -1.5937,  2.4797, -2.6440, -2.6509,  4.9346, -3.7905])
Predicted:  tensor([5, 1, 1, 0, 1])
Actual:  tensor([5, 1, 1, 0, 3])
Outputs:  tensor([ 3.5777,  1.1832, -3.1422, -0.1974,  0.8015,  0.3301, -3.9841])
Predicted:  tensor([0, 5, 3, 0])
Actual:  tensor([0, 5, 3, 0])
Test Loss: 0.2783, Test Accuracy: 0.9118
