In [2]:
import os
import random

import cv2
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torch.utils.data import Dataset, DataLoader

In [3]:
# Custom dataset class for OpenCV
class EmotionDataset(Dataset):
    """Folder-per-class image dataset that loads grayscale images with OpenCV.

    ``root_dir`` must contain one sub-directory per class; every regular file
    inside a class directory is treated as one sample of that class.

    Args:
        root_dir: Directory holding the class sub-directories.
        transform: Optional callable applied to the grayscale ``numpy`` image
            before it is returned.

    Attributes:
        classes: Class directory names in sorted order; label ``i`` refers to
            ``classes[i]``.
        image_paths: Path of every sample.
        labels: Integer label of every sample, parallel to ``image_paths``.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order. Sorting keeps the
        # label <-> class-name mapping stable across runs, machines and
        # between the train and validation folders. Non-directory entries
        # (README, .DS_Store, ...) are skipped instead of crashing below.
        self.classes = sorted(
            name for name in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, name))
        )
        self.image_paths = []
        self.labels = []

        for label, emotion_class in enumerate(self.classes):
            class_path = os.path.join(root_dir, emotion_class)
            for image_name in sorted(os.listdir(class_path)):
                image_path = os.path.join(class_path, image_name)
                if not os.path.isfile(image_path):
                    continue
                self.image_paths.append(image_path)
                self.labels.append(label)

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        Raises:
            OSError: If OpenCV cannot decode the file at the stored path.
        """
        image_path = self.image_paths[idx]
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        if image is None:
            raise OSError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale

        if self.transform:
            image = self.transform(image)

        label = self.labels[idx]
        return image, label

In [4]:
# Custom transforms for OpenCV
class OpenCVTransform:
    """Resize, optionally augment, normalise and tensorise a grayscale image.

    Args:
        size: Target ``(width, height)`` passed to ``cv2.resize``.
        augment: When True (default) apply the random augmentations: a
            rotation in [-5, 5] degrees with probability 0.5 and a horizontal
            flip with probability 0.5. Pass False for validation/test data so
            evaluation is deterministic.

    Calling the instance on a 2-D ``numpy`` image returns a float32 tensor
    with a leading channel dimension and values scaled as
    ``(pixel / 255 - 0.5) / 0.5``.
    """

    def __init__(self, size=(48, 48), augment=True):
        self.size = size
        self.augment = augment

    def __call__(self, image):
        # Resize image
        image = cv2.resize(image, self.size)

        if self.augment:
            # Random rotation
            if np.random.random() < 0.5:
                angle = np.random.uniform(-5, 5)
                center = (self.size[0] // 2, self.size[1] // 2)
                M = cv2.getRotationMatrix2D(center, angle, 1.0)
                image = cv2.warpAffine(image, M, self.size)

            # Random horizontal flip
            if np.random.random() < 0.5:
                image = cv2.flip(image, 1)

        # Normalize
        image = image.astype(np.float32) / 255.0
        image = (image - 0.5) / 0.5

        # Convert to tensor
        return torch.from_numpy(image).unsqueeze(0)  # Add channel dimension


def _seed_worker(worker_id):
    """Give each DataLoader worker its own NumPy random stream.

    Forked workers inherit an identical copy of NumPy's global RNG state, so
    without this every worker would draw the same augmentation decisions.
    """
    np.random.seed(torch.initial_seed() % 2**32)


# Create transforms (validation must not be randomly augmented)
transform_train = OpenCVTransform()
transform_val = OpenCVTransform(augment=False)

# Dataset location; override with the EMOTION_DATA_DIR environment variable
# instead of editing the notebook on another machine.
DATA_ROOT = os.environ.get(
    "EMOTION_DATA_DIR",
    "D:/projects/machine learning/Expression-recognition/jonathanheix dataset/images",
)

# Create datasets
train_dataset = EmotionDataset(
    f"{DATA_ROOT}/train",
    transform=transform_train
)

val_dataset = EmotionDataset(
    f"{DATA_ROOT}/validation",
    transform=transform_val
)

# On Windows, DataLoader workers are spawned and must re-import the dataset
# class; classes defined in a notebook cell cannot be imported by the worker,
# so loading fails or hangs. Fall back to in-process loading there.
NUM_WORKERS = 0 if os.name == "nt" else 4
# Pinned host memory only helps (and only avoids a warning) when CUDA exists.
PIN_MEMORY = torch.cuda.is_available()

# Create dataloaders
loaded_train = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
    worker_init_fn=_seed_worker,
)
loaded_validation = DataLoader(
    val_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

In [None]:
def show_image(image, title=None):
    """Display a normalised image in an OpenCV window until a key is pressed.

    Args:
        image: ``torch`` tensor (any device) or ``numpy`` array. Values are
            expected to be normalised as ``(pixel / 255 - 0.5) / 0.5``; that
            normalisation is undone before display. A 3-D tensor is treated
            as CHW and converted to HWC.
        title: Window title; ``'Image'`` is used when omitted or empty.
    """
    # Handle CUDA tensor
    if torch.is_tensor(image):
        # Move to CPU and detach from computation graph
        image = image.detach().cpu().numpy()
        if image.ndim == 3:
            image = np.transpose(image, (1, 2, 0))  # CHW to HWC

    # Denormalize. Round instead of truncating (float error would otherwise
    # turn e.g. 99.99999 into 99) and clip so out-of-range values saturate
    # instead of wrapping around when cast to uint8.
    image = (image * 0.5 + 0.5) * 255.0
    image = np.clip(np.rint(image), 0, 255).astype(np.uint8)

    # Display image
    cv2.imshow(title if title else 'Image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Check for CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Example usage: pull one batch from the training loader as a sanity check.
dataiter = iter(loaded_train)
images, labels = next(dataiter)
# Move batch to GPU
images = images.to(device)
labels = labels.to(device)

# Label index -> class name, taken from the dataset itself. The dataset
# assigns label i to train_dataset.classes[i], so this stays correct
# whatever order the class folders were discovered in.
expression = dict(enumerate(train_dataset.classes))

# Display a random image and its label
random_idx = random.randrange(images.size(0))
target_name = expression[labels[random_idx].item()]
print("Target label:", target_name)
show_image(images[random_idx][0], f"Expression: {target_name}")

In [None]:
def show_predictions(model, dataloader, device, num_images=5, display_scale=5):
    """Show random samples from one batch with predicted and true labels.

    The first batch of ``dataloader`` is run through ``model``; up to
    ``num_images`` distinct samples are then displayed, each in its own
    OpenCV window, and the call blocks until a key is pressed.

    Args:
        model: Network mapping an image batch to per-class scores.
        dataloader: Iterable yielding ``(images, labels)`` batches. If it has
            a ``dataset.classes`` sequence, class names are taken from it;
            otherwise the seven default emotion names are used.
        device: Device the batch is moved to before inference.
        num_images: Maximum number of samples to display (clamped to the
            batch size).
        display_scale: Factor by which each image is enlarged before the
            labels are drawn; 48x48 inputs are too small for readable text.
    """
    model.eval()
    images, labels = next(iter(dataloader))
    images, labels = images.to(device), labels.to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(images)
    predicted = outputs.argmax(dim=1)

    class_names = getattr(getattr(dataloader, "dataset", None), "classes", None)
    if class_names:
        expression = dict(enumerate(class_names))
    else:
        expression = {0: "angry", 1: "disgust", 2: "fear", 3: "happy",
                      4: "neutral", 5: "sad", 6: "surprise"}

    count = max(0, min(num_images, len(images)))
    if count == 0:
        return

    # Sample without replacement so the same image is not shown twice.
    chosen = random.sample(range(len(images)), count)

    # Display images with predictions
    for i, idx in enumerate(chosen):
        image = images[idx][0].cpu().numpy()

        # Denormalize (round and clip so values cannot wrap in uint8)
        image = (image * 0.5 + 0.5) * 255.0
        image = np.clip(np.rint(image), 0, 255).astype(np.uint8)

        # Enlarge so the overlaid text fits and stays legible.
        if display_scale != 1:
            image = cv2.resize(image, None, fx=display_scale, fy=display_scale,
                               interpolation=cv2.INTER_NEAREST)

        pred_idx = int(predicted[idx])
        true_idx = int(labels[idx])
        pred_text = f"Pred: {expression.get(pred_idx, str(pred_idx))}"
        true_text = f"True: {expression.get(true_idx, str(true_idx))}"

        # Add text to image
        cv2.putText(image, pred_text, (10, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(image, true_text, (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        # Show image
        cv2.imshow(f'Image {i+1}', image)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage: visualise predictions for one validation batch.
# NOTE(review): `model` is not defined in the cells visible here; presumably a
# trained network created elsewhere in the notebook. Confirm before running.
show_predictions(model=model, dataloader=loaded_validation, device=device)