In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms

# Define the CNN architecture
class Deep_Emotion(nn.Module):
    def __init__(self):
        super(Deep_Emotion, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, 3)
        self.conv2 = nn.Conv2d(10, 10, 3)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(10, 10, 3)
        self.conv4 = nn.Conv2d(10, 10, 3)
        self.pool4 = nn.MaxPool2d(2, 2)

        self.norm = nn.BatchNorm2d(10)

        self.fc1 = nn.Linear(810, 50)
        self.fc2 = nn.Linear(50, 7)

        self.localization = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
        )

        self.fc_loc = nn.Sequential(
            nn.Linear(640, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def stn(self, x):
        xs = self.localization(x)
        xs = xs.view(-1, 640)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)
        return x

    def forward(self, input):
        out = self.stn(input)

        out = F.relu(self.conv1(out))
        out = self.conv2(out)
        out = F.relu(self.pool2(out))

        out = F.relu(self.conv3(out))
        out = self.norm(self.conv4(out))
        out = F.relu(self.pool4(out))

        out = F.dropout(out)
        out = out.view(-1, 810)
        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [9]:
transform = transforms.Compose([
    transforms.Resize((48, 48)),  
    transforms.Grayscale(),       
    transforms.ToTensor(),        
    transforms.Normalize((0.5,), (0.5,)) 
])
train_data_path = 'face/train'
test_data_path = 'face/test'
train_dataset = ImageFolder(train_data_path, transform=transform)
test_dataset = ImageFolder(test_data_path, transform=transform)
batch_size = 128
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [10]:
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Deep_Emotion().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 100

best_loss = float('inf')
patience = 5
counter = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    with tqdm(train_loader, unit='batch') as tepoch:
        tepoch.set_description(f'Epoch {epoch+1}/{num_epochs}')
        for inputs, labels in tepoch:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            tepoch.set_postfix(loss=running_loss / total, accuracy=correct / total)
    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_accuracy = correct / total
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        counter = 0
        torch.save(model.state_dict(), 'emotion_model.pth')
    else:
        counter += 1
    if counter >= patience:
        print(f'Early stopping at epoch {epoch+1} as validation loss did not improve for {patience} consecutive epochs.')
        break

Epoch 1/100: 100%|█| 225/225 [08:29<00:00,  2.27s/batch, accuracy=0.311, loss=1.


Epoch 1/100, Loss: 1.7132, Accuracy: 0.3108


Epoch 2/100: 100%|█| 225/225 [07:03<00:00,  1.88s/batch, accuracy=0.392, loss=1.


Epoch 2/100, Loss: 1.5615, Accuracy: 0.3923


Epoch 3/100: 100%|█| 225/225 [06:54<00:00,  1.84s/batch, accuracy=0.431, loss=1.


Epoch 3/100, Loss: 1.4744, Accuracy: 0.4307


Epoch 4/100: 100%|█| 225/225 [07:18<00:00,  1.95s/batch, accuracy=0.448, loss=1.


Epoch 4/100, Loss: 1.4222, Accuracy: 0.4483


Epoch 5/100: 100%|█| 225/225 [06:43<00:00,  1.79s/batch, accuracy=0.467, loss=1.


Epoch 5/100, Loss: 1.3753, Accuracy: 0.4667


Epoch 6/100: 100%|█| 225/225 [07:00<00:00,  1.87s/batch, accuracy=0.476, loss=1.


Epoch 6/100, Loss: 1.3594, Accuracy: 0.4757


Epoch 7/100: 100%|█| 225/225 [06:49<00:00,  1.82s/batch, accuracy=0.486, loss=1.


Epoch 7/100, Loss: 1.3287, Accuracy: 0.4862


Epoch 8/100: 100%|█| 225/225 [07:24<00:00,  1.98s/batch, accuracy=0.499, loss=1.


Epoch 8/100, Loss: 1.3108, Accuracy: 0.4986


Epoch 9/100: 100%|█| 225/225 [07:09<00:00,  1.91s/batch, accuracy=0.506, loss=1.


Epoch 9/100, Loss: 1.2903, Accuracy: 0.5064


Epoch 10/100: 100%|█| 225/225 [06:55<00:00,  1.85s/batch, accuracy=0.513, loss=1


Epoch 10/100, Loss: 1.2707, Accuracy: 0.5131


Epoch 11/100: 100%|█| 225/225 [09:00<00:00,  2.40s/batch, accuracy=0.515, loss=1


Epoch 11/100, Loss: 1.2613, Accuracy: 0.5150


Epoch 12/100: 100%|█| 225/225 [08:30<00:00,  2.27s/batch, accuracy=0.52, loss=1.


Epoch 12/100, Loss: 1.2488, Accuracy: 0.5205


Epoch 13/100: 100%|█| 225/225 [06:20<00:00,  1.69s/batch, accuracy=0.525, loss=1


Epoch 13/100, Loss: 1.2367, Accuracy: 0.5246


Epoch 14/100: 100%|█| 225/225 [06:26<00:00,  1.72s/batch, accuracy=0.526, loss=1


Epoch 14/100, Loss: 1.2304, Accuracy: 0.5259


Epoch 15/100: 100%|█| 225/225 [06:40<00:00,  1.78s/batch, accuracy=0.538, loss=1


Epoch 15/100, Loss: 1.2102, Accuracy: 0.5378


Epoch 16/100: 100%|█| 225/225 [08:22<00:00,  2.23s/batch, accuracy=0.537, loss=1


Epoch 16/100, Loss: 1.2077, Accuracy: 0.5368


Epoch 17/100: 100%|█| 225/225 [08:33<00:00,  2.28s/batch, accuracy=0.539, loss=1


Epoch 17/100, Loss: 1.2057, Accuracy: 0.5387


Epoch 18/100: 100%|█| 225/225 [08:30<00:00,  2.27s/batch, accuracy=0.544, loss=1


Epoch 18/100, Loss: 1.1906, Accuracy: 0.5442


Epoch 19/100: 100%|█| 225/225 [08:08<00:00,  2.17s/batch, accuracy=0.547, loss=1


Epoch 19/100, Loss: 1.1867, Accuracy: 0.5471


Epoch 20/100: 100%|█| 225/225 [08:38<00:00,  2.30s/batch, accuracy=0.548, loss=1


Epoch 20/100, Loss: 1.1791, Accuracy: 0.5483


Epoch 21/100: 100%|█| 225/225 [08:27<00:00,  2.25s/batch, accuracy=0.555, loss=1


Epoch 21/100, Loss: 1.1672, Accuracy: 0.5547


Epoch 22/100: 100%|█| 225/225 [07:39<00:00,  2.04s/batch, accuracy=0.554, loss=1


Epoch 22/100, Loss: 1.1649, Accuracy: 0.5543


Epoch 23/100: 100%|█| 225/225 [08:59<00:00,  2.40s/batch, accuracy=0.555, loss=1


Epoch 23/100, Loss: 1.1608, Accuracy: 0.5554


Epoch 24/100: 100%|█| 225/225 [08:12<00:00,  2.19s/batch, accuracy=0.56, loss=1.


Epoch 24/100, Loss: 1.1527, Accuracy: 0.5601


Epoch 25/100: 100%|█| 225/225 [06:16<00:00,  1.67s/batch, accuracy=0.562, loss=1


Epoch 25/100, Loss: 1.1461, Accuracy: 0.5615


Epoch 26/100: 100%|█| 225/225 [06:37<00:00,  1.76s/batch, accuracy=0.563, loss=1


Epoch 26/100, Loss: 1.1442, Accuracy: 0.5629


Epoch 27/100: 100%|█| 225/225 [06:42<00:00,  1.79s/batch, accuracy=0.563, loss=1


Epoch 27/100, Loss: 1.1438, Accuracy: 0.5633


Epoch 28/100: 100%|█| 225/225 [06:02<00:00,  1.61s/batch, accuracy=0.567, loss=1


Epoch 28/100, Loss: 1.1389, Accuracy: 0.5665


Epoch 29/100: 100%|█| 225/225 [06:14<00:00,  1.67s/batch, accuracy=0.572, loss=1


Epoch 29/100, Loss: 1.1293, Accuracy: 0.5716


Epoch 30/100: 100%|█| 225/225 [06:17<00:00,  1.68s/batch, accuracy=0.57, loss=1.


Epoch 30/100, Loss: 1.1304, Accuracy: 0.5701


Epoch 31/100: 100%|█| 225/225 [06:29<00:00,  1.73s/batch, accuracy=0.569, loss=1


Epoch 31/100, Loss: 1.1259, Accuracy: 0.5692


Epoch 32/100: 100%|█| 225/225 [06:42<00:00,  1.79s/batch, accuracy=0.569, loss=1


Epoch 32/100, Loss: 1.1299, Accuracy: 0.5690


Epoch 33/100: 100%|█| 225/225 [06:56<00:00,  1.85s/batch, accuracy=0.568, loss=1


Epoch 33/100, Loss: 1.1233, Accuracy: 0.5677


Epoch 34/100: 100%|█| 225/225 [06:26<00:00,  1.72s/batch, accuracy=0.57, loss=1.


Epoch 34/100, Loss: 1.1244, Accuracy: 0.5697


Epoch 35/100: 100%|█| 225/225 [06:47<00:00,  1.81s/batch, accuracy=0.57, loss=1.


Epoch 35/100, Loss: 1.1232, Accuracy: 0.5703


Epoch 36/100: 100%|█| 225/225 [07:32<00:00,  2.01s/batch, accuracy=0.575, loss=1


Epoch 36/100, Loss: 1.1175, Accuracy: 0.5746


Epoch 37/100: 100%|█| 225/225 [06:54<00:00,  1.84s/batch, accuracy=0.574, loss=1


Epoch 37/100, Loss: 1.1175, Accuracy: 0.5740


Epoch 38/100: 100%|█| 225/225 [06:47<00:00,  1.81s/batch, accuracy=0.578, loss=1


Epoch 38/100, Loss: 1.1068, Accuracy: 0.5785


Epoch 39/100: 100%|█| 225/225 [06:48<00:00,  1.82s/batch, accuracy=0.581, loss=1


Epoch 39/100, Loss: 1.1015, Accuracy: 0.5812


Epoch 40/100: 100%|█| 225/225 [08:29<00:00,  2.27s/batch, accuracy=0.58, loss=1.


Epoch 40/100, Loss: 1.1034, Accuracy: 0.5796


Epoch 41/100: 100%|█| 225/225 [07:31<00:00,  2.01s/batch, accuracy=0.578, loss=1


Epoch 41/100, Loss: 1.1017, Accuracy: 0.5777


Epoch 42/100: 100%|█| 225/225 [06:23<00:00,  1.70s/batch, accuracy=0.58, loss=1.


Epoch 42/100, Loss: 1.1056, Accuracy: 0.5798


Epoch 43/100: 100%|█| 225/225 [06:19<00:00,  1.69s/batch, accuracy=0.585, loss=1


Epoch 43/100, Loss: 1.0940, Accuracy: 0.5851


Epoch 44/100: 100%|█| 225/225 [06:31<00:00,  1.74s/batch, accuracy=0.584, loss=1


Epoch 44/100, Loss: 1.0935, Accuracy: 0.5838


Epoch 45/100: 100%|█| 225/225 [06:12<00:00,  1.65s/batch, accuracy=0.586, loss=1


Epoch 45/100, Loss: 1.0896, Accuracy: 0.5863


Epoch 46/100: 100%|█| 225/225 [06:49<00:00,  1.82s/batch, accuracy=0.583, loss=1


Epoch 46/100, Loss: 1.0921, Accuracy: 0.5825


Epoch 47/100: 100%|█| 225/225 [06:06<00:00,  1.63s/batch, accuracy=0.583, loss=1


Epoch 47/100, Loss: 1.0942, Accuracy: 0.5830


Epoch 48/100: 100%|█| 225/225 [06:52<00:00,  1.83s/batch, accuracy=0.586, loss=1


Epoch 48/100, Loss: 1.0884, Accuracy: 0.5862


Epoch 49/100: 100%|█| 225/225 [06:49<00:00,  1.82s/batch, accuracy=0.585, loss=1


Epoch 49/100, Loss: 1.0884, Accuracy: 0.5855


Epoch 50/100: 100%|█| 225/225 [07:05<00:00,  1.89s/batch, accuracy=0.591, loss=1


Epoch 50/100, Loss: 1.0739, Accuracy: 0.5914


Epoch 51/100: 100%|█| 225/225 [06:51<00:00,  1.83s/batch, accuracy=0.588, loss=1


Epoch 51/100, Loss: 1.0805, Accuracy: 0.5880


Epoch 52/100: 100%|█| 225/225 [06:58<00:00,  1.86s/batch, accuracy=0.587, loss=1


Epoch 52/100, Loss: 1.0781, Accuracy: 0.5873


Epoch 53/100: 100%|█| 225/225 [07:03<00:00,  1.88s/batch, accuracy=0.587, loss=1


Epoch 53/100, Loss: 1.0802, Accuracy: 0.5871


Epoch 54/100: 100%|█| 225/225 [07:28<00:00,  1.99s/batch, accuracy=0.594, loss=1


Epoch 54/100, Loss: 1.0685, Accuracy: 0.5935


Epoch 55/100: 100%|█| 225/225 [07:12<00:00,  1.92s/batch, accuracy=0.593, loss=1


Epoch 55/100, Loss: 1.0731, Accuracy: 0.5928


Epoch 56/100: 100%|█| 225/225 [07:33<00:00,  2.02s/batch, accuracy=0.594, loss=1


Epoch 56/100, Loss: 1.0667, Accuracy: 0.5937


Epoch 57/100: 100%|█| 225/225 [06:26<00:00,  1.72s/batch, accuracy=0.591, loss=1


Epoch 57/100, Loss: 1.0683, Accuracy: 0.5913


Epoch 58/100: 100%|█| 225/225 [06:38<00:00,  1.77s/batch, accuracy=0.589, loss=1


Epoch 58/100, Loss: 1.0744, Accuracy: 0.5893


Epoch 59/100: 100%|█| 225/225 [06:50<00:00,  1.83s/batch, accuracy=0.593, loss=1


Epoch 59/100, Loss: 1.0665, Accuracy: 0.5935


Epoch 60/100: 100%|█| 225/225 [05:48<00:00,  1.55s/batch, accuracy=0.588, loss=1


Epoch 60/100, Loss: 1.0781, Accuracy: 0.5880


Epoch 61/100: 100%|█| 225/225 [07:20<00:00,  1.96s/batch, accuracy=0.594, loss=1


Epoch 61/100, Loss: 1.0656, Accuracy: 0.5935


Epoch 62/100: 100%|█| 225/225 [07:10<00:00,  1.91s/batch, accuracy=0.6, loss=1.0


Epoch 62/100, Loss: 1.0540, Accuracy: 0.5998


Epoch 63/100: 100%|█| 225/225 [06:19<00:00,  1.69s/batch, accuracy=0.592, loss=1


Epoch 63/100, Loss: 1.0671, Accuracy: 0.5925


Epoch 64/100: 100%|█| 225/225 [06:28<00:00,  1.73s/batch, accuracy=0.592, loss=1


Epoch 64/100, Loss: 1.0657, Accuracy: 0.5924


Epoch 65/100: 100%|█| 225/225 [06:31<00:00,  1.74s/batch, accuracy=0.591, loss=1


Epoch 65/100, Loss: 1.0706, Accuracy: 0.5908


Epoch 66/100: 100%|█| 225/225 [06:40<00:00,  1.78s/batch, accuracy=0.596, loss=1


Epoch 66/100, Loss: 1.0598, Accuracy: 0.5963


Epoch 67/100: 100%|█| 225/225 [06:49<00:00,  1.82s/batch, accuracy=0.595, loss=1

Epoch 67/100, Loss: 1.0616, Accuracy: 0.5946
Early stopping at epoch 67 as validation loss did not improve for 5 consecutive epochs.





In [1]:
import cv2
import torch
import numpy as np
from torchvision.transforms import transforms
from PIL import Image

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml')

# Function to detect and recognize emotions from an image
def detect_emotion(image_path):
    # Load the image
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Detect faces in the image
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    # For each detected face, crop, resize, and recognize emotion
    for (x, y, w, h) in faces:
        # Crop the face region
        face = gray[y:y+h, x:x+w]
        # Resize the face image to match the input size of your CNN model
        face = cv2.resize(face, (48, 48))

        face_pil = Image.fromarray(face)
        transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])
        face_tensor = transform(face_pil).unsqueeze(0)
        model.eval()
        with torch.no_grad():
            outputs = model(face_tensor)
            _, predicted = torch.max(outputs, 1)

        emotion_label = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}[predicted.item()]
        cv2.rectangle(image, (x, y), (x+w, y+h), (255, 0, 0), 2)
        cv2.putText(image, emotion_label, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imshow('Emotion Detection', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Call the function with the image path
detect_emotion('images/IMG_0125.jpg')


  warn(


NameError: name 'model' is not defined

In [None]:
import cv2
import torch
from torchvision.transforms import transforms
from PIL import Image
# outputs = model(face_tensor)
image_path = 'images/IMG_0125.jpg'
image = cv2.imread(image_path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.3, 5)

In [4]:
for (x, y, w, h) in faces:
        face = gray[y:y+h, x:x+w]
        face = cv2.resize(face, (48, 48))
        face_pil = Image.fromarray(face)
        transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])
        face_tensor = transform(face_pil).unsqueeze(0)
face_tensor

NameError: name 'faces' is not defined

In [14]:
model.eval()
with torch.no_grad():
    outputs = model(face_tensor)
    _, predicted = torch.max(outputs, 1)



In [17]:
print(outputs)
predicted

tensor([[ 0.7549, -3.5763,  0.3546, -0.2624,  1.1595,  1.0852, -0.5098]])


tensor([4])