In [1]:
import pandas as pd
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from sklearn.model_selection import KFold
from torch.utils.data import Subset
from timm import create_model
pd.set_option('display.max_colwidth', None)

In [2]:
df = pd.read_csv("dataset/label.csv", sep=',')
rename_feelings = {"Happy": "positive","Sad": "negative","Surprised":"positive","Neutral":"positive","Contempt": "negative","Disgust":"negative","Fear": "negative", "Anger": "negative"}
df['emotion'] = df['emotion'].replace(rename_feelings)
df = df.sample(frac=1).reset_index(drop=True)
label_encoder = LabelEncoder()
df['emotion_encoded'] = label_encoder.fit_transform(df['emotion'])
display(df.head(10))

Unnamed: 0,image,emotion,emotion_encoded
0,48.jpg,negative,0
1,114.jpg,negative,0
2,105.jpg,negative,0
3,90.jpg,negative,0
4,66.jpg,negative,0
5,150.jpg,negative,0
6,136.jpg,negative,0
7,6.jpg,negative,0
8,77.jpg,positive,1
9,41.jpg,negative,0


In [3]:
class extractImageFeatureResNetDataSet():
    def __init__(self, data):
        self.data = data
        self.scaler = transforms.Resize([224, 224])
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
        self.to_tensor = transforms.ToTensor()
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):

        image_name = self.data.iloc[idx]['image']
        img_loc = 'dataset/images/'+str(image_name)
        img = Image.open(img_loc)
        t_img = self.normalize(self.to_tensor(self.scaler(img)))
        return t_img, self.data.iloc[idx]['emotion_encoded'].item()

In [4]:
train_ImageDataset_ResNet = extractImageFeatureResNetDataSet(df)

In [5]:
device = 'cuda'
model = create_model('vit_base_patch16_224', pretrained=True, num_classes=2)  # Using a pretrained ViT model
model = model.to(device)

# Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-4)

In [6]:
def train_model(epochs=20):
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    highest_accuracy = 0
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        predict_arr, gt_arr = [], []
        for fold, (train_idx, val_idx) in enumerate(kfold.split(train_ImageDataset_ResNet)):
            train_subset = Subset(train_ImageDataset_ResNet, train_idx)
            val_subset = Subset(train_ImageDataset_ResNet, val_idx)
            train_loader = DataLoader(train_subset, batch_size=4, shuffle=True, num_workers=0)
            val_loader = DataLoader(val_subset, batch_size=4, shuffle=False, num_workers=0)
            for images, labels in train_loader:        
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
            model.eval()
            with torch.no_grad():
                for images, labels in val_loader:        
                    images, labels = images.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    running_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    predict_arr = predict_arr + predicted.cpu().tolist()
                    gt_arr = gt_arr + labels.cpu().tolist()                    
        accuracy = accuracy_score(predict_arr, gt_arr)
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(train_loader)}, Accuracy: {accuracy*100:.2f}%')
            
        if highest_accuracy < accuracy:
            print("Saving this model")
            torch.save(model.state_dict(), 'best.pt')
            highest_accuracy = accuracy


In [7]:
train_model()

Epoch [1/20], Loss: 1.2221704707030328, Accuracy: 44.08%
Saving this model
Epoch [2/20], Loss: 0.9061198027864579, Accuracy: 59.21%
Saving this model
Epoch [3/20], Loss: 0.9174340266373849, Accuracy: 55.92%
Epoch [4/20], Loss: 0.8821507278949984, Accuracy: 59.21%
Epoch [5/20], Loss: 0.8770522359878786, Accuracy: 62.50%
Saving this model
Epoch [6/20], Loss: 0.8927337661866219, Accuracy: 63.16%
Saving this model
Epoch [7/20], Loss: 0.8189782442585114, Accuracy: 66.45%
Saving this model
Epoch [8/20], Loss: 0.8060295975977375, Accuracy: 62.50%
Epoch [9/20], Loss: 0.7656565868566113, Accuracy: 67.76%
Saving this model
Epoch [10/20], Loss: 1.185576147189544, Accuracy: 70.39%
Saving this model
Epoch [11/20], Loss: 0.5603437128687098, Accuracy: 81.58%
Saving this model
Epoch [12/20], Loss: 0.4542191581680409, Accuracy: 83.55%
Saving this model
Epoch [13/20], Loss: 0.352031247938172, Accuracy: 88.82%
Saving this model
Epoch [14/20], Loss: 0.8501323681444891, Accuracy: 67.11%
Epoch [15/20], Loss