# Experiment 8

In [1]:
!pip install kaggle wandb onnx -Uq
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
! mkdir ~/.kaggle

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [3]:
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json

In [4]:
! chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge

challenges-in-representation-learning-facial-expression-recognition-challenge.zip: Skipping, found more recently modified local copy (use --force to force download)


In [6]:
! unzip challenges-in-representation-learning-facial-expression-recognition-challenge.zip

Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
replace example_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: example_submission.csv  
replace fer2013.tar.gz? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: fer2013.tar.gz          y
y
y

replace icml_face_data.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename:   inflating: icml_face_data.csv      y
y
y

replace test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename:   inflating: test.csv                
replace train.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename:   inflating: train.csv               y
y



In [7]:
!pip install wandb onnx -Uq

# data

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import wandb
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [10]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mkonstantine25b[0m ([33mkonstantine25b-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [11]:
class FER2013Dataset(Dataset):
    def __init__(self, data_source, transform=None):
        if isinstance(data_source, str):
           self.data = pd.read_csv(data_source)
        else:
           self.data = data_source
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = self.data.iloc[idx]['pixels']
        emotion = self.data.iloc[idx]['emotion']

        image = np.array([int(pixel) for pixel in pixels.split()]).reshape(48, 48)
        image = image.astype(np.float32) / 255.0

        if self.transform:
            image = self.transform(image)
        else:
            image = torch.FloatTensor(image).unsqueeze(0)

        return image, emotion

In [12]:
def get_transforms():
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Pad(4),
        transforms.RandomCrop(48),
        transforms.RandomHorizontalFlip(p=0.3),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    val_test_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    return train_transform, val_test_transform

In [13]:
def create_train_val_test_splits(csv_file):
    data = pd.read_csv(csv_file)

    train_size = int(0.72 * len(data))
    val_size = int(0.18 * len(data))
    test_size = len(data) - train_size - val_size

    train_data = data.iloc[:train_size]
    val_data = data.iloc[train_size:train_size + val_size]
    test_data = data.iloc[train_size + val_size:]

    return train_data, val_data, test_data

In [14]:
def create_dataloaders(csv_file, batch_size=64):
    train_transform, val_test_transform = get_transforms()

    train_data, val_data, test_data = create_train_val_test_splits(csv_file)

    train_dataset = FER2013Dataset(train_data, transform=train_transform)
    val_dataset = FER2013Dataset(val_data, transform=val_test_transform)
    test_dataset = FER2013Dataset(test_data, transform=val_test_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    return train_loader, val_loader, test_loader

In [15]:
class BasicResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [16]:
class EnhancedCNN(nn.Module):
    def __init__(self, dropout_rate=0.3, hidden_dim=128):
        super(EnhancedCNN, self).__init__()

        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.resnet_block = BasicResidualBlock(64, 128, stride=2)

        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2)

        self.fc1 = nn.Linear(128 * 3 * 3, hidden_dim)
        self.bn4 = nn.BatchNorm1d(hidden_dim)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(hidden_dim, 7)

    def forward(self, x):
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))

        x = self.resnet_block(x)

        x = self.pool3(F.relu(self.bn3(self.conv3(x))))

        x = x.view(x.size(0), -1)
        x = F.relu(self.bn4(self.fc1(x)))
        x = self.dropout1(x)
        x = self.fc2(x)

        return x

In [17]:
def test_overfitting():
    print("Testing enhanced CNN with ResNet block on small dataset...")

    model = EnhancedCNN(dropout_rate=0.0).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_loader, _, _ = create_dataloaders('train.csv', batch_size=32)

    small_batch = []
    for i, (data, target) in enumerate(train_loader):
        small_batch.extend(list(zip(data, target)))
        if len(small_batch) >= 20:
            break

    small_batch = small_batch[:20]

    for epoch in range(30):
        model.train()
        total_loss = 0
        correct = 0

        batch_data = []
        batch_targets = []

        for data, target in small_batch:
            batch_data.append(data)
            batch_targets.append(target)

        batch_data = torch.stack(batch_data).to(device)
        batch_targets = torch.tensor(batch_targets).to(device)

        optimizer.zero_grad()
        output = model(batch_data)
        loss = criterion(output, batch_targets)
        loss.backward()
        optimizer.step()

        total_loss = loss.item()
        pred = output.argmax(dim=1)
        correct = pred.eq(batch_targets).sum().item()

        accuracy = 100. * correct / len(small_batch)

        print(f"Overfit Epoch {epoch+1}/30, Loss: {total_loss:.4f}, Acc: {accuracy:.2f}%")

        if accuracy >= 95.0:
            print("Enhanced CNN can overfit successfully!")
            break

    print("Overfitting test completed.\n")

In [18]:
def compute_loss(loader, model, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)

            total_loss += loss.item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            total += target.size(0)

    return total_loss / len(loader), 100. * correct / total

In [19]:
def train_model(config=None):
    with wandb.init(project="Facial_Expression_Recognition_8", config=config):
        config = wandb.config

        model = EnhancedCNN(dropout_rate=config.dropout_rate, hidden_dim=config.hidden_dim).to(device)
        train_loader, val_loader, test_loader = create_dataloaders('train.csv', config.batch_size)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.5, verbose=True)

        best_val_acc = 0
        best_model_path = f"best_model_{wandb.run.id}.pt"
        patience_counter = 0

        wandb.watch(model, log="gradients", log_freq=100)

        for epoch in range(config.epochs):
            model.train()
            train_loss = 0
            train_correct = 0
            train_total = 0

            progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{config.epochs} [Train]')

            for batch_idx, (data, target) in enumerate(progress_bar):
                data, target = data.to(device), target.to(device)

                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                pred = output.argmax(dim=1)
                train_correct += pred.eq(target).sum().item()
                train_total += target.size(0)

                if batch_idx % 50 == 0:
                    wandb.log({
                        "batch_loss": loss.item(),
                        "learning_rate": optimizer.param_groups[0]['lr'],
                        "epoch": epoch
                    })

                progress_bar.set_postfix({
                    'loss': f'{loss.item():.3f}',
                    'acc': f'{100.*train_correct/train_total:.1f}%'
                })

            train_acc = 100. * train_correct / train_total
            train_loss = train_loss / len(train_loader)

            val_loss, val_acc = compute_loss(val_loader, model, criterion, device)

            scheduler.step(val_acc)

            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss,
                "train_accuracy": train_acc,
                "val_loss": val_loss,
                "val_accuracy": val_acc,
                "train_val_gap": train_acc - val_acc
            })

            print(f"Epoch {epoch+1}/{config.epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0

                torch.save({
                    'model_state_dict': model.state_dict(),
                    'model_config': {
                        'dropout_rate': config.dropout_rate,
                        'hidden_dim': config.hidden_dim
                    },
                    'training_config': dict(config),
                    'val_accuracy': val_acc,
                    'epoch': epoch
                }, best_model_path)

                model_artifact = wandb.Artifact(
                    name=f"best_model_{wandb.run.name}",
                    type="model",
                    description=f"Best enhanced CNN model with validation accuracy: {val_acc:.2f}%"
                )
                model_artifact.add_file(best_model_path)
                wandb.log_artifact(model_artifact)

                print(f"New best model saved with validation accuracy: {val_acc:.2f}%")
            else:
                patience_counter += 1

            if patience_counter >= config.patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break

        wandb.log({"best_val_accuracy": best_val_acc})
        print(f"Training completed. Best validation accuracy: {best_val_acc:.2f}%")

        return best_val_acc


In [20]:
def evaluate_model_on_testset(model_path, test_loader):
    checkpoint = torch.load(model_path, map_location=device)

    model_config = checkpoint['model_config']
    model = EnhancedCNN(
        dropout_rate=model_config['dropout_rate'],
        hidden_dim=model_config['hidden_dim']
    ).to(device)

    model.load_state_dict(checkpoint['model_state_dict'])

    criterion = nn.CrossEntropyLoss()
    test_loss, test_acc = compute_loss(test_loader, model, criterion, device)

    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Test Loss: {test_loss:.4f}")

    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            all_preds.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())

    emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

    print("\nClassification Report:")
    print(classification_report(all_targets, all_preds, target_names=emotion_labels))

    cm = confusion_matrix(all_targets, all_preds)

    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=emotion_labels, yticklabels=emotion_labels)
    plt.title('Confusion Matrix - Facial Expression Recognition (Enhanced CNN)')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.show()

    return test_acc, all_preds, all_targets


In [21]:
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'best_val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 0.0001,
            'max': 0.01
        },
        'batch_size': {
            'values': [32, 64]
        },
        'dropout_rate': {
            'distribution': 'uniform',
            'min': 0.2,
            'max': 0.5
        },
        'weight_decay': {
            'distribution': 'log_uniform_values',
            'min': 1e-5,
            'max': 1e-3
        },
        'hidden_dim': {
            'values': [128, 256]
        },
        'epochs': {
            'value': 8
        },
        'patience': {
            'value': 5
        }
    }
}

In [22]:
test_overfitting()

Testing enhanced CNN with ResNet block on small dataset...
Overfit Epoch 1/30, Loss: 1.9614, Acc: 15.00%
Overfit Epoch 2/30, Loss: 0.6638, Acc: 100.00%
Enhanced CNN can overfit successfully!
Overfitting test completed.



In [23]:
sweep_id = wandb.sweep(sweep_config, project="Facial_Expression_Recognition_8")

Create sweep with ID: lc6mrgl9
Sweep URL: https://wandb.ai/konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_8/sweeps/lc6mrgl9


In [24]:
print("Running enhanced CNN hyperparameter sweep...")
wandb.agent(sweep_id, train_model, count=5)

Running enhanced CNN hyperparameter sweep...


[34m[1mwandb[0m: Agent Starting Run: vwx2z668 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_rate: 0.337528658373042
[34m[1mwandb[0m: 	epochs: 8
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002132563918843244
[34m[1mwandb[0m: 	patience: 5
[34m[1mwandb[0m: 	weight_decay: 0.00017690014034959468


Epoch 1/8 [Train]: 100%|██████████| 323/323 [00:25<00:00, 12.61it/s, loss=1.640, acc=34.2%]


Epoch 1/8, Train Loss: 1.6595, Train Acc: 34.21%, Val Loss: 1.4650, Val Acc: 44.18%
New best model saved with validation accuracy: 44.18%


Epoch 2/8 [Train]: 100%|██████████| 323/323 [00:27<00:00, 11.72it/s, loss=1.406, acc=45.3%]


Epoch 2/8, Train Loss: 1.4137, Train Acc: 45.26%, Val Loss: 1.3356, Val Acc: 50.24%
New best model saved with validation accuracy: 50.24%


Epoch 3/8 [Train]: 100%|██████████| 323/323 [00:25<00:00, 12.57it/s, loss=1.236, acc=49.2%]


Epoch 3/8, Train Loss: 1.3218, Train Acc: 49.19%, Val Loss: 1.2195, Val Acc: 53.69%
New best model saved with validation accuracy: 53.69%


Epoch 4/8 [Train]: 100%|██████████| 323/323 [00:25<00:00, 12.52it/s, loss=1.129, acc=51.0%]


Epoch 4/8, Train Loss: 1.2709, Train Acc: 51.01%, Val Loss: 1.1964, Val Acc: 54.19%
New best model saved with validation accuracy: 54.19%


Epoch 5/8 [Train]: 100%|██████████| 323/323 [00:25<00:00, 12.56it/s, loss=1.441, acc=52.7%]


Epoch 5/8, Train Loss: 1.2395, Train Acc: 52.70%, Val Loss: 1.1499, Val Acc: 56.30%
New best model saved with validation accuracy: 56.30%


Epoch 6/8 [Train]: 100%|██████████| 323/323 [00:25<00:00, 12.84it/s, loss=1.241, acc=53.6%]


Epoch 6/8, Train Loss: 1.2109, Train Acc: 53.55%, Val Loss: 1.1721, Val Acc: 55.37%


Epoch 7/8 [Train]: 100%|██████████| 323/323 [00:25<00:00, 12.83it/s, loss=1.235, acc=54.3%]


Epoch 7/8, Train Loss: 1.1968, Train Acc: 54.34%, Val Loss: 1.1427, Val Acc: 57.15%
New best model saved with validation accuracy: 57.15%


Epoch 8/8 [Train]: 100%|██████████| 323/323 [00:25<00:00, 12.92it/s, loss=1.293, acc=55.1%]


Epoch 8/8, Train Loss: 1.1789, Train Acc: 55.12%, Val Loss: 1.1269, Val Acc: 57.62%
New best model saved with validation accuracy: 57.62%
Training completed. Best validation accuracy: 57.62%


0,1
batch_loss,█▆▆▆▄▃▃▅▃▄▂▄▃▂▁▃▃▂▃▃▂▂▂▂▂▂▃▂▃▃▃▂▂▁▄▂▂▃▂▂
best_val_accuracy,▁
epoch,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇██████
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▅▆▇▇▇██
train_loss,█▄▃▂▂▁▁▁
train_val_gap,▁▅▆▇▆█▇▇
val_accuracy,▁▄▆▆▇▇██
val_loss,█▅▃▂▁▂▁▁

0,1
batch_loss,1.14121
best_val_accuracy,57.61564
epoch,7.0
learning_rate,0.00213
train_accuracy,55.11853
train_loss,1.1789
train_val_gap,-2.49711
val_accuracy,57.61564
val_loss,1.1269


[34m[1mwandb[0m: Agent Starting Run: g3ekz9xj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_rate: 0.43849976155038006
[34m[1mwandb[0m: 	epochs: 8
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00015287156081747542
[34m[1mwandb[0m: 	patience: 5
[34m[1mwandb[0m: 	weight_decay: 0.00014084112339665596


Epoch 1/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.76it/s, loss=1.515, acc=29.4%]


Epoch 1/8, Train Loss: 1.7670, Train Acc: 29.40%, Val Loss: 1.5395, Val Acc: 41.34%
New best model saved with validation accuracy: 41.34%


Epoch 2/8 [Train]: 100%|██████████| 646/646 [00:26<00:00, 24.75it/s, loss=1.706, acc=39.7%]


Epoch 2/8, Train Loss: 1.5571, Train Acc: 39.72%, Val Loss: 1.4384, Val Acc: 45.21%
New best model saved with validation accuracy: 45.21%


Epoch 3/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.91it/s, loss=1.608, acc=43.9%]


Epoch 3/8, Train Loss: 1.4579, Train Acc: 43.89%, Val Loss: 1.3476, Val Acc: 49.43%
New best model saved with validation accuracy: 49.43%


Epoch 4/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.23it/s, loss=1.254, acc=46.5%]


Epoch 4/8, Train Loss: 1.3987, Train Acc: 46.46%, Val Loss: 1.3034, Val Acc: 51.21%
New best model saved with validation accuracy: 51.21%


Epoch 5/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.10it/s, loss=1.501, acc=48.5%]


Epoch 5/8, Train Loss: 1.3480, Train Acc: 48.46%, Val Loss: 1.2572, Val Acc: 52.97%
New best model saved with validation accuracy: 52.97%


Epoch 6/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.14it/s, loss=1.107, acc=49.9%]


Epoch 6/8, Train Loss: 1.3165, Train Acc: 49.93%, Val Loss: 1.2075, Val Acc: 54.94%
New best model saved with validation accuracy: 54.94%


Epoch 7/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.92it/s, loss=1.211, acc=50.8%]


Epoch 7/8, Train Loss: 1.2915, Train Acc: 50.77%, Val Loss: 1.1860, Val Acc: 55.22%
New best model saved with validation accuracy: 55.22%


Epoch 8/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.82it/s, loss=1.031, acc=51.8%]


Epoch 8/8, Train Loss: 1.2662, Train Acc: 51.76%, Val Loss: 1.1717, Val Acc: 56.51%
New best model saved with validation accuracy: 56.51%
Training completed. Best validation accuracy: 56.51%


0,1
batch_loss,▇▆█▆▆▅▅▄▅▄▄▃▄▂▃▄▄▃▅▅▃▃▃▃▅▁▂▃▃▂▃▂▂▂▄▃▁▂▂▃
best_val_accuracy,▁
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇████
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▄▆▆▇▇██
train_loss,█▅▄▃▂▂▁▁
train_val_gap,▁▇▇██▇██
val_accuracy,▁▃▅▆▆▇▇█
val_loss,█▆▄▄▃▂▁▁

0,1
batch_loss,1.39629
best_val_accuracy,56.51248
epoch,7.0
learning_rate,0.00015
train_accuracy,51.76101
train_loss,1.26621
train_val_gap,-4.75148
val_accuracy,56.51248
val_loss,1.17174


[34m[1mwandb[0m: Agent Starting Run: 37qfk9h8 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_rate: 0.34604517298519144
[34m[1mwandb[0m: 	epochs: 8
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001090786630661674
[34m[1mwandb[0m: 	patience: 5
[34m[1mwandb[0m: 	weight_decay: 7.051750904324007e-05


Epoch 1/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.51it/s, loss=1.603, acc=28.8%]


Epoch 1/8, Train Loss: 1.7660, Train Acc: 28.83%, Val Loss: 1.5655, Val Acc: 40.20%
New best model saved with validation accuracy: 40.20%


Epoch 2/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.39it/s, loss=1.815, acc=38.4%]


Epoch 2/8, Train Loss: 1.5816, Train Acc: 38.39%, Val Loss: 1.4319, Val Acc: 46.00%
New best model saved with validation accuracy: 46.00%


Epoch 3/8 [Train]: 100%|██████████| 646/646 [00:29<00:00, 22.14it/s, loss=1.854, acc=43.2%]


Epoch 3/8, Train Loss: 1.4767, Train Acc: 43.17%, Val Loss: 1.3509, Val Acc: 49.43%
New best model saved with validation accuracy: 49.43%


Epoch 4/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.89it/s, loss=1.559, acc=46.1%]


Epoch 4/8, Train Loss: 1.4137, Train Acc: 46.06%, Val Loss: 1.2941, Val Acc: 51.65%
New best model saved with validation accuracy: 51.65%


Epoch 5/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.64it/s, loss=1.334, acc=48.0%]


Epoch 5/8, Train Loss: 1.3674, Train Acc: 47.98%, Val Loss: 1.2549, Val Acc: 53.20%
New best model saved with validation accuracy: 53.20%


Epoch 6/8 [Train]: 100%|██████████| 646/646 [00:26<00:00, 24.07it/s, loss=1.547, acc=49.3%]


Epoch 6/8, Train Loss: 1.3298, Train Acc: 49.28%, Val Loss: 1.2224, Val Acc: 54.62%
New best model saved with validation accuracy: 54.62%


Epoch 7/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.52it/s, loss=1.233, acc=50.7%]


Epoch 7/8, Train Loss: 1.3017, Train Acc: 50.75%, Val Loss: 1.2215, Val Acc: 53.76%


Epoch 8/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.86it/s, loss=1.461, acc=51.6%]


Epoch 8/8, Train Loss: 1.2779, Train Acc: 51.64%, Val Loss: 1.1866, Val Acc: 55.37%
New best model saved with validation accuracy: 55.37%
Training completed. Best validation accuracy: 55.37%


0,1
batch_loss,▇▇▇▇▆██▅▅▆▅▆▅▅▄▄▄▄▄▂▃▅▄▄▄▄▄▄▄▄▃▄▄▂▅▃▃▁▂▂
best_val_accuracy,▁
epoch,▁▁▁▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇███████
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▄▅▆▇▇██
train_loss,█▅▄▃▂▂▁▁
train_val_gap,▁▄▅▆▆▆█▇
val_accuracy,▁▄▅▆▇█▇█
val_loss,█▆▄▃▂▂▂▁

0,1
batch_loss,1.27143
best_val_accuracy,55.37062
epoch,7.0
learning_rate,0.00011
train_accuracy,51.64006
train_loss,1.27793
train_val_gap,-3.73056
val_accuracy,55.37062
val_loss,1.1866


[34m[1mwandb[0m: Agent Starting Run: apax75ge with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_rate: 0.33655440709215656
[34m[1mwandb[0m: 	epochs: 8
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00012329545462085015
[34m[1mwandb[0m: 	patience: 5
[34m[1mwandb[0m: 	weight_decay: 0.00017096877511150522


Epoch 1/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.68it/s, loss=1.794, acc=30.8%]


Epoch 1/8, Train Loss: 1.7326, Train Acc: 30.83%, Val Loss: 1.5390, Val Acc: 40.47%
New best model saved with validation accuracy: 40.47%


Epoch 2/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.78it/s, loss=1.305, acc=40.3%]


Epoch 2/8, Train Loss: 1.5397, Train Acc: 40.34%, Val Loss: 1.4278, Val Acc: 45.21%
New best model saved with validation accuracy: 45.21%


Epoch 3/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.48it/s, loss=1.588, acc=43.9%]


Epoch 3/8, Train Loss: 1.4463, Train Acc: 43.90%, Val Loss: 1.3285, Val Acc: 49.76%
New best model saved with validation accuracy: 49.76%


Epoch 4/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.56it/s, loss=1.254, acc=47.0%]


Epoch 4/8, Train Loss: 1.3855, Train Acc: 46.95%, Val Loss: 1.2992, Val Acc: 51.83%
New best model saved with validation accuracy: 51.83%


Epoch 5/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.33it/s, loss=1.398, acc=49.1%]


Epoch 5/8, Train Loss: 1.3322, Train Acc: 49.10%, Val Loss: 1.2560, Val Acc: 53.01%
New best model saved with validation accuracy: 53.01%


Epoch 6/8 [Train]: 100%|██████████| 646/646 [00:34<00:00, 18.68it/s, loss=1.214, acc=50.6%]


Epoch 6/8, Train Loss: 1.3008, Train Acc: 50.58%, Val Loss: 1.2262, Val Acc: 53.71%
New best model saved with validation accuracy: 53.71%


Epoch 7/8 [Train]: 100%|██████████| 646/646 [00:42<00:00, 15.08it/s, loss=1.245, acc=51.4%]


Epoch 7/8, Train Loss: 1.2717, Train Acc: 51.42%, Val Loss: 1.2027, Val Acc: 55.12%
New best model saved with validation accuracy: 55.12%


Epoch 8/8 [Train]: 100%|██████████| 646/646 [00:34<00:00, 18.98it/s, loss=1.421, acc=52.3%]


Epoch 8/8, Train Loss: 1.2499, Train Acc: 52.26%, Val Loss: 1.1905, Val Acc: 55.66%
New best model saved with validation accuracy: 55.66%
Training completed. Best validation accuracy: 55.66%


0,1
batch_loss,█▆▆▇▅▅▆▅▄▃▅▅▃▅▄▆▄▂▅▄▃▃▄▆▃▄▃▃▁▄▄▄▃▄▄▂▁▁▂▂
best_val_accuracy,▁
epoch,▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇█████
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▄▅▆▇▇██
train_loss,█▅▄▃▂▂▁▁
train_val_gap,▁▆▅▆▇█▇█
val_accuracy,▁▃▅▆▇▇██
val_loss,█▆▄▃▂▂▁▁

0,1
batch_loss,1.20796
best_val_accuracy,55.66093
epoch,7.0
learning_rate,0.00012
train_accuracy,52.26415
train_loss,1.24992
train_val_gap,-3.39677
val_accuracy,55.66093
val_loss,1.19045


[34m[1mwandb[0m: Agent Starting Run: htc78v5b with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_rate: 0.3384300501597508
[34m[1mwandb[0m: 	epochs: 8
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00393918155437298
[34m[1mwandb[0m: 	patience: 5
[34m[1mwandb[0m: 	weight_decay: 0.000512189951505187


Epoch 1/8 [Train]: 100%|██████████| 646/646 [00:34<00:00, 18.58it/s, loss=1.348, acc=32.6%]


Epoch 1/8, Train Loss: 1.6859, Train Acc: 32.65%, Val Loss: 1.5082, Val Acc: 40.93%
New best model saved with validation accuracy: 40.93%


Epoch 2/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.67it/s, loss=1.472, acc=43.0%]


Epoch 2/8, Train Loss: 1.4670, Train Acc: 43.03%, Val Loss: 1.4200, Val Acc: 46.29%
New best model saved with validation accuracy: 46.29%


Epoch 3/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.82it/s, loss=1.398, acc=44.7%]


Epoch 3/8, Train Loss: 1.4255, Train Acc: 44.71%, Val Loss: 1.3562, Val Acc: 48.67%
New best model saved with validation accuracy: 48.67%


Epoch 4/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.41it/s, loss=1.453, acc=46.1%]


Epoch 4/8, Train Loss: 1.3931, Train Acc: 46.07%, Val Loss: 1.3424, Val Acc: 48.96%
New best model saved with validation accuracy: 48.96%


Epoch 5/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.36it/s, loss=1.368, acc=47.1%]


Epoch 5/8, Train Loss: 1.3706, Train Acc: 47.15%, Val Loss: 1.3121, Val Acc: 48.93%


Epoch 6/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.53it/s, loss=1.325, acc=47.7%]


Epoch 6/8, Train Loss: 1.3575, Train Acc: 47.67%, Val Loss: 1.2814, Val Acc: 51.46%
New best model saved with validation accuracy: 51.46%


Epoch 7/8 [Train]: 100%|██████████| 646/646 [00:27<00:00, 23.91it/s, loss=1.413, acc=48.4%]


Epoch 7/8, Train Loss: 1.3444, Train Acc: 48.44%, Val Loss: 1.2872, Val Acc: 50.38%


Epoch 8/8 [Train]: 100%|██████████| 646/646 [00:28<00:00, 22.69it/s, loss=1.478, acc=48.6%]


Epoch 8/8, Train Loss: 1.3378, Train Acc: 48.60%, Val Loss: 1.2576, Val Acc: 52.82%
New best model saved with validation accuracy: 52.82%
Training completed. Best validation accuracy: 52.82%


0,1
batch_loss,█▇▅▅▆▂▆▃▇▅▃▆▄▂▅▃▂▂▄▆▃▁▄▁▃▂▂▂▂▃▂▄▄▁▁▂▄▁▂▂
best_val_accuracy,▁
epoch,▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇█████
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▆▆▇▇███
train_loss,█▄▃▂▂▁▁▁
train_val_gap,▁▆▆▇█▆█▅
val_accuracy,▁▄▆▆▆▇▇█
val_loss,█▆▄▃▃▂▂▁

0,1
batch_loss,1.24228
best_val_accuracy,52.81595
epoch,7.0
learning_rate,0.00394
train_accuracy,48.60184
train_loss,1.33776
train_val_gap,-4.21411
val_accuracy,52.81595
val_loss,1.25756


In [25]:
api = wandb.Api()
runs = api.runs("konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_8")

best_run = None
best_val_acc = 0

In [26]:
for run in runs:
    if run.state == "finished" and "best_val_accuracy" in run.summary:
        val_acc = run.summary["best_val_accuracy"]
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_run = run

In [27]:
if best_run:
    print(f"\nBest run: {best_run.name}")
    print(f"Best validation accuracy: {best_val_acc:.2f}%")
    print(f"Best hyperparameters: {best_run.config}")


    artifacts = api.artifact(f"konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_8/best_model_{best_run.name}:latest")
    download_path = artifacts.download()

    print(f"Downloaded to: {download_path}")

    import os
    model_file_path = None


    if os.path.exists(os.path.join(download_path, "best_model.pth")):
        model_file_path = os.path.join(download_path, "best_model.pth")

    elif os.path.exists("best_model.pth"):
        model_file_path = "best_model.pth"
    else:
        for root, dirs, files in os.walk("."):
            if "best_model.pth" in files:
                model_file_path = os.path.join(root, "best_model.pth")
                break

    if model_file_path and os.path.exists(model_file_path):
        print(f"Found model file at: {model_file_path}")

        final_artifact = wandb.Artifact(
            name="final_best_resnet_model",
            type="model",
            description=f"Final best ResNet model with {best_val_acc:.2f}% validation accuracy"
        )
        final_artifact.add_file(model_file_path)

        with wandb.init(project="Facial_Expression_Recognition_8", name="final_model_upload"):
            wandb.log_artifact(final_artifact)
            wandb.log({
                "final_best_val_accuracy": best_val_acc,
                "model_architecture": "ResNet",
                "ready_for_testing": True
            })

        print(f"\nFinal model uploaded to wandb as 'final_best_resnet_model'")
        print("You can now load this model in the future for testing on any dataset!")

        print("\nTo test on a different dataset in the future, use:")
        print("evaluate_model_on_testset('path_to_downloaded_model.pth', 'your_test_dataset.csv')")

    else:
        print("Error: Could not find the downloaded model file!")
        print("Files in current directory:")
        for item in os.listdir("."):
            print(f"  {item}")

else:
    print("No successful runs found!")


Best run: scarlet-sweep-2
Best validation accuracy: 64.23%
Best hyperparameters: {'epochs': 30, 'patience': 5, 'batch_size': 32, 'hidden_dim': 128, 'dropout_rate': 0.32711369900475645, 'weight_decay': 0.00017971017828689558, 'learning_rate': 0.00048796996812685976}


[34m[1mwandb[0m:   1 of 1 files downloaded.  


Downloaded to: /content/artifacts/best_model_scarlet-sweep-2:v19
Error: Could not find the downloaded model file!
Files in current directory:
  .config
  best_model_g3ekz9xj.pt
  best_model_htc78v5b.pt
  best_model_awmgyi60.pt
  artifacts
  best_model_apax75ge.pt
  best_model_39badffd.pt
  fer2013.tar.gz
  wandb
  icml_face_data.csv
  best_model_zscv2f9k.pt
  best_model_z7hc1n5p.pt
  best_model_37qfk9h8.pt
  test.csv
  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  konstantine25b-free-university-of-tbilisi-
  train.csv
  best_model_0qz3y5is.pt
  best_model_3i8ct7f6.pt
  best_model_vwx2z668.pt
  best_model_un860siz.pt
  best_model_keexmwl9.pt
  example_submission.csv
  drive
  sample_data
