<a href="https://colab.research.google.com/github/konstantine25b/Neural_Network_ML_Facial_Expression_Recognition_Challenge/blob/main/Facial_Expression_Recognition_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lets start out new experiment #4

In [17]:
!pip install kaggle wandb onnx -Uq
from google.colab import drive
drive.mount('/content/drive')

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m84.1 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive


In [18]:
! mkdir ~/.kaggle

In [19]:
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json

In [20]:
! chmod 600 ~/.kaggle/kaggle.json

In [21]:
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge

Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 84% 241M/285M [00:00<00:00, 419MB/s]
100% 285M/285M [00:00<00:00, 464MB/s]


In [22]:
! unzip challenges-in-representation-learning-facial-expression-recognition-challenge.zip

Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  inflating: example_submission.csv  
  inflating: fer2013.tar.gz          
  inflating: icml_face_data.csv      
  inflating: test.csv                
  inflating: train.csv               


In [23]:
!pip install wandb onnx -Uq

# data staff

In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import wandb
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [26]:
class FER2013Dataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = self.data.iloc[idx]['pixels']
        emotion = self.data.iloc[idx]['emotion']

        image = np.array([int(pixel) for pixel in pixels.split()]).reshape(48, 48)
        image = image.astype(np.float32) / 255.0

        if self.transform:
            image = self.transform(image)
        else:
            image = torch.FloatTensor(image).unsqueeze(0)

        return image, emotion


In [27]:
def get_transforms():
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.1, hue=0.1),
        transforms.RandomAffine(degrees=0, scale=(0.8, 1.2)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    val_test_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    return train_transform, val_test_transform

In [28]:
def create_train_val_loaders(csv_file, batch_size=64, train_split=0.8):
    train_transform, val_transform = get_transforms()

    full_dataset = FER2013Dataset(csv_file, transform=None)

    train_size = int(train_split * len(full_dataset))
    val_size = len(full_dataset) - train_size

    train_dataset, val_dataset = random_split(
        full_dataset, [train_size, val_size],
        generator=torch.Generator().manual_seed(42)
    )

    train_dataset.dataset.transform = train_transform
    val_dataset.dataset.transform = val_transform

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    return train_loader, val_loader

In [29]:
def create_test_loader(csv_file, batch_size=64):
    _, test_transform = get_transforms()

    test_dataset = FER2013Dataset(csv_file, transform=test_transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    return test_loader

# Resnet training

In [30]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        self.dropout = nn.Dropout2d(0.1)

    def forward(self, x):
        identity = x

        out = F.relu(self.bn1(self.conv1(x)))
        out = self.dropout(out)
        out = self.bn2(self.conv2(out))

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = F.relu(out)

        return out


In [31]:
class FacialExpressionResNet(nn.Module):
    def __init__(self, num_classes=7, dropout_rate=0.5):
        super(FacialExpressionResNet, self).__init__()

        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(64, 64, 2, stride=1)
        self.layer2 = self._make_layer(64, 128, 2, stride=2)
        self.layer3 = self._make_layer(128, 256, 2, stride=2)
        self.layer4 = self._make_layer(256, 512, 2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(512, num_classes)

        self._initialize_weights()

    def _make_layer(self, in_channels, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(out_channels)
            )

        layers = []
        layers.append(ResidualBlock(in_channels, out_channels, stride, downsample))

        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))

        return nn.Sequential(*layers)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)

        return x

In [39]:
def test_overfitting():
    print("Testing model architecture with overfitting on small dataset...")

    model = FacialExpressionResNet(dropout_rate=0.3).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_loader, _ = create_train_val_loaders('train.csv', batch_size=32)

    small_batch = []
    for i, (data, target) in enumerate(train_loader):
        small_batch.extend(list(zip(data, target)))
        if len(small_batch) >= 20:
            break

    small_batch = small_batch[:20]

    for epoch in range(30):
        model.train()
        total_loss = 0
        correct = 0

        for data, target in small_batch:
            data, target = data.unsqueeze(0).to(device), torch.tensor([target]).to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()

        accuracy = 100. * correct / len(small_batch)
        avg_loss = total_loss / len(small_batch)

        print(f"Overfit Epoch {epoch+1}/30, Loss: {avg_loss:.4f}, Acc: {accuracy:.2f}%")

        if accuracy >= 95.0:
            print("Model can overfit successfully!")
            break

    print("Overfitting test completed.\n")

In [33]:
def compute_loss(loader, model, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)

            total_loss += loss.item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            total += target.size(0)

    return total_loss / len(loader), 100. * correct / total

In [40]:
def train_model(config=None):
    with wandb.init(project="Facial_Expression_Recognition_4", config=config):
        config = wandb.config

        model = FacialExpressionResNet(dropout_rate=config.dropout_rate).to(device)
        train_loader, val_loader = create_train_val_loaders('train.csv', config.batch_size)

        criterion = nn.CrossEntropyLoss(label_smoothing=config.label_smoothing)
        optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)

        scheduler = optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=config.learning_rate,
            epochs=config.epochs,
            steps_per_epoch=len(train_loader),
            pct_start=0.3,
            anneal_strategy='cos'
        )

        best_val_acc = 0
        patience_counter = 0

        wandb.watch(model, log="all", log_freq=100)

        for epoch in range(config.epochs):
            model.train()
            train_loss = 0
            train_correct = 0
            train_total = 0

            progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{config.epochs} [Train]')

            for batch_idx, (data, target) in enumerate(progress_bar):
                data, target = data.to(device), target.to(device)

                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()

                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

                optimizer.step()
                scheduler.step()

                train_loss += loss.item()
                pred = output.argmax(dim=1)
                train_correct += pred.eq(target).sum().item()
                train_total += target.size(0)

                if batch_idx % 50 == 0:
                    current_lr = scheduler.get_last_lr()[0]
                    wandb.log({
                        "batch_loss": loss.item(),
                        "learning_rate": current_lr,
                        "epoch": epoch
                    })

                progress_bar.set_postfix({
                    'loss': f'{loss.item():.3f}',
                    'acc': f'{100.*train_correct/train_total:.1f}%'
                })

            train_acc = 100. * train_correct / train_total
            train_loss = train_loss / len(train_loader)

            val_loss, val_acc = compute_loss(val_loader, model, criterion, device)

            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss,
                "train_accuracy": train_acc,
                "val_loss": val_loss,
                "val_accuracy": val_acc
            })

            print(f"Epoch {epoch+1}/{config.epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0

                model_artifact = wandb.Artifact(
                    name=f"best_model_{wandb.run.name}",
                    type="model",
                    description=f"Best ResNet model with validation accuracy: {val_acc:.2f}%"
                )

                model_save_dict = {
                    'model_state_dict': model.state_dict(),
                    'model_config': {
                        'num_classes': 7,
                        'dropout_rate': config.dropout_rate
                    },
                    'training_config': dict(config),
                    'val_accuracy': val_acc,
                    'epoch': epoch,
                    'model_architecture': 'FacialExpressionResNet'
                }

                torch.save(model_save_dict, "best_model.pth")

                model_artifact.add_file("best_model.pth")
                wandb.log_artifact(model_artifact)

                print(f"New best model saved with validation accuracy: {val_acc:.2f}%")
            else:
                patience_counter += 1

            if patience_counter >= config.patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break

        wandb.log({
            "best_val_accuracy": best_val_acc
        })

        print(f"Training completed. Best validation accuracy: {best_val_acc:.2f}%")

        return best_val_acc

In [35]:
def evaluate_model_on_testset(model_artifact_path, test_csv_file, batch_size=64):
    checkpoint = torch.load(model_artifact_path, map_location=device)

    model_config = checkpoint['model_config']
    model = FacialExpressionResNet(
        num_classes=model_config['num_classes'],
        dropout_rate=model_config['dropout_rate']
    ).to(device)

    model.load_state_dict(checkpoint['model_state_dict'])

    test_loader = create_test_loader(test_csv_file, batch_size)

    criterion = nn.CrossEntropyLoss()
    test_loss, test_acc = compute_loss(test_loader, model, criterion, device)

    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Test Loss: {test_loss:.4f}")

    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            all_preds.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())

    emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

    print("\nClassification Report:")
    print(classification_report(all_targets, all_preds, target_names=emotion_labels))

    cm = confusion_matrix(all_targets, all_preds)

    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=emotion_labels, yticklabels=emotion_labels)
    plt.title('Confusion Matrix - Facial Expression Recognition (ResNet)')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.show()

    return test_acc, all_preds, all_targets

# hyperparams

In [36]:
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'best_val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 0.0001,
            'max': 0.01
        },
        'batch_size': {
            'values': [32, 64, 128]
        },
        'dropout_rate': {
            'distribution': 'uniform',
            'min': 0.3,
            'max': 0.7
        },
        'weight_decay': {
            'distribution': 'log_uniform_values',
            'min': 1e-6,
            'max': 1e-3
        },
        'label_smoothing': {
            'distribution': 'uniform',
            'min': 0.0,
            'max': 0.2
        },
        'epochs': {
            'value': 25
        },
        'patience': {
            'value': 8
        }
    }
}

# lets start testing

In [38]:
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkonstantine25b[0m ([33mkonstantine25b-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [41]:
test_overfitting()

Testing model architecture with overfitting on small dataset...
Overfit Epoch 1/30, Loss: 2.4551, Acc: 15.00%
Overfit Epoch 2/30, Loss: 1.9424, Acc: 10.00%
Overfit Epoch 3/30, Loss: 1.8984, Acc: 20.00%
Overfit Epoch 4/30, Loss: 1.7043, Acc: 20.00%
Overfit Epoch 5/30, Loss: 1.5219, Acc: 35.00%
Overfit Epoch 6/30, Loss: 1.2151, Acc: 70.00%
Overfit Epoch 7/30, Loss: 0.9075, Acc: 75.00%
Overfit Epoch 8/30, Loss: 0.6455, Acc: 75.00%
Overfit Epoch 9/30, Loss: 0.4682, Acc: 95.00%
Model can overfit successfully!
Overfitting test completed.



In [42]:
sweep_id = wandb.sweep(sweep_config, project="Facial_Expression_Recognition_4")

Create sweep with ID: 0zp8zixp
Sweep URL: https://wandb.ai/konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_4/sweeps/0zp8zixp


In [43]:
print("Running hyperparameter sweep with ResNet architecture...")
wandb.agent(sweep_id, train_model, count=3)

Running hyperparameter sweep with ResNet architecture...


[34m[1mwandb[0m: Agent Starting Run: euczqib4 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout_rate: 0.3362882088171564
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	label_smoothing: 0.1560930322152969
[34m[1mwandb[0m: 	learning_rate: 0.001110935709956254
[34m[1mwandb[0m: 	patience: 8
[34m[1mwandb[0m: 	weight_decay: 7.043022279825436e-05


Epoch 1/25 [Train]: 100%|██████████| 180/180 [00:26<00:00,  6.89it/s, loss=1.747, acc=27.1%]


Epoch 1/25, Train Loss: 1.8273, Train Acc: 27.13%, Val Loss: 1.7257, Val Acc: 35.74%
New best model saved with validation accuracy: 35.74%


Epoch 2/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  6.95it/s, loss=1.589, acc=38.1%]


Epoch 2/25, Train Loss: 1.6951, Train Acc: 38.06%, Val Loss: 1.6511, Val Acc: 41.43%
New best model saved with validation accuracy: 41.43%


Epoch 3/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  6.98it/s, loss=1.515, acc=43.7%]


Epoch 3/25, Train Loss: 1.6142, Train Acc: 43.74%, Val Loss: 1.6057, Val Acc: 44.90%
New best model saved with validation accuracy: 44.90%


Epoch 4/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  7.01it/s, loss=1.555, acc=47.9%]


Epoch 4/25, Train Loss: 1.5467, Train Acc: 47.86%, Val Loss: 1.5569, Val Acc: 46.95%
New best model saved with validation accuracy: 46.95%


Epoch 5/25 [Train]: 100%|██████████| 180/180 [00:24<00:00,  7.23it/s, loss=1.386, acc=50.6%]


Epoch 5/25, Train Loss: 1.4989, Train Acc: 50.63%, Val Loss: 1.5696, Val Acc: 47.96%
New best model saved with validation accuracy: 47.96%


Epoch 6/25 [Train]: 100%|██████████| 180/180 [00:24<00:00,  7.27it/s, loss=1.561, acc=53.6%]


Epoch 6/25, Train Loss: 1.4596, Train Acc: 53.58%, Val Loss: 1.5318, Val Acc: 50.17%
New best model saved with validation accuracy: 50.17%


Epoch 7/25 [Train]: 100%|██████████| 180/180 [00:24<00:00,  7.30it/s, loss=1.414, acc=56.6%]


Epoch 7/25, Train Loss: 1.4148, Train Acc: 56.56%, Val Loss: 1.5156, Val Acc: 50.59%
New best model saved with validation accuracy: 50.59%


Epoch 8/25 [Train]: 100%|██████████| 180/180 [00:24<00:00,  7.23it/s, loss=1.575, acc=60.0%]


Epoch 8/25, Train Loss: 1.3614, Train Acc: 59.96%, Val Loss: 1.4862, Val Acc: 53.64%
New best model saved with validation accuracy: 53.64%


Epoch 9/25 [Train]: 100%|██████████| 180/180 [00:24<00:00,  7.44it/s, loss=1.295, acc=64.1%]


Epoch 9/25, Train Loss: 1.2966, Train Acc: 64.07%, Val Loss: 1.4691, Val Acc: 54.46%
New best model saved with validation accuracy: 54.46%


Epoch 10/25 [Train]: 100%|██████████| 180/180 [00:33<00:00,  5.41it/s, loss=1.192, acc=68.4%]


Epoch 10/25, Train Loss: 1.2216, Train Acc: 68.42%, Val Loss: 1.4792, Val Acc: 54.74%
New best model saved with validation accuracy: 54.74%


Epoch 11/25 [Train]: 100%|██████████| 180/180 [00:27<00:00,  6.60it/s, loss=1.141, acc=73.3%]


Epoch 11/25, Train Loss: 1.1382, Train Acc: 73.33%, Val Loss: 1.4981, Val Acc: 55.49%
New best model saved with validation accuracy: 55.49%


Epoch 12/25 [Train]: 100%|██████████| 180/180 [00:27<00:00,  6.52it/s, loss=1.192, acc=77.8%]


Epoch 12/25, Train Loss: 1.0598, Train Acc: 77.81%, Val Loss: 1.5100, Val Acc: 55.61%
New best model saved with validation accuracy: 55.61%


Epoch 13/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  6.99it/s, loss=0.880, acc=82.6%]


Epoch 13/25, Train Loss: 0.9774, Train Acc: 82.60%, Val Loss: 1.5534, Val Acc: 55.12%


Epoch 14/25 [Train]: 100%|██████████| 180/180 [00:26<00:00,  6.82it/s, loss=0.919, acc=86.6%]


Epoch 14/25, Train Loss: 0.9035, Train Acc: 86.61%, Val Loss: 1.5746, Val Acc: 55.96%
New best model saved with validation accuracy: 55.96%


Epoch 15/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  6.99it/s, loss=0.964, acc=89.8%]


Epoch 15/25, Train Loss: 0.8415, Train Acc: 89.80%, Val Loss: 1.5997, Val Acc: 55.85%


Epoch 16/25 [Train]: 100%|██████████| 180/180 [00:26<00:00,  6.92it/s, loss=0.736, acc=92.4%]


Epoch 16/25, Train Loss: 0.7928, Train Acc: 92.42%, Val Loss: 1.6162, Val Acc: 56.57%
New best model saved with validation accuracy: 56.57%


Epoch 17/25 [Train]: 100%|██████████| 180/180 [00:24<00:00,  7.23it/s, loss=0.839, acc=94.4%]


Epoch 17/25, Train Loss: 0.7552, Train Acc: 94.38%, Val Loss: 1.6463, Val Acc: 56.70%
New best model saved with validation accuracy: 56.70%


Epoch 18/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  7.08it/s, loss=0.693, acc=96.0%]


Epoch 18/25, Train Loss: 0.7243, Train Acc: 96.00%, Val Loss: 1.6592, Val Acc: 56.91%
New best model saved with validation accuracy: 56.91%


Epoch 19/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  7.06it/s, loss=0.763, acc=97.2%]


Epoch 19/25, Train Loss: 0.7000, Train Acc: 97.20%, Val Loss: 1.6460, Val Acc: 57.78%
New best model saved with validation accuracy: 57.78%


Epoch 20/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  7.13it/s, loss=0.684, acc=97.9%]


Epoch 20/25, Train Loss: 0.6838, Train Acc: 97.94%, Val Loss: 1.6796, Val Acc: 57.77%


Epoch 21/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  7.19it/s, loss=0.738, acc=98.6%]


Epoch 21/25, Train Loss: 0.6707, Train Acc: 98.60%, Val Loss: 1.6724, Val Acc: 57.96%
New best model saved with validation accuracy: 57.96%


Epoch 22/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  7.07it/s, loss=0.671, acc=99.0%]


Epoch 22/25, Train Loss: 0.6614, Train Acc: 99.02%, Val Loss: 1.6617, Val Acc: 58.22%
New best model saved with validation accuracy: 58.22%


Epoch 23/25 [Train]: 100%|██████████| 180/180 [00:26<00:00,  6.88it/s, loss=0.640, acc=99.2%]


Epoch 23/25, Train Loss: 0.6570, Train Acc: 99.23%, Val Loss: 1.6643, Val Acc: 58.17%


Epoch 24/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  6.93it/s, loss=0.643, acc=99.4%]


Epoch 24/25, Train Loss: 0.6530, Train Acc: 99.38%, Val Loss: 1.6667, Val Acc: 58.10%


Epoch 25/25 [Train]: 100%|██████████| 180/180 [00:25<00:00,  6.99it/s, loss=0.662, acc=99.5%]


Epoch 25/25, Train Loss: 0.6522, Train Acc: 99.46%, Val Loss: 1.6704, Val Acc: 58.38%
New best model saved with validation accuracy: 58.38%
Training completed. Best validation accuracy: 58.38%


0,1
batch_loss,███▇▆▇▆▇▆▆▆▅▆▅▅▆▅▅▅▄▃▃▃▂▂▂▂▁▂▁▂▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█
learning_rate,▁▁▁▂▂▃▃▄▄▅▇▇███████▇▇▆▆▅▅▄▄▄▃▃▂▂▂▂▁▁▁▁▁▁
train_accuracy,▁▂▃▃▃▄▄▄▅▅▅▆▆▇▇▇█████████
train_loss,█▇▇▆▆▆▆▅▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▃▄▄▅▅▆▇▇▇▇▇▇▇▇▇▇████████
val_loss,█▆▅▃▄▃▂▁▁▁▂▂▃▄▅▅▆▆▆▇▇▆▆▆▆

0,1
batch_loss,0.66921
best_val_accuracy,58.37687
epoch,24.0
learning_rate,0.0
train_accuracy,99.46445
train_loss,0.65225
val_accuracy,58.37687
val_loss,1.67039


[34m[1mwandb[0m: Agent Starting Run: z6zvh83j with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_rate: 0.3419796032943357
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	label_smoothing: 0.1890148364320457
[34m[1mwandb[0m: 	learning_rate: 0.003587760475897247
[34m[1mwandb[0m: 	patience: 8
[34m[1mwandb[0m: 	weight_decay: 1.277027029596828e-06


Epoch 1/25 [Train]: 100%|██████████| 359/359 [00:28<00:00, 12.81it/s, loss=1.628, acc=34.1%]


Epoch 1/25, Train Loss: 1.7643, Train Acc: 34.07%, Val Loss: 1.6922, Val Acc: 42.16%
New best model saved with validation accuracy: 42.16%


Epoch 2/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.04it/s, loss=1.633, acc=43.2%]


Epoch 2/25, Train Loss: 1.6473, Train Acc: 43.20%, Val Loss: 1.6270, Val Acc: 43.75%
New best model saved with validation accuracy: 43.75%


Epoch 3/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.03it/s, loss=1.606, acc=46.0%]


Epoch 3/25, Train Loss: 1.6081, Train Acc: 46.04%, Val Loss: 1.5887, Val Acc: 47.58%
New best model saved with validation accuracy: 47.58%


Epoch 4/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.04it/s, loss=1.698, acc=48.5%]


Epoch 4/25, Train Loss: 1.5767, Train Acc: 48.55%, Val Loss: 1.6137, Val Acc: 46.48%


Epoch 5/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.21it/s, loss=1.585, acc=50.7%]


Epoch 5/25, Train Loss: 1.5482, Train Acc: 50.66%, Val Loss: 1.5410, Val Acc: 51.15%
New best model saved with validation accuracy: 51.15%


Epoch 6/25 [Train]: 100%|██████████| 359/359 [00:30<00:00, 11.61it/s, loss=1.339, acc=54.6%]


Epoch 6/25, Train Loss: 1.4882, Train Acc: 54.58%, Val Loss: 1.4880, Val Acc: 54.09%
New best model saved with validation accuracy: 54.09%


Epoch 7/25 [Train]: 100%|██████████| 359/359 [00:30<00:00, 11.73it/s, loss=1.511, acc=57.9%]


Epoch 7/25, Train Loss: 1.4373, Train Acc: 57.87%, Val Loss: 1.4787, Val Acc: 54.93%
New best model saved with validation accuracy: 54.93%


Epoch 8/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.13it/s, loss=1.288, acc=61.5%]


Epoch 8/25, Train Loss: 1.3848, Train Acc: 61.51%, Val Loss: 1.4735, Val Acc: 54.84%


Epoch 9/25 [Train]: 100%|██████████| 359/359 [00:30<00:00, 11.93it/s, loss=1.157, acc=65.7%]


Epoch 9/25, Train Loss: 1.3208, Train Acc: 65.68%, Val Loss: 1.4747, Val Acc: 56.53%
New best model saved with validation accuracy: 56.53%


Epoch 10/25 [Train]: 100%|██████████| 359/359 [00:30<00:00, 11.84it/s, loss=1.252, acc=70.7%]


Epoch 10/25, Train Loss: 1.2483, Train Acc: 70.67%, Val Loss: 1.4872, Val Acc: 55.40%


Epoch 11/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.17it/s, loss=1.179, acc=75.5%]


Epoch 11/25, Train Loss: 1.1696, Train Acc: 75.52%, Val Loss: 1.4726, Val Acc: 57.66%
New best model saved with validation accuracy: 57.66%


Epoch 12/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.04it/s, loss=1.259, acc=80.8%]


Epoch 12/25, Train Loss: 1.0832, Train Acc: 80.75%, Val Loss: 1.5000, Val Acc: 58.29%
New best model saved with validation accuracy: 58.29%


Epoch 13/25 [Train]: 100%|██████████| 359/359 [00:30<00:00, 11.96it/s, loss=0.930, acc=85.8%]


Epoch 13/25, Train Loss: 1.0009, Train Acc: 85.79%, Val Loss: 1.5305, Val Acc: 57.33%


Epoch 14/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.17it/s, loss=0.953, acc=89.8%]


Epoch 14/25, Train Loss: 0.9327, Train Acc: 89.83%, Val Loss: 1.6034, Val Acc: 56.81%


Epoch 15/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.37it/s, loss=0.894, acc=92.4%]


Epoch 15/25, Train Loss: 0.8843, Train Acc: 92.43%, Val Loss: 1.6007, Val Acc: 58.03%


Epoch 16/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.00it/s, loss=0.933, acc=95.3%]


Epoch 16/25, Train Loss: 0.8371, Train Acc: 95.27%, Val Loss: 1.6560, Val Acc: 56.74%


Epoch 17/25 [Train]: 100%|██████████| 359/359 [00:30<00:00, 11.88it/s, loss=0.793, acc=96.4%]


Epoch 17/25, Train Loss: 0.8129, Train Acc: 96.36%, Val Loss: 1.6538, Val Acc: 57.51%


Epoch 18/25 [Train]: 100%|██████████| 359/359 [00:28<00:00, 12.45it/s, loss=0.750, acc=97.6%]


Epoch 18/25, Train Loss: 0.7908, Train Acc: 97.56%, Val Loss: 1.6647, Val Acc: 57.09%


Epoch 19/25 [Train]: 100%|██████████| 359/359 [00:30<00:00, 11.84it/s, loss=0.745, acc=98.4%]


Epoch 19/25, Train Loss: 0.7761, Train Acc: 98.41%, Val Loss: 1.6704, Val Acc: 57.89%


Epoch 20/25 [Train]: 100%|██████████| 359/359 [00:29<00:00, 12.15it/s, loss=0.744, acc=98.8%]


Epoch 20/25, Train Loss: 0.7662, Train Acc: 98.83%, Val Loss: 1.6799, Val Acc: 57.94%
Early stopping triggered after 20 epochs
Training completed. Best validation accuracy: 58.29%


0,1
batch_loss,█▇█▇▇▇▆▆▇▆▆▆▆▆▇▆▅▅▄▄▃▃▄▂▂▃▂▂▂▂▁▁▂▂▁▁▁▁▁▁
best_val_accuracy,▁
epoch,▁▁▁▂▂▂▂▂▂▂▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇███
learning_rate,▁▁▁▁▂▃▃▄▄▅███████▇▇▇▇▇▇▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂
train_accuracy,▁▂▂▃▃▃▄▄▄▅▅▆▇▇▇█████
train_loss,█▇▇▇▆▆▆▅▅▄▄▃▃▂▂▁▁▁▁▁
val_accuracy,▁▂▃▃▅▆▇▇▇▇███▇█▇█▇██
val_loss,█▆▅▅▃▁▁▁▁▁▁▂▃▅▅▇▇▇▇█

0,1
batch_loss,0.8006
best_val_accuracy,58.28979
epoch,19.0
learning_rate,0.00068
train_accuracy,98.83311
train_loss,0.76625
val_accuracy,57.94148
val_loss,1.67986


[34m[1mwandb[0m: Agent Starting Run: i1z98amg with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_rate: 0.5591138463444963
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	label_smoothing: 0.036151178354888416
[34m[1mwandb[0m: 	learning_rate: 0.0001410799966305842
[34m[1mwandb[0m: 	patience: 8
[34m[1mwandb[0m: 	weight_decay: 0.0009820566525906669


Epoch 1/25 [Train]: 100%|██████████| 718/718 [00:36<00:00, 19.74it/s, loss=1.978, acc=21.5%]


Epoch 1/25, Train Loss: 1.8784, Train Acc: 21.51%, Val Loss: 1.7883, Val Acc: 26.05%
New best model saved with validation accuracy: 26.05%


Epoch 2/25 [Train]: 100%|██████████| 718/718 [00:36<00:00, 19.47it/s, loss=1.630, acc=27.5%]


Epoch 2/25, Train Loss: 1.7818, Train Acc: 27.51%, Val Loss: 1.6882, Val Acc: 33.77%
New best model saved with validation accuracy: 33.77%


Epoch 3/25 [Train]: 100%|██████████| 718/718 [00:38<00:00, 18.77it/s, loss=1.729, acc=34.9%]


Epoch 3/25, Train Loss: 1.6619, Train Acc: 34.85%, Val Loss: 1.5748, Val Acc: 40.00%
New best model saved with validation accuracy: 40.00%


Epoch 4/25 [Train]: 100%|██████████| 718/718 [00:38<00:00, 18.63it/s, loss=1.473, acc=41.1%]


Epoch 4/25, Train Loss: 1.5583, Train Acc: 41.09%, Val Loss: 1.5034, Val Acc: 43.85%
New best model saved with validation accuracy: 43.85%


Epoch 5/25 [Train]: 100%|██████████| 718/718 [00:36<00:00, 19.43it/s, loss=1.354, acc=45.7%]


Epoch 5/25, Train Loss: 1.4699, Train Acc: 45.71%, Val Loss: 1.4412, Val Acc: 46.92%
New best model saved with validation accuracy: 46.92%


Epoch 6/25 [Train]: 100%|██████████| 718/718 [00:37<00:00, 19.37it/s, loss=1.563, acc=49.2%]


Epoch 6/25, Train Loss: 1.3967, Train Acc: 49.16%, Val Loss: 1.3884, Val Acc: 49.90%
New best model saved with validation accuracy: 49.90%


Epoch 7/25 [Train]: 100%|██████████| 718/718 [00:37<00:00, 19.34it/s, loss=1.129, acc=53.1%]


Epoch 7/25, Train Loss: 1.3126, Train Acc: 53.15%, Val Loss: 1.3719, Val Acc: 50.91%
New best model saved with validation accuracy: 50.91%


Epoch 8/25 [Train]: 100%|██████████| 718/718 [00:37<00:00, 19.03it/s, loss=1.165, acc=56.6%]


Epoch 8/25, Train Loss: 1.2367, Train Acc: 56.59%, Val Loss: 1.3795, Val Acc: 51.27%
New best model saved with validation accuracy: 51.27%


Epoch 9/25 [Train]: 100%|██████████| 718/718 [00:39<00:00, 18.22it/s, loss=1.110, acc=60.9%]


Epoch 9/25, Train Loss: 1.1437, Train Acc: 60.85%, Val Loss: 1.3984, Val Acc: 50.35%


Epoch 10/25 [Train]: 100%|██████████| 718/718 [00:38<00:00, 18.50it/s, loss=1.026, acc=64.7%]


Epoch 10/25, Train Loss: 1.0490, Train Acc: 64.66%, Val Loss: 1.3763, Val Acc: 52.87%
New best model saved with validation accuracy: 52.87%


Epoch 11/25 [Train]: 100%|██████████| 718/718 [00:38<00:00, 18.67it/s, loss=0.961, acc=69.3%]


Epoch 11/25, Train Loss: 0.9539, Train Acc: 69.33%, Val Loss: 1.4211, Val Acc: 51.64%


Epoch 12/25 [Train]: 100%|██████████| 718/718 [00:37<00:00, 19.20it/s, loss=0.733, acc=73.8%]


Epoch 12/25, Train Loss: 0.8466, Train Acc: 73.77%, Val Loss: 1.4721, Val Acc: 53.03%
New best model saved with validation accuracy: 53.03%


Epoch 13/25 [Train]: 100%|██████████| 718/718 [00:37<00:00, 19.31it/s, loss=0.755, acc=78.2%]


Epoch 13/25, Train Loss: 0.7498, Train Acc: 78.23%, Val Loss: 1.5196, Val Acc: 53.01%


Epoch 14/25 [Train]: 100%|██████████| 718/718 [00:36<00:00, 19.51it/s, loss=0.965, acc=81.5%]


Epoch 14/25, Train Loss: 0.6684, Train Acc: 81.50%, Val Loss: 1.5651, Val Acc: 53.69%
New best model saved with validation accuracy: 53.69%


Epoch 15/25 [Train]: 100%|██████████| 718/718 [00:38<00:00, 18.66it/s, loss=0.671, acc=84.6%]


Epoch 15/25, Train Loss: 0.5921, Train Acc: 84.63%, Val Loss: 1.6297, Val Acc: 53.10%


Epoch 16/25 [Train]: 100%|██████████| 718/718 [00:37<00:00, 19.06it/s, loss=0.588, acc=87.5%]


Epoch 16/25, Train Loss: 0.5200, Train Acc: 87.55%, Val Loss: 1.7004, Val Acc: 52.68%


Epoch 17/25 [Train]: 100%|██████████| 718/718 [00:36<00:00, 19.56it/s, loss=0.360, acc=90.1%]


Epoch 17/25, Train Loss: 0.4612, Train Acc: 90.09%, Val Loss: 1.7300, Val Acc: 53.01%


Epoch 18/25 [Train]: 100%|██████████| 718/718 [00:36<00:00, 19.70it/s, loss=0.682, acc=91.9%]


Epoch 18/25, Train Loss: 0.4185, Train Acc: 91.93%, Val Loss: 1.7691, Val Acc: 54.20%
New best model saved with validation accuracy: 54.20%


Epoch 19/25 [Train]: 100%|██████████| 718/718 [00:36<00:00, 19.49it/s, loss=0.548, acc=93.2%]


Epoch 19/25, Train Loss: 0.3826, Train Acc: 93.19%, Val Loss: 1.7827, Val Acc: 54.34%
New best model saved with validation accuracy: 54.34%


Epoch 20/25 [Train]: 100%|██████████| 718/718 [00:38<00:00, 18.77it/s, loss=0.278, acc=94.6%]


Epoch 20/25, Train Loss: 0.3459, Train Acc: 94.65%, Val Loss: 1.8240, Val Acc: 54.09%


Epoch 21/25 [Train]: 100%|██████████| 718/718 [00:37<00:00, 19.21it/s, loss=0.232, acc=95.5%]


Epoch 21/25, Train Loss: 0.3210, Train Acc: 95.52%, Val Loss: 1.8517, Val Acc: 53.87%


Epoch 22/25 [Train]: 100%|██████████| 718/718 [00:36<00:00, 19.78it/s, loss=0.379, acc=96.2%]


Epoch 22/25, Train Loss: 0.3068, Train Acc: 96.16%, Val Loss: 1.8499, Val Acc: 54.28%


Epoch 23/25 [Train]: 100%|██████████| 718/718 [00:36<00:00, 19.82it/s, loss=0.379, acc=96.8%]


Epoch 23/25, Train Loss: 0.2913, Train Acc: 96.80%, Val Loss: 1.8487, Val Acc: 54.09%


Epoch 24/25 [Train]: 100%|██████████| 718/718 [00:37<00:00, 19.39it/s, loss=0.281, acc=97.1%]


Epoch 24/25, Train Loss: 0.2823, Train Acc: 97.11%, Val Loss: 1.8614, Val Acc: 54.02%


Epoch 25/25 [Train]: 100%|██████████| 718/718 [00:37<00:00, 19.26it/s, loss=0.236, acc=97.3%]


Epoch 25/25, Train Loss: 0.2803, Train Acc: 97.30%, Val Loss: 1.8540, Val Acc: 54.06%
Training completed. Best validation accuracy: 54.34%


0,1
batch_loss,██▇█▇▆▇▆▇▅▆▆▅▅▅▄▄▅▄▅▃▃▃▁▃▂▁▁▁▂▁▁▁▁▂▁▁▁▁▁
best_val_accuracy,▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇███
learning_rate,▁▁▁▂▂▃▄▅▅▅▆▇▇▇▇███▇▇▆▆▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁
train_accuracy,▁▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇▇████████
train_loss,██▇▇▆▆▆▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▇▇▇▇█▇██████████████
val_loss,▇▆▄▃▂▁▁▁▁▁▂▂▃▄▅▆▆▇▇▇█████

0,1
batch_loss,0.22563
best_val_accuracy,54.33647
epoch,24.0
learning_rate,0.0
train_accuracy,97.29612
train_loss,0.28031
val_accuracy,54.05782
val_loss,1.85397


In [44]:
api = wandb.Api()
runs = api.runs("konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_4")


In [45]:
best_run = None
best_val_acc = 0

In [46]:
for run in runs:
    if run.state == "finished" and "best_val_accuracy" in run.summary:
        val_acc = run.summary["best_val_accuracy"]
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_run = run

In [48]:
if best_run:
    print(f"\nBest run: {best_run.name}")
    print(f"Best validation accuracy: {best_val_acc:.2f}%")
    print(f"Best hyperparameters: {best_run.config}")

    # Download the artifact
    artifacts = api.artifact(f"konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_4/best_model_{best_run.name}:latest")
    download_path = artifacts.download()

    # Check what was actually downloaded
    print(f"Downloaded to: {download_path}")

    # Find the correct path to the model file
    import os
    model_file_path = None

    # Check in the download directory
    if os.path.exists(os.path.join(download_path, "best_model.pth")):
        model_file_path = os.path.join(download_path, "best_model.pth")
    # Check in current directory
    elif os.path.exists("best_model.pth"):
        model_file_path = "best_model.pth"
    # Search recursively
    else:
        for root, dirs, files in os.walk("."):
            if "best_model.pth" in files:
                model_file_path = os.path.join(root, "best_model.pth")
                break

    if model_file_path and os.path.exists(model_file_path):
        print(f"Found model file at: {model_file_path}")

        # Create final artifact with correct path
        final_artifact = wandb.Artifact(
            name="final_best_resnet_model",
            type="model",
            description=f"Final best ResNet model with {best_val_acc:.2f}% validation accuracy"
        )
        final_artifact.add_file(model_file_path)

        # Upload the final artifact
        with wandb.init(project="Facial_Expression_Recognition_4", name="final_model_upload"):
            wandb.log_artifact(final_artifact)
            wandb.log({
                "final_best_val_accuracy": best_val_acc,
                "model_architecture": "ResNet",
                "ready_for_testing": True
            })

        print(f"\nFinal model uploaded to wandb as 'final_best_resnet_model'")
        print("You can now load this model in the future for testing on any dataset!")

        print("\nTo test on a different dataset in the future, use:")
        print("evaluate_model_on_testset('path_to_downloaded_model.pth', 'your_test_dataset.csv')")

    else:
        print("Error: Could not find the downloaded model file!")
        print("Files in current directory:")
        for item in os.listdir("."):
            print(f"  {item}")

else:
    print("No successful runs found!")


Best run: wise-sweep-1
Best validation accuracy: 58.38%
Best hyperparameters: {'epochs': 25, 'patience': 8, 'batch_size': 128, 'dropout_rate': 0.3362882088171564, 'weight_decay': 7.043022279825436e-05, 'learning_rate': 0.001110935709956254, 'label_smoothing': 0.1560930322152969}


[34m[1mwandb[0m:   1 of 1 files downloaded.  


Downloaded to: /content/artifacts/best_model_wise-sweep-1:v19
Found model file at: /content/artifacts/best_model_wise-sweep-1:v19/best_model.pth


0,1
final_best_val_accuracy,▁

0,1
final_best_val_accuracy,58.37687
model_architecture,ResNet
ready_for_testing,True



Final model uploaded to wandb as 'final_best_resnet_model'
You can now load this model in the future for testing on any dataset!

To test on a different dataset in the future, use:
evaluate_model_on_testset('path_to_downloaded_model.pth', 'your_test_dataset.csv')
