# experiment 5

In [1]:
!pip install kaggle wandb onnx -Uq
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
! mkdir ~/.kaggle

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [3]:
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json

In [4]:
! chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge

challenges-in-representation-learning-facial-expression-recognition-challenge.zip: Skipping, found more recently modified local copy (use --force to force download)


In [6]:
! unzip challenges-in-representation-learning-facial-expression-recognition-challenge.zip

Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
replace example_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: example_submission.csv  
replace fer2013.tar.gz? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: fer2013.tar.gz          
replace icml_face_data.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: icml_face_data.csv      y
y

replace test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename:   inflating: test.csv                
replace train.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename:   inflating: train.csv               y



In [7]:
!pip install wandb onnx -Uq

# data staff

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import wandb
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [11]:
wandb.login()

True

In [12]:
class FER2013Dataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = self.data.iloc[idx]['pixels']
        emotion = self.data.iloc[idx]['emotion']

        image = np.array([int(pixel) for pixel in pixels.split()]).reshape(48, 48)
        image = image.astype(np.float32) / 255.0

        if self.transform:
            image = self.transform(image)
        else:
            image = torch.FloatTensor(image).unsqueeze(0)

        return image, emotion

In [13]:
def get_transforms():
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(20),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.3, hue=0.15),
        transforms.RandomAffine(degrees=0, scale=(0.6, 1.4)),
        transforms.RandomErasing(p=0.4, scale=(0.02, 0.2)),
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    val_test_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    return train_transform, val_test_transform

In [14]:
def create_train_val_loaders(csv_file, batch_size=64, train_split=0.8):
    train_transform, val_transform = get_transforms()

    full_dataset = FER2013Dataset(csv_file, transform=None)

    train_size = int(train_split * len(full_dataset))
    val_size = len(full_dataset) - train_size

    train_dataset, val_dataset = random_split(
        full_dataset, [train_size, val_size],
        generator=torch.Generator().manual_seed(42)
    )

    train_dataset.dataset.transform = train_transform
    val_dataset.dataset.transform = val_transform

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    return train_loader, val_loader

In [15]:
def create_test_loader(csv_file, batch_size=64):
    _, test_transform = get_transforms()

    test_dataset = FER2013Dataset(csv_file, transform=test_transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    return test_loader

In [16]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        self.dropout = nn.Dropout2d(0.15)

    def forward(self, x):
        identity = x

        out = F.relu(self.bn1(self.conv1(x)))
        out = self.dropout(out)
        out = self.bn2(self.conv2(out))

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = F.relu(out)

        return out

In [17]:
class ImprovedFacialExpressionResNet(nn.Module):
    def __init__(self, num_classes=7, dropout_rate=0.6):
        super(ImprovedFacialExpressionResNet, self).__init__()

        self.conv1 = nn.Conv2d(1, 32, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(32, 64, 2, stride=1)
        self.layer2 = self._make_layer(64, 128, 2, stride=2)
        self.layer3 = self._make_layer(128, 256, 2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(256, 128)
        self.dropout2 = nn.Dropout(dropout_rate * 0.5)
        self.fc2 = nn.Linear(128, num_classes)

        self._initialize_weights()

    def _make_layer(self, in_channels, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(out_channels)
            )

        layers = []
        layers.append(ResidualBlock(in_channels, out_channels, stride, downsample))

        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))

        return nn.Sequential(*layers)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout1(x)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)

        return x


In [29]:
def test_overfitting():
    print("Testing improved model architecture with overfitting on small dataset...")

    model = ImprovedFacialExpressionResNet(dropout_rate=0.3).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_loader, _ = create_train_val_loaders('train.csv', batch_size=32)

    small_batch = []
    for i, (data, target) in enumerate(train_loader):
        small_batch.extend(list(zip(data, target)))
        if len(small_batch) >= 20:
            break

    small_batch = small_batch[:20]

    for epoch in range(50):
        model.train()
        total_loss = 0
        correct = 0

        for data, target in small_batch:
            data, target = data.unsqueeze(0).to(device), torch.tensor([target]).to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()

        accuracy = 100. * correct / len(small_batch)
        avg_loss = total_loss / len(small_batch)

        print(f"Overfit Epoch {epoch+1}/30, Loss: {avg_loss:.4f}, Acc: {accuracy:.2f}%")

        if accuracy >= 95.0:
            print("Improved model can overfit successfully!")
            break

    print("Overfitting test completed.\n")


In [19]:
def compute_loss(loader, model, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)

            total_loss += loss.item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            total += target.size(0)

    return total_loss / len(loader), 100. * correct / total


In [20]:
def train_model(config=None):
    with wandb.init(project="Facial_Expression_Recognition_5", config=config):
        config = wandb.config

        model = ImprovedFacialExpressionResNet(dropout_rate=config.dropout_rate).to(device)
        train_loader, val_loader = create_train_val_loaders('train.csv', config.batch_size)

        criterion = nn.CrossEntropyLoss(label_smoothing=config.label_smoothing)
        optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)

        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer,
            T_0=len(train_loader) * 5,
            T_mult=1,
            eta_min=config.learning_rate * 0.01
        )

        best_val_acc = 0
        patience_counter = 0

        wandb.watch(model, log="gradients", log_freq=100)

        for epoch in range(config.epochs):
            model.train()
            train_loss = 0
            train_correct = 0
            train_total = 0

            progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{config.epochs} [Train]')

            for batch_idx, (data, target) in enumerate(progress_bar):
                data, target = data.to(device), target.to(device)

                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()

                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.5)

                optimizer.step()
                scheduler.step()

                train_loss += loss.item()
                pred = output.argmax(dim=1)
                train_correct += pred.eq(target).sum().item()
                train_total += target.size(0)

                if batch_idx % 50 == 0:
                    current_lr = scheduler.get_last_lr()[0]
                    wandb.log({
                        "batch_loss": loss.item(),
                        "learning_rate": current_lr,
                        "epoch": epoch
                    })

                progress_bar.set_postfix({
                    'loss': f'{loss.item():.3f}',
                    'acc': f'{100.*train_correct/train_total:.1f}%'
                })

            train_acc = 100. * train_correct / train_total
            train_loss = train_loss / len(train_loader)

            val_loss, val_acc = compute_loss(val_loader, model, criterion, device)

            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss,
                "train_accuracy": train_acc,
                "val_loss": val_loss,
                "val_accuracy": val_acc,
                "train_val_gap": train_acc - val_acc
            })

            print(f"Epoch {epoch+1}/{config.epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%, Gap: {train_acc-val_acc:.2f}%")

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0

                model_artifact = wandb.Artifact(
                    name=f"best_model_{wandb.run.name}",
                    type="model",
                    description=f"Best improved ResNet model with validation accuracy: {val_acc:.2f}%"
                )

                model_save_dict = {
                    'model_state_dict': model.state_dict(),
                    'model_config': {
                        'num_classes': 7,
                        'dropout_rate': config.dropout_rate
                    },
                    'training_config': dict(config),
                    'val_accuracy': val_acc,
                    'epoch': epoch,
                    'model_architecture': 'ImprovedFacialExpressionResNet'
                }

                torch.save(model_save_dict, "best_model.pth")

                model_artifact.add_file("best_model.pth")
                wandb.log_artifact(model_artifact)

                print(f"New best model saved with validation accuracy: {val_acc:.2f}%")
            else:
                patience_counter += 1

            if patience_counter >= config.patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break

        wandb.log({
            "best_val_accuracy": best_val_acc
        })

        print(f"Training completed. Best validation accuracy: {best_val_acc:.2f}%")

        return best_val_acc


In [21]:
def evaluate_model_on_testset(model_artifact_path, test_csv_file, batch_size=64):
    checkpoint = torch.load(model_artifact_path, map_location=device)

    model_config = checkpoint['model_config']
    model = ImprovedFacialExpressionResNet(
        num_classes=model_config['num_classes'],
        dropout_rate=model_config['dropout_rate']
    ).to(device)

    model.load_state_dict(checkpoint['model_state_dict'])

    test_loader = create_test_loader(test_csv_file, batch_size)

    criterion = nn.CrossEntropyLoss()
    test_loss, test_acc = compute_loss(test_loader, model, criterion, device)

    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Test Loss: {test_loss:.4f}")

    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            all_preds.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())

    emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

    print("\nClassification Report:")
    print(classification_report(all_targets, all_preds, target_names=emotion_labels))

    cm = confusion_matrix(all_targets, all_preds)

    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=emotion_labels, yticklabels=emotion_labels)
    plt.title('Confusion Matrix - Facial Expression Recognition (Improved ResNet)')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.show()

    return test_acc, all_preds, all_targets


In [31]:
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'best_val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 0.00005,
            'max': 0.005
        },
        'batch_size': {
            'values': [32, 64]
        },
        'dropout_rate': {
            'distribution': 'uniform',
            'min': 0.4,
            'max': 0.7
        },
        'weight_decay': {
            'distribution': 'log_uniform_values',
            'min': 1e-4,
            'max': 1e-2
        },
        'label_smoothing': {
            'distribution': 'uniform',
            'min': 0.1,
            'max': 0.3
        },
        'epochs': {
            'value': 30
        },
        'patience': {
            'value': 8
        }
    }
}

In [32]:
test_overfitting()

Testing improved model architecture with overfitting on small dataset...
Overfit Epoch 1/30, Loss: 1.9576, Acc: 25.00%
Overfit Epoch 2/30, Loss: 1.8440, Acc: 30.00%
Overfit Epoch 3/30, Loss: 1.7916, Acc: 30.00%
Overfit Epoch 4/30, Loss: 1.7272, Acc: 30.00%
Overfit Epoch 5/30, Loss: 1.6530, Acc: 30.00%
Overfit Epoch 6/30, Loss: 1.5868, Acc: 30.00%
Overfit Epoch 7/30, Loss: 1.5151, Acc: 30.00%
Overfit Epoch 8/30, Loss: 1.4373, Acc: 30.00%
Overfit Epoch 9/30, Loss: 1.3532, Acc: 35.00%
Overfit Epoch 10/30, Loss: 1.2424, Acc: 35.00%
Overfit Epoch 11/30, Loss: 1.1876, Acc: 35.00%
Overfit Epoch 12/30, Loss: 1.1600, Acc: 50.00%
Overfit Epoch 13/30, Loss: 1.0732, Acc: 50.00%
Overfit Epoch 14/30, Loss: 1.0140, Acc: 50.00%
Overfit Epoch 15/30, Loss: 0.9429, Acc: 55.00%
Overfit Epoch 16/30, Loss: 0.9485, Acc: 55.00%
Overfit Epoch 17/30, Loss: 0.8998, Acc: 45.00%
Overfit Epoch 18/30, Loss: 0.8409, Acc: 65.00%
Overfit Epoch 19/30, Loss: 0.8472, Acc: 60.00%
Overfit Epoch 20/30, Loss: 0.8300, Acc: 50.

In [33]:
sweep_id = wandb.sweep(sweep_config, project="Facial_Expression_Recognition_5")

Create sweep with ID: als3wxaz
Sweep URL: https://wandb.ai/konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_5/sweeps/als3wxaz


In [34]:
print("Running improved hyperparameter sweep with anti-overfitting ResNet...")
wandb.agent(sweep_id, train_model, count=3)

Running improved hyperparameter sweep with anti-overfitting ResNet...


[34m[1mwandb[0m: Agent Starting Run: 1bdep6nf with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_rate: 0.5479010231390392
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	label_smoothing: 0.22223961117304525
[34m[1mwandb[0m: 	learning_rate: 0.0003507942900207464
[34m[1mwandb[0m: 	patience: 8
[34m[1mwandb[0m: 	weight_decay: 0.0007072641496874819


Epoch 1/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.03it/s, loss=1.654, acc=34.6%]


Epoch 1/30, Train Loss: 1.7682, Train Acc: 34.64%, Val Loss: 1.6647, Val Acc: 42.51%, Gap: -7.87%
New best model saved with validation accuracy: 42.51%


Epoch 2/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.46it/s, loss=1.463, acc=44.6%]


Epoch 2/30, Train Loss: 1.6527, Train Acc: 44.56%, Val Loss: 1.5941, Val Acc: 47.58%, Gap: -3.02%
New best model saved with validation accuracy: 47.58%


Epoch 3/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.69it/s, loss=1.850, acc=49.4%]


Epoch 3/30, Train Loss: 1.5880, Train Acc: 49.44%, Val Loss: 1.5622, Val Acc: 50.19%, Gap: -0.76%
New best model saved with validation accuracy: 50.19%


Epoch 4/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.80it/s, loss=1.436, acc=53.3%]


Epoch 4/30, Train Loss: 1.5373, Train Acc: 53.34%, Val Loss: 1.5408, Val Acc: 51.67%, Gap: 1.67%
New best model saved with validation accuracy: 51.67%


Epoch 5/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.81it/s, loss=1.411, acc=55.3%]


Epoch 5/30, Train Loss: 1.5061, Train Acc: 55.29%, Val Loss: 1.5386, Val Acc: 52.14%, Gap: 3.15%
New best model saved with validation accuracy: 52.14%


Epoch 6/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.55it/s, loss=1.556, acc=51.8%]


Epoch 6/30, Train Loss: 1.5590, Train Acc: 51.79%, Val Loss: 1.5546, Val Acc: 51.04%, Gap: 0.75%


Epoch 7/30 [Train]: 100%|██████████| 718/718 [00:29<00:00, 24.33it/s, loss=1.786, acc=55.0%]


Epoch 7/30, Train Loss: 1.5182, Train Acc: 55.02%, Val Loss: 1.5300, Val Acc: 53.05%, Gap: 1.97%
New best model saved with validation accuracy: 53.05%


Epoch 8/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.62it/s, loss=1.446, acc=60.3%]


Epoch 8/30, Train Loss: 1.4528, Train Acc: 60.34%, Val Loss: 1.5204, Val Acc: 55.42%, Gap: 4.92%
New best model saved with validation accuracy: 55.42%


Epoch 9/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.64it/s, loss=1.346, acc=65.5%]


Epoch 9/30, Train Loss: 1.3823, Train Acc: 65.47%, Val Loss: 1.5058, Val Acc: 56.23%, Gap: 9.24%
New best model saved with validation accuracy: 56.23%


Epoch 10/30 [Train]: 100%|██████████| 718/718 [00:29<00:00, 24.39it/s, loss=1.304, acc=68.5%]


Epoch 10/30, Train Loss: 1.3438, Train Acc: 68.46%, Val Loss: 1.5138, Val Acc: 56.46%, Gap: 12.00%
New best model saved with validation accuracy: 56.46%


Epoch 11/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.83it/s, loss=1.301, acc=61.8%]


Epoch 11/30, Train Loss: 1.4336, Train Acc: 61.77%, Val Loss: 1.5179, Val Acc: 54.34%, Gap: 7.43%


Epoch 12/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.86it/s, loss=1.290, acc=64.9%]


Epoch 12/30, Train Loss: 1.3935, Train Acc: 64.87%, Val Loss: 1.5170, Val Acc: 55.90%, Gap: 8.96%


Epoch 13/30 [Train]: 100%|██████████| 718/718 [00:29<00:00, 24.17it/s, loss=1.481, acc=70.5%]


Epoch 13/30, Train Loss: 1.3152, Train Acc: 70.49%, Val Loss: 1.5196, Val Acc: 56.81%, Gap: 13.68%
New best model saved with validation accuracy: 56.81%


Epoch 14/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.43it/s, loss=1.485, acc=76.2%]


Epoch 14/30, Train Loss: 1.2383, Train Acc: 76.22%, Val Loss: 1.5276, Val Acc: 57.65%, Gap: 18.58%
New best model saved with validation accuracy: 57.65%


Epoch 15/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.49it/s, loss=1.098, acc=79.4%]


Epoch 15/30, Train Loss: 1.1932, Train Acc: 79.38%, Val Loss: 1.5374, Val Acc: 57.56%, Gap: 21.82%


Epoch 16/30 [Train]: 100%|██████████| 718/718 [00:29<00:00, 24.28it/s, loss=1.289, acc=70.8%]


Epoch 16/30, Train Loss: 1.3130, Train Acc: 70.76%, Val Loss: 1.5457, Val Acc: 54.79%, Gap: 15.97%


Epoch 17/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.72it/s, loss=1.205, acc=74.0%]


Epoch 17/30, Train Loss: 1.2710, Train Acc: 74.03%, Val Loss: 1.5503, Val Acc: 56.15%, Gap: 17.88%


Epoch 18/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.77it/s, loss=1.181, acc=79.4%]


Epoch 18/30, Train Loss: 1.1926, Train Acc: 79.35%, Val Loss: 1.5552, Val Acc: 56.90%, Gap: 22.46%


Epoch 19/30 [Train]: 100%|██████████| 718/718 [00:29<00:00, 24.07it/s, loss=1.212, acc=84.0%]


Epoch 19/30, Train Loss: 1.1274, Train Acc: 83.95%, Val Loss: 1.5735, Val Acc: 57.12%, Gap: 26.83%


Epoch 20/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.55it/s, loss=0.984, acc=87.0%]


Epoch 20/30, Train Loss: 1.0824, Train Acc: 86.96%, Val Loss: 1.5798, Val Acc: 57.04%, Gap: 29.93%


Epoch 21/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.52it/s, loss=1.116, acc=78.2%]


Epoch 21/30, Train Loss: 1.2070, Train Acc: 78.19%, Val Loss: 1.5594, Val Acc: 56.46%, Gap: 21.72%


Epoch 22/30 [Train]: 100%|██████████| 718/718 [00:30<00:00, 23.41it/s, loss=1.148, acc=80.3%]


Epoch 22/30, Train Loss: 1.1762, Train Acc: 80.32%, Val Loss: 1.5744, Val Acc: 56.30%, Gap: 24.02%
Early stopping triggered after 22 epochs
Training completed. Best validation accuracy: 57.65%


0,1
batch_loss,██▆▆▆▄▆▄▅▄▄▃▅▅▃▄▄▄▆▅▃▃▂▃▄▄▃▃▁▃▃▃▃▂▁▃▁▃▃▂
best_val_accuracy,▁
epoch,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇█████
learning_rate,█▇▆▅▃▂▂▁▁▁▇▇▇▇▆▂▂▁▁▁█▇▆▄▃▂▂▂▁▁▁██▇▇▁███▇
train_accuracy,▁▂▃▄▄▃▄▄▅▆▅▅▆▇▇▆▆▇██▇▇
train_loss,█▇▆▆▅▆▅▅▄▄▅▄▃▃▂▃▃▂▁▁▂▂
train_val_gap,▁▂▂▃▃▃▃▃▄▅▄▄▅▆▆▅▆▇▇█▆▇
val_accuracy,▁▃▅▅▅▅▆▇▇▇▆▇███▇▇███▇▇
val_loss,█▅▃▃▂▃▂▂▁▁▂▁▂▂▂▃▃▃▄▄▃▄

0,1
batch_loss,1.38073
best_val_accuracy,57.64542
epoch,21.0
learning_rate,0.00023
train_accuracy,80.31959
train_loss,1.17624
train_val_gap,24.01517
val_accuracy,56.30442
val_loss,1.57444


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c8pc87t1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_rate: 0.4909229676896912
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	label_smoothing: 0.2043106694153542
[34m[1mwandb[0m: 	learning_rate: 0.00034487176440310024
[34m[1mwandb[0m: 	patience: 8
[34m[1mwandb[0m: 	weight_decay: 0.00010395869729956298


Epoch 1/30 [Train]: 100%|██████████| 359/359 [00:26<00:00, 13.55it/s, loss=1.694, acc=32.5%]


Epoch 1/30, Train Loss: 1.7809, Train Acc: 32.49%, Val Loss: 1.6681, Val Acc: 41.83%, Gap: -9.34%
New best model saved with validation accuracy: 41.83%


Epoch 2/30 [Train]: 100%|██████████| 359/359 [00:26<00:00, 13.55it/s, loss=1.599, acc=43.5%]


Epoch 2/30, Train Loss: 1.6452, Train Acc: 43.55%, Val Loss: 1.6023, Val Acc: 45.63%, Gap: -2.08%
New best model saved with validation accuracy: 45.63%


Epoch 3/30 [Train]: 100%|██████████| 359/359 [00:26<00:00, 13.42it/s, loss=1.524, acc=48.4%]


Epoch 3/30, Train Loss: 1.5805, Train Acc: 48.38%, Val Loss: 1.5589, Val Acc: 48.82%, Gap: -0.43%
New best model saved with validation accuracy: 48.82%


Epoch 4/30 [Train]: 100%|██████████| 359/359 [00:27<00:00, 13.28it/s, loss=1.675, acc=52.5%]


Epoch 4/30, Train Loss: 1.5288, Train Acc: 52.54%, Val Loss: 1.5434, Val Acc: 50.31%, Gap: 2.22%
New best model saved with validation accuracy: 50.31%


Epoch 5/30 [Train]: 100%|██████████| 359/359 [00:26<00:00, 13.33it/s, loss=1.552, acc=54.3%]


Epoch 5/30, Train Loss: 1.4993, Train Acc: 54.29%, Val Loss: 1.5399, Val Acc: 50.73%, Gap: 3.56%
New best model saved with validation accuracy: 50.73%


Epoch 6/30 [Train]: 100%|██████████| 359/359 [00:26<00:00, 13.51it/s, loss=1.525, acc=51.3%]


Epoch 6/30, Train Loss: 1.5418, Train Acc: 51.30%, Val Loss: 1.5429, Val Acc: 51.01%, Gap: 0.29%
New best model saved with validation accuracy: 51.01%


Epoch 7/30 [Train]: 100%|██████████| 359/359 [00:27<00:00, 12.99it/s, loss=1.594, acc=55.8%]


Epoch 7/30, Train Loss: 1.4855, Train Acc: 55.79%, Val Loss: 1.5158, Val Acc: 52.98%, Gap: 2.82%
New best model saved with validation accuracy: 52.98%


Epoch 8/30 [Train]: 100%|██████████| 359/359 [00:26<00:00, 13.77it/s, loss=1.429, acc=60.2%]


Epoch 8/30, Train Loss: 1.4246, Train Acc: 60.24%, Val Loss: 1.5024, Val Acc: 54.95%, Gap: 5.29%
New best model saved with validation accuracy: 54.95%


Epoch 9/30 [Train]: 100%|██████████| 359/359 [00:27<00:00, 13.23it/s, loss=1.363, acc=65.1%]


Epoch 9/30, Train Loss: 1.3604, Train Acc: 65.09%, Val Loss: 1.4973, Val Acc: 55.31%, Gap: 9.78%
New best model saved with validation accuracy: 55.31%


Epoch 10/30 [Train]: 100%|██████████| 359/359 [00:27<00:00, 13.05it/s, loss=1.435, acc=68.0%]


Epoch 10/30, Train Loss: 1.3209, Train Acc: 68.01%, Val Loss: 1.5024, Val Acc: 55.38%, Gap: 12.63%
New best model saved with validation accuracy: 55.38%


Epoch 11/30 [Train]: 100%|██████████| 359/359 [00:27<00:00, 13.16it/s, loss=1.346, acc=62.2%]


Epoch 11/30, Train Loss: 1.3986, Train Acc: 62.17%, Val Loss: 1.5056, Val Acc: 54.62%, Gap: 7.55%


Epoch 12/30 [Train]: 100%|██████████| 359/359 [00:28<00:00, 12.58it/s, loss=1.399, acc=66.1%]


Epoch 12/30, Train Loss: 1.3483, Train Acc: 66.12%, Val Loss: 1.5253, Val Acc: 54.89%, Gap: 11.22%


Epoch 13/30 [Train]: 100%|██████████| 359/359 [00:28<00:00, 12.78it/s, loss=1.204, acc=71.0%]


Epoch 13/30, Train Loss: 1.2795, Train Acc: 70.97%, Val Loss: 1.5216, Val Acc: 55.99%, Gap: 14.98%
New best model saved with validation accuracy: 55.99%


Epoch 14/30 [Train]: 100%|██████████| 359/359 [00:28<00:00, 12.73it/s, loss=1.212, acc=76.1%]


Epoch 14/30, Train Loss: 1.2035, Train Acc: 76.13%, Val Loss: 1.5343, Val Acc: 56.32%, Gap: 19.80%
New best model saved with validation accuracy: 56.32%


Epoch 15/30 [Train]: 100%|██████████| 359/359 [00:27<00:00, 12.96it/s, loss=1.242, acc=78.9%]


Epoch 15/30, Train Loss: 1.1634, Train Acc: 78.87%, Val Loss: 1.5374, Val Acc: 56.13%, Gap: 22.74%


Epoch 16/30 [Train]: 100%|██████████| 359/359 [00:27<00:00, 13.04it/s, loss=1.332, acc=71.8%]


Epoch 16/30, Train Loss: 1.2639, Train Acc: 71.76%, Val Loss: 1.5560, Val Acc: 54.01%, Gap: 17.75%


Epoch 17/30 [Train]: 100%|██████████| 359/359 [00:26<00:00, 13.31it/s, loss=1.244, acc=75.0%]


Epoch 17/30, Train Loss: 1.2210, Train Acc: 75.01%, Val Loss: 1.5612, Val Acc: 54.23%, Gap: 20.78%


Epoch 18/30 [Train]: 100%|██████████| 359/359 [00:26<00:00, 13.56it/s, loss=1.166, acc=79.3%]


Epoch 18/30, Train Loss: 1.1563, Train Acc: 79.27%, Val Loss: 1.5608, Val Acc: 55.61%, Gap: 23.67%


Epoch 19/30 [Train]: 100%|██████████| 359/359 [00:25<00:00, 14.09it/s, loss=1.097, acc=84.0%]


Epoch 19/30, Train Loss: 1.0847, Train Acc: 84.04%, Val Loss: 1.5775, Val Acc: 55.69%, Gap: 28.34%


Epoch 20/30 [Train]: 100%|██████████| 359/359 [00:25<00:00, 13.84it/s, loss=1.044, acc=86.0%]


Epoch 20/30, Train Loss: 1.0499, Train Acc: 86.03%, Val Loss: 1.5865, Val Acc: 55.69%, Gap: 30.33%


Epoch 21/30 [Train]: 100%|██████████| 359/359 [00:27<00:00, 13.21it/s, loss=1.324, acc=79.2%]


Epoch 21/30, Train Loss: 1.1562, Train Acc: 79.17%, Val Loss: 1.6043, Val Acc: 53.74%, Gap: 25.43%


Epoch 22/30 [Train]: 100%|██████████| 359/359 [00:27<00:00, 13.25it/s, loss=1.110, acc=81.0%]


Epoch 22/30, Train Loss: 1.1277, Train Acc: 81.01%, Val Loss: 1.5842, Val Acc: 55.21%, Gap: 25.80%
Early stopping triggered after 22 epochs
Training completed. Best validation accuracy: 56.32%


0,1
batch_loss,██▆▇▆▆▆▆▇▅▅▅▆▄▅▄▅▆▅▆▄▄▂▄▃▃▂▂▄▃▂▃▄▂▂▁▁▁▂▁
best_val_accuracy,▁
epoch,▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
learning_rate,█▇▇▇▇▅▃▃▃▂▁██▆▃▃▂▂▂▁██▆▅▃▁▁██▇▆▅▅▃▂▁██▇▆
train_accuracy,▁▂▃▄▄▃▄▅▅▆▅▅▆▇▇▆▇▇██▇▇
train_loss,█▇▆▆▅▆▅▅▄▄▄▄▃▂▂▃▃▂▁▁▂▂
train_val_gap,▁▂▃▃▃▃▃▄▄▅▄▅▅▆▇▆▆▇██▇▇
val_accuracy,▁▃▄▅▅▅▆▇██▇▇███▇▇███▇▇
val_loss,█▅▄▃▃▃▂▁▁▁▁▂▂▃▃▃▄▄▄▅▅▅

0,1
batch_loss,1.04638
best_val_accuracy,56.32184
epoch,21.0
learning_rate,0.00023
train_accuracy,81.00753
train_loss,1.12775
train_val_gap,25.80029
val_accuracy,55.20724
val_loss,1.58417


[34m[1mwandb[0m: Agent Starting Run: a0bqej27 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_rate: 0.6672363047253114
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	label_smoothing: 0.2246903076650116
[34m[1mwandb[0m: 	learning_rate: 0.00019855275671255853
[34m[1mwandb[0m: 	patience: 8
[34m[1mwandb[0m: 	weight_decay: 0.0016493858200521444


Epoch 1/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.10it/s, loss=1.789, acc=30.9%]


Epoch 1/30, Train Loss: 1.8078, Train Acc: 30.94%, Val Loss: 1.6974, Val Acc: 40.63%, Gap: -9.69%
New best model saved with validation accuracy: 40.63%


Epoch 2/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.24it/s, loss=1.745, acc=41.2%]


Epoch 2/30, Train Loss: 1.6995, Train Acc: 41.21%, Val Loss: 1.6383, Val Acc: 43.38%, Gap: -2.17%
New best model saved with validation accuracy: 43.38%


Epoch 3/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.32it/s, loss=1.836, acc=45.2%]


Epoch 3/30, Train Loss: 1.6459, Train Acc: 45.21%, Val Loss: 1.6149, Val Acc: 45.44%, Gap: -0.22%
New best model saved with validation accuracy: 45.44%


Epoch 4/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.45it/s, loss=1.777, acc=48.1%]


Epoch 4/30, Train Loss: 1.6080, Train Acc: 48.10%, Val Loss: 1.6003, Val Acc: 47.07%, Gap: 1.02%
New best model saved with validation accuracy: 47.07%


Epoch 5/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 21.97it/s, loss=1.704, acc=49.5%]


Epoch 5/30, Train Loss: 1.5842, Train Acc: 49.49%, Val Loss: 1.5951, Val Acc: 47.42%, Gap: 2.07%
New best model saved with validation accuracy: 47.42%


Epoch 6/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 21.90it/s, loss=1.748, acc=47.8%]


Epoch 6/30, Train Loss: 1.6126, Train Acc: 47.75%, Val Loss: 1.6057, Val Acc: 46.69%, Gap: 1.06%


Epoch 7/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.15it/s, loss=1.586, acc=50.7%]


Epoch 7/30, Train Loss: 1.5721, Train Acc: 50.69%, Val Loss: 1.5762, Val Acc: 49.53%, Gap: 1.16%
New best model saved with validation accuracy: 49.53%


Epoch 8/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 21.95it/s, loss=1.598, acc=53.7%]


Epoch 8/30, Train Loss: 1.5300, Train Acc: 53.67%, Val Loss: 1.5653, Val Acc: 50.84%, Gap: 2.84%
New best model saved with validation accuracy: 50.84%


Epoch 9/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.39it/s, loss=1.821, acc=57.2%]


Epoch 9/30, Train Loss: 1.4870, Train Acc: 57.21%, Val Loss: 1.5641, Val Acc: 51.46%, Gap: 5.75%
New best model saved with validation accuracy: 51.46%


Epoch 10/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.78it/s, loss=1.521, acc=59.2%]


Epoch 10/30, Train Loss: 1.4598, Train Acc: 59.15%, Val Loss: 1.5616, Val Acc: 51.41%, Gap: 7.74%


Epoch 11/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.94it/s, loss=1.522, acc=55.5%]


Epoch 11/30, Train Loss: 1.5101, Train Acc: 55.47%, Val Loss: 1.5640, Val Acc: 51.50%, Gap: 3.97%
New best model saved with validation accuracy: 51.50%


Epoch 12/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.71it/s, loss=1.264, acc=57.8%]


Epoch 12/30, Train Loss: 1.4803, Train Acc: 57.83%, Val Loss: 1.5513, Val Acc: 51.76%, Gap: 6.07%
New best model saved with validation accuracy: 51.76%


Epoch 13/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.58it/s, loss=1.476, acc=62.4%]


Epoch 13/30, Train Loss: 1.4259, Train Acc: 62.41%, Val Loss: 1.5541, Val Acc: 53.17%, Gap: 9.24%
New best model saved with validation accuracy: 53.17%


Epoch 14/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.92it/s, loss=1.429, acc=66.5%]


Epoch 14/30, Train Loss: 1.3752, Train Acc: 66.49%, Val Loss: 1.5529, Val Acc: 53.69%, Gap: 12.79%
New best model saved with validation accuracy: 53.69%


Epoch 15/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.80it/s, loss=1.323, acc=68.2%]


Epoch 15/30, Train Loss: 1.3457, Train Acc: 68.20%, Val Loss: 1.5538, Val Acc: 53.69%, Gap: 14.51%


Epoch 16/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.59it/s, loss=1.564, acc=62.6%]


Epoch 16/30, Train Loss: 1.4223, Train Acc: 62.62%, Val Loss: 1.5535, Val Acc: 53.20%, Gap: 9.42%


Epoch 17/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.55it/s, loss=1.347, acc=65.8%]


Epoch 17/30, Train Loss: 1.3802, Train Acc: 65.82%, Val Loss: 1.5525, Val Acc: 53.92%, Gap: 11.90%
New best model saved with validation accuracy: 53.92%


Epoch 18/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.43it/s, loss=1.399, acc=69.7%]


Epoch 18/30, Train Loss: 1.3262, Train Acc: 69.70%, Val Loss: 1.5759, Val Acc: 54.04%, Gap: 15.66%
New best model saved with validation accuracy: 54.04%


Epoch 19/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.67it/s, loss=1.321, acc=74.3%]


Epoch 19/30, Train Loss: 1.2683, Train Acc: 74.33%, Val Loss: 1.5643, Val Acc: 55.52%, Gap: 18.81%
New best model saved with validation accuracy: 55.52%


Epoch 20/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.55it/s, loss=1.273, acc=76.3%]


Epoch 20/30, Train Loss: 1.2379, Train Acc: 76.34%, Val Loss: 1.5633, Val Acc: 55.35%, Gap: 20.99%


Epoch 21/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.41it/s, loss=1.307, acc=69.8%]


Epoch 21/30, Train Loss: 1.3269, Train Acc: 69.85%, Val Loss: 1.5881, Val Acc: 52.77%, Gap: 17.08%


Epoch 22/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.46it/s, loss=1.353, acc=72.9%]


Epoch 22/30, Train Loss: 1.2879, Train Acc: 72.86%, Val Loss: 1.5715, Val Acc: 54.65%, Gap: 18.21%


Epoch 23/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.91it/s, loss=1.117, acc=76.5%]


Epoch 23/30, Train Loss: 1.2333, Train Acc: 76.46%, Val Loss: 1.5729, Val Acc: 55.59%, Gap: 20.87%
New best model saved with validation accuracy: 55.59%


Epoch 24/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.66it/s, loss=1.131, acc=80.6%]


Epoch 24/30, Train Loss: 1.1789, Train Acc: 80.64%, Val Loss: 1.5910, Val Acc: 54.96%, Gap: 25.67%


Epoch 25/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.58it/s, loss=1.110, acc=82.7%]


Epoch 25/30, Train Loss: 1.1509, Train Acc: 82.67%, Val Loss: 1.5869, Val Acc: 55.52%, Gap: 27.15%


Epoch 26/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.69it/s, loss=1.276, acc=76.5%]


Epoch 26/30, Train Loss: 1.2385, Train Acc: 76.51%, Val Loss: 1.5995, Val Acc: 54.63%, Gap: 21.87%


Epoch 27/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.35it/s, loss=1.271, acc=78.3%]


Epoch 27/30, Train Loss: 1.2108, Train Acc: 78.30%, Val Loss: 1.6012, Val Acc: 55.02%, Gap: 23.28%


Epoch 28/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.35it/s, loss=1.204, acc=82.3%]


Epoch 28/30, Train Loss: 1.1587, Train Acc: 82.28%, Val Loss: 1.6065, Val Acc: 54.79%, Gap: 27.49%


Epoch 29/30 [Train]: 100%|██████████| 718/718 [00:32<00:00, 22.43it/s, loss=1.099, acc=85.2%]


Epoch 29/30, Train Loss: 1.1119, Train Acc: 85.25%, Val Loss: 1.6087, Val Acc: 55.96%, Gap: 29.29%
New best model saved with validation accuracy: 55.96%


Epoch 30/30 [Train]: 100%|██████████| 718/718 [00:31<00:00, 22.46it/s, loss=1.053, acc=87.5%]


Epoch 30/30, Train Loss: 1.0813, Train Acc: 87.51%, Val Loss: 1.6137, Val Acc: 55.59%, Gap: 31.92%
Training completed. Best validation accuracy: 55.96%


0,1
batch_loss,▇▆▆▇▆▇▆█▆▅▅▆▅▅▅▅▆▆▆▄▅▄▄▄▅▃▄▁▃▄▃▃▄▄▃▃▂▃▁▁
best_val_accuracy,▁
epoch,▁▁▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇█████
learning_rate,███▇▄▁█▇▇▆▅▄▄▂▂▁▁▇▇▆▃▂███▅▄▄▁▁▆▅▄▂█▆▄▃▂▁
train_accuracy,▁▂▃▃▃▃▃▄▄▄▄▄▅▅▆▅▅▆▆▇▆▆▇▇▇▇▇▇██
train_loss,█▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▃▂▂▁▁
train_val_gap,▁▂▃▃▃▃▃▃▄▄▃▄▄▅▅▄▅▅▆▆▆▆▆▇▇▆▇▇██
val_accuracy,▁▂▃▄▄▄▅▆▆▆▆▆▇▇▇▇▇▇██▇▇███▇█▇██
val_loss,█▅▄▃▃▄▂▂▂▁▂▁▁▁▁▁▁▂▂▂▃▂▂▃▃▃▃▄▄▄

0,1
batch_loss,1.01189
best_val_accuracy,55.95611
epoch,29.0
learning_rate,0.0
train_accuracy,87.50816
train_loss,1.08125
train_val_gap,31.91778
val_accuracy,55.59039
val_loss,1.61374


In [35]:
api = wandb.Api()
runs = api.runs("konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_5")

best_run = None
best_val_acc = 0

In [36]:
for run in runs:
    if run.state == "finished" and "best_val_accuracy" in run.summary:
        val_acc = run.summary["best_val_accuracy"]
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_run = run

In [38]:
if best_run:
    print(f"\nBest run: {best_run.name}")
    print(f"Best validation accuracy: {best_val_acc:.2f}%")
    print(f"Best hyperparameters: {best_run.config}")

    # Download the artifact
    artifacts = api.artifact(f"konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_5/best_model_{best_run.name}:latest")
    download_path = artifacts.download()

    # Check what was actually downloaded
    print(f"Downloaded to: {download_path}")

    # Find the correct path to the model file
    import os
    model_file_path = None

    # Check in the download directory
    if os.path.exists(os.path.join(download_path, "best_model.pth")):
        model_file_path = os.path.join(download_path, "best_model.pth")
    # Check in current directory
    elif os.path.exists("best_model.pth"):
        model_file_path = "best_model.pth"
    # Search recursively
    else:
        for root, dirs, files in os.walk("."):
            if "best_model.pth" in files:
                model_file_path = os.path.join(root, "best_model.pth")
                break

    if model_file_path and os.path.exists(model_file_path):
        print(f"Found model file at: {model_file_path}")

        # Create final artifact with correct path
        final_artifact = wandb.Artifact(
            name="final_best_resnet_model",
            type="model",
            description=f"Final best ResNet model with {best_val_acc:.2f}% validation accuracy"
        )
        final_artifact.add_file(model_file_path)

        # Upload the final artifact
        with wandb.init(project="Facial_Expression_Recognition_5", name="final_model_upload"):
            wandb.log_artifact(final_artifact)
            wandb.log({
                "final_best_val_accuracy": best_val_acc,
                "model_architecture": "ResNet",
                "ready_for_testing": True
            })

        print(f"\nFinal model uploaded to wandb as 'final_best_resnet_model'")
        print("You can now load this model in the future for testing on any dataset!")

        print("\nTo test on a different dataset in the future, use:")
        print("evaluate_model_on_testset('path_to_downloaded_model.pth', 'your_test_dataset.csv')")

    else:
        print("Error: Could not find the downloaded model file!")
        print("Files in current directory:")
        for item in os.listdir("."):
            print(f"  {item}")

else:
    print("No successful runs found!")


Best run: lyric-sweep-1
Best validation accuracy: 57.65%
Best hyperparameters: {'epochs': 30, 'patience': 8, 'batch_size': 32, 'dropout_rate': 0.5479010231390392, 'weight_decay': 0.0007072641496874819, 'learning_rate': 0.0003507942900207464, 'label_smoothing': 0.22223961117304525}


[34m[1mwandb[0m:   1 of 1 files downloaded.  


Downloaded to: /content/artifacts/best_model_lyric-sweep-1:v10
Found model file at: /content/artifacts/best_model_lyric-sweep-1:v10/best_model.pth


0,1
final_best_val_accuracy,▁

0,1
final_best_val_accuracy,57.64542
model_architecture,ResNet
ready_for_testing,True



Final model uploaded to wandb as 'final_best_resnet_model'
You can now load this model in the future for testing on any dataset!

To test on a different dataset in the future, use:
evaluate_model_on_testset('path_to_downloaded_model.pth', 'your_test_dataset.csv')
