# Experiment 8

In [2]:
!pip install kaggle wandb onnx -Uq
from google.colab import drive
drive.mount('/content/drive')

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive


In [3]:
! mkdir ~/.kaggle

In [4]:
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json

In [5]:
! chmod 600 ~/.kaggle/kaggle.json

In [6]:
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge

Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 97% 277M/285M [00:03<00:00, 51.8MB/s]
100% 285M/285M [00:06<00:00, 49.1MB/s]


In [7]:
! unzip challenges-in-representation-learning-facial-expression-recognition-challenge.zip

Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  inflating: example_submission.csv  
  inflating: fer2013.tar.gz          
  inflating: icml_face_data.csv      
  inflating: test.csv                
  inflating: train.csv               


In [8]:
!pip install wandb onnx -Uq

# data

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import wandb
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


In [11]:
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkonstantine25b[0m ([33mkonstantine25b-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [12]:
class FER2013Dataset(Dataset):
    def __init__(self, data_source, transform=None):
        if isinstance(data_source, str):
           self.data = pd.read_csv(data_source)
        else:
           self.data = data_source
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = self.data.iloc[idx]['pixels']
        emotion = self.data.iloc[idx]['emotion']

        image = np.array([int(pixel) for pixel in pixels.split()]).reshape(48, 48)
        image = image.astype(np.float32) / 255.0

        if self.transform:
            image = self.transform(image)
        else:
            image = torch.FloatTensor(image).unsqueeze(0)

        return image, emotion

In [13]:
def get_transforms():
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Pad(4),
        transforms.RandomCrop(48),
        transforms.RandomHorizontalFlip(p=0.3),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    val_test_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    return train_transform, val_test_transform

In [14]:
def create_train_val_test_splits(csv_file):
    data = pd.read_csv(csv_file)

    train_size = int(0.72 * len(data))
    val_size = int(0.18 * len(data))
    test_size = len(data) - train_size - val_size

    train_data = data.iloc[:train_size]
    val_data = data.iloc[train_size:train_size + val_size]
    test_data = data.iloc[train_size + val_size:]

    return train_data, val_data, test_data

In [15]:
def create_dataloaders(csv_file, batch_size=64):
    train_transform, val_test_transform = get_transforms()

    train_data, val_data, test_data = create_train_val_test_splits(csv_file)

    train_dataset = FER2013Dataset(train_data, transform=train_transform)
    val_dataset = FER2013Dataset(val_data, transform=val_test_transform)
    test_dataset = FER2013Dataset(test_data, transform=val_test_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    return train_loader, val_loader, test_loader

In [16]:
class BasicResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [17]:
class EnhancedCNN(nn.Module):
    def __init__(self, dropout_rate=0.3, hidden_dim=128):
        super(EnhancedCNN, self).__init__()

        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.resnet_block = BasicResidualBlock(64, 128, stride=2)

        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2)

        self.fc1 = nn.Linear(128 * 3 * 3, hidden_dim)
        self.bn4 = nn.BatchNorm1d(hidden_dim)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(hidden_dim, 7)

    def forward(self, x):
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))

        x = self.resnet_block(x)

        x = self.pool3(F.relu(self.bn3(self.conv3(x))))

        x = x.view(x.size(0), -1)
        x = F.relu(self.bn4(self.fc1(x)))
        x = self.dropout1(x)
        x = self.fc2(x)

        return x

In [18]:
def test_overfitting():
    print("Testing enhanced CNN with ResNet block on small dataset...")

    model = EnhancedCNN(dropout_rate=0.0).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_loader, _, _ = create_dataloaders('train.csv', batch_size=32)

    small_batch = []
    for i, (data, target) in enumerate(train_loader):
        small_batch.extend(list(zip(data, target)))
        if len(small_batch) >= 20:
            break

    small_batch = small_batch[:20]

    for epoch in range(30):
        model.train()
        total_loss = 0
        correct = 0

        batch_data = []
        batch_targets = []

        for data, target in small_batch:
            batch_data.append(data)
            batch_targets.append(target)

        batch_data = torch.stack(batch_data).to(device)
        batch_targets = torch.tensor(batch_targets).to(device)

        optimizer.zero_grad()
        output = model(batch_data)
        loss = criterion(output, batch_targets)
        loss.backward()
        optimizer.step()

        total_loss = loss.item()
        pred = output.argmax(dim=1)
        correct = pred.eq(batch_targets).sum().item()

        accuracy = 100. * correct / len(small_batch)

        print(f"Overfit Epoch {epoch+1}/30, Loss: {total_loss:.4f}, Acc: {accuracy:.2f}%")

        if accuracy >= 95.0:
            print("Enhanced CNN can overfit successfully!")
            break

    print("Overfitting test completed.\n")

In [19]:
def compute_loss(loader, model, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)

            total_loss += loss.item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            total += target.size(0)

    return total_loss / len(loader), 100. * correct / total

In [34]:
def train_model(config=None):
    with wandb.init(project="Facial_Expression_Recognition_8", config=config):
        config = wandb.config

        model = EnhancedCNN(dropout_rate=config.dropout_rate, hidden_dim=config.hidden_dim).to(device)
        train_loader, val_loader, test_loader = create_dataloaders('train.csv', config.batch_size)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.5, verbose=True)

        best_val_acc = 0
        best_model_path = f"best_model_{wandb.run.id}.pt"
        patience_counter = 0

        wandb.watch(model, log="gradients", log_freq=100)

        for epoch in range(config.epochs):
            model.train()
            train_loss = 0
            train_correct = 0
            train_total = 0

            progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{config.epochs} [Train]')

            for batch_idx, (data, target) in enumerate(progress_bar):
                data, target = data.to(device), target.to(device)

                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                pred = output.argmax(dim=1)
                train_correct += pred.eq(target).sum().item()
                train_total += target.size(0)

                if batch_idx % 50 == 0:
                    wandb.log({
                        "batch_loss": loss.item(),
                        "learning_rate": optimizer.param_groups[0]['lr'],
                        "epoch": epoch
                    })

                progress_bar.set_postfix({
                    'loss': f'{loss.item():.3f}',
                    'acc': f'{100.*train_correct/train_total:.1f}%'
                })

            train_acc = 100. * train_correct / train_total
            train_loss = train_loss / len(train_loader)

            val_loss, val_acc = compute_loss(val_loader, model, criterion, device)

            scheduler.step(val_acc)

            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss,
                "train_accuracy": train_acc,
                "val_loss": val_loss,
                "val_accuracy": val_acc,
                "train_val_gap": train_acc - val_acc
            })

            print(f"Epoch {epoch+1}/{config.epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0

                torch.save({
                    'model_state_dict': model.state_dict(),
                    'model_config': {
                        'dropout_rate': config.dropout_rate,
                        'hidden_dim': config.hidden_dim
                    },
                    'training_config': dict(config),
                    'val_accuracy': val_acc,
                    'epoch': epoch
                }, best_model_path)

                model_artifact = wandb.Artifact(
                    name=f"best_model_{wandb.run.name}",
                    type="model",
                    description=f"Best enhanced CNN model with validation accuracy: {val_acc:.2f}%"
                )
                model_artifact.add_file(best_model_path)
                wandb.log_artifact(model_artifact)

                print(f"New best model saved with validation accuracy: {val_acc:.2f}%")
            else:
                patience_counter += 1

            if patience_counter >= config.patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break

        wandb.log({"best_val_accuracy": best_val_acc})
        print(f"Training completed. Best validation accuracy: {best_val_acc:.2f}%")

        return best_val_acc


In [35]:
def evaluate_model_on_testset(model_path, test_loader):
    checkpoint = torch.load(model_path, map_location=device)

    model_config = checkpoint['model_config']
    model = EnhancedCNN(
        dropout_rate=model_config['dropout_rate'],
        hidden_dim=model_config['hidden_dim']
    ).to(device)

    model.load_state_dict(checkpoint['model_state_dict'])

    criterion = nn.CrossEntropyLoss()
    test_loss, test_acc = compute_loss(test_loader, model, criterion, device)

    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Test Loss: {test_loss:.4f}")

    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            all_preds.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())

    emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

    print("\nClassification Report:")
    print(classification_report(all_targets, all_preds, target_names=emotion_labels))

    cm = confusion_matrix(all_targets, all_preds)

    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=emotion_labels, yticklabels=emotion_labels)
    plt.title('Confusion Matrix - Facial Expression Recognition (Enhanced CNN)')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.show()

    return test_acc, all_preds, all_targets


In [33]:
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'best_val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 0.0001,
            'max': 0.01
        },
        'batch_size': {
            'values': [32, 64]
        },
        'dropout_rate': {
            'distribution': 'uniform',
            'min': 0.2,
            'max': 0.5
        },
        'weight_decay': {
            'distribution': 'log_uniform_values',
            'min': 1e-5,
            'max': 1e-3
        },
        'hidden_dim': {
            'values': [128, 256]
        },
        'epochs': {
            'value': 5
        },
        'patience': {
            'value': 5
        }
    }
}

In [36]:
test_overfitting()

Testing enhanced CNN with ResNet block on small dataset...
Overfit Epoch 1/30, Loss: 2.1351, Acc: 0.00%
Overfit Epoch 2/30, Loss: 0.8792, Acc: 95.00%
Enhanced CNN can overfit successfully!
Overfitting test completed.



In [37]:
sweep_id = wandb.sweep(sweep_config, project="Facial_Expression_Recognition_8")

Create sweep with ID: 5gipf1wf
Sweep URL: https://wandb.ai/konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_8/sweeps/5gipf1wf


In [None]:
print("Running enhanced CNN hyperparameter sweep...")
wandb.agent(sweep_id, train_model, count=5)

Running enhanced CNN hyperparameter sweep...


[34m[1mwandb[0m: Agent Starting Run: x6a81bcr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_rate: 0.420663916009306
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00018751741936844473
[34m[1mwandb[0m: 	patience: 5
[34m[1mwandb[0m: 	weight_decay: 0.0006461563175495679


Epoch 1/5 [Train]:  82%|████████▏ | 264/323 [02:17<00:29,  1.97it/s, loss=1.769, acc=28.6%]

In [26]:
api = wandb.Api()
runs = api.runs("konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_8")

best_run = None
best_val_acc = 0

In [27]:
for run in runs:
    if run.state == "finished" and "best_val_accuracy" in run.summary:
        val_acc = run.summary["best_val_accuracy"]
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_run = run

Epoch 1/30 [Train]:  60%|██████    | 388/646 [02:00<01:20,  3.21it/s, loss=1.659, acc=25.5%]
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1251, in _try_get_data
    data = self._data_queue.get(timeout=timeout)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/multiprocessing/reductions.py", line 541, in rebuild_storage_fd
    fd = df.detach()
         ^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/resource_sharer.py", line 57, in detach
    with _resource_sharer.get_connection(self._id) as conn:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/resource_sharer.py", line 86, in get_connection
    c = Client(address, authkey=process.current_process().authkey)
  

In [28]:
if best_run:
    print(f"\nBest run: {best_run.name}")
    print(f"Best validation accuracy: {best_val_acc:.2f}%")
    print(f"Best hyperparameters: {best_run.config}")


    artifacts = api.artifact(f"konstantine25b-free-university-of-tbilisi-/Facial_Expression_Recognition_8/best_model_{best_run.name}:latest")
    download_path = artifacts.download()

    print(f"Downloaded to: {download_path}")

    import os
    model_file_path = None


    if os.path.exists(os.path.join(download_path, "best_model.pth")):
        model_file_path = os.path.join(download_path, "best_model.pth")

    elif os.path.exists("best_model.pth"):
        model_file_path = "best_model.pth"
    else:
        for root, dirs, files in os.walk("."):
            if "best_model.pth" in files:
                model_file_path = os.path.join(root, "best_model.pth")
                break

    if model_file_path and os.path.exists(model_file_path):
        print(f"Found model file at: {model_file_path}")

        final_artifact = wandb.Artifact(
            name="final_best_resnet_model",
            type="model",
            description=f"Final best ResNet model with {best_val_acc:.2f}% validation accuracy"
        )
        final_artifact.add_file(model_file_path)

        with wandb.init(project="Facial_Expression_Recognition_8", name="final_model_upload"):
            wandb.log_artifact(final_artifact)
            wandb.log({
                "final_best_val_accuracy": best_val_acc,
                "model_architecture": "ResNet",
                "ready_for_testing": True
            })

        print(f"\nFinal model uploaded to wandb as 'final_best_resnet_model'")
        print("You can now load this model in the future for testing on any dataset!")

        print("\nTo test on a different dataset in the future, use:")
        print("evaluate_model_on_testset('path_to_downloaded_model.pth', 'your_test_dataset.csv')")

    else:
        print("Error: Could not find the downloaded model file!")
        print("Files in current directory:")
        for item in os.listdir("."):
            print(f"  {item}")

else:
    print("No successful runs found!")


Best run: scarlet-sweep-2
Best validation accuracy: 64.23%
Best hyperparameters: {'epochs': 30, 'patience': 5, 'batch_size': 32, 'hidden_dim': 128, 'dropout_rate': 0.32711369900475645, 'weight_decay': 0.00017971017828689558, 'learning_rate': 0.00048796996812685976}


0,1
batch_loss,█▆▁▁▆▄▁▄
epoch,▁▁▁▁▁▁▁▁
learning_rate,▁▁▁▁▁▁▁▁

0,1
batch_loss,1.82789
epoch,0.0
learning_rate,0.00523


[34m[1mwandb[0m:   1 of 1 files downloaded.  


Downloaded to: /content/artifacts/best_model_scarlet-sweep-2:v19
Error: Could not find the downloaded model file!
Files in current directory:
  .config
  example_submission.csv
  icml_face_data.csv
  train.csv
  drive
  artifacts
  test.csv
  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  konstantine25b-free-university-of-tbilisi-
  wandb
  fer2013.tar.gz
  sample_data
