<a href="https://colab.research.google.com/github/LukaJinc/Colab-Pytorch-Kaggle/blob/main/notebooks/loading_kaggle_data_to_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Downloading Kaggle data sets directly into Colab**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

! pip install kaggle

! mkdir ~/.kaggle

!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json

! chmod 600 ~/.kaggle/kaggle.json

! kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge

! unzip challenges-in-representation-learning-facial-expression-recognition-challenge

!pip install -q wandb

Mounted at /content/drive
Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 86% 244M/285M [00:00<00:00, 749MB/s] 
100% 285M/285M [00:02<00:00, 135MB/s]
Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  inflating: example_submission.csv  
  inflating: fer2013.tar.gz          
  inflating: icml_face_data.csv      
  inflating: test.csv                
  inflating: train.csv               


In [2]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau
import wandb
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

wandb.init(project="facial-expression-recognition", name="robust-cnn-v1")

# Hyperparameters for this run; mirrored to W&B so each run records them.
config = {
    "epochs": 50,  # Increased epochs for better convergence with augmentation
    "batch_size": 128, # Can often increase batch size with a better model
    "learning_rate": 1e-3,
    "image_size": 48,
    "num_classes": 7,
    "num_workers": 4, # For faster data loading
    "seed": 42 # Fixed RNG seed so runs start from the same random state
}
wandb.config.update(config)

# Seed NumPy and PyTorch before any weights are initialised or data is
# shuffled/augmented. Some GPU kernels may still be nondeterministic, so this
# narrows run-to-run variation rather than guaranteeing bit-identical results.
np.random.seed(config["seed"])
torch.manual_seed(config["seed"])

# Data Loading and Efficient Pre-processing
# Processing the pixel string every time is slow.
# We do it once here to create a new column of numpy arrays.
def string_to_array(pixel_string, image_size=None):
    """Convert a whitespace-separated pixel string into a square uint8 image.

    Args:
        pixel_string: Pixel intensities as text separated by whitespace.
        image_size: Side length of the square image. When omitted, the value
            of ``config["image_size"]`` is used.

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8`` with shape
        ``(image_size, image_size)``.

    Raises:
        ValueError: If the number of pixel values does not equal
            ``image_size * image_size`` (raised by ``reshape``).
    """
    if image_size is None:
        # Fall back to the notebook-wide setting only when no size is given.
        image_size = config["image_size"]
    pixels = np.array(pixel_string.split(), dtype=np.uint8)
    return pixels.reshape(image_size, image_size)

# Load the main training data. The download cell extracts train.csv into /content.
data_path = "/content/train.csv"
if not os.path.exists(data_path):
    # Fail here with an actionable message instead of printing and then
    # falling through to pd.read_csv on a path known to be missing.
    raise FileNotFoundError(
        f"Data file not found at {data_path}. "
        "Download 'train.csv' from the Kaggle competition into /content "
        "or update data_path."
    )

full_train_df = pd.read_csv(data_path)
# Parse each pixel string once up front so the dataset does not re-parse
# text on every sample access.
full_train_df['pixels_array'] = full_train_df['pixels'].apply(string_to_array)

# Stratified 90/10 split so both subsets keep the same emotion distribution.
train_df, val_df = train_test_split(
    full_train_df,
    test_size=0.1,
    stratify=full_train_df['emotion'],
    random_state=42
)

class FacialExpressionDataset(Dataset):
    """Dataset over a dataframe with 'pixels_array' and 'emotion' columns.

    Each item is an ``(image, label)`` pair: the image is built from the
    row's 'pixels_array' entry (optionally passed through ``transform``) and
    the label is the row's 'emotion' value as a Python ``int``.

    Args:
        dataframe: Frame providing the 'pixels_array' and 'emotion' columns.
        transform: Optional callable applied to the PIL image.
    """

    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform
        # Pull both columns out once. Calling .iloc[idx] per sample builds a
        # whole row Series each time, which is needlessly slow in the
        # DataLoader hot path. The list holds references to the existing
        # arrays, so no pixel data is copied.
        self._images = dataframe['pixels_array'].tolist()
        self._labels = [int(label) for label in dataframe['emotion']]

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Positional lookup, matching the original .iloc[idx] semantics.
        image = Image.fromarray(self._images[idx])  # PIL image for transforms
        label = self._labels[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Transform pipelines
# Training images get random geometric jitter (flip, rotation, shift/zoom),
# then tensor conversion, normalisation and occasional random erasing.
# Validation images are only converted and normalised.
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
        transforms.RandomErasing(p=0.2, scale=(0.02, 0.1), ratio=(0.3, 3.3), value=0),
    ]
)

val_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# One dataset per split, each paired with its own pipeline.
train_dataset, val_dataset = (
    FacialExpressionDataset(train_df, transform=train_transform),
    FacialExpressionDataset(val_df, transform=val_transform),
)

# Both loaders share every setting except shuffling: only training is shuffled.
train_loader, val_loader = (
    DataLoader(
        split_dataset,
        batch_size=config["batch_size"],
        shuffle=should_shuffle,
        num_workers=config["num_workers"],
        pin_memory=True,
    )
    for split_dataset, should_shuffle in ((train_dataset, True), (val_dataset, False))
)

# Robust Model Architecture
# BatchNorm stabilizes training, and AdaptiveAvgPool makes the
# classifier robust to input size changes and is more effective than a simple Flatten.
class RobustCNN(nn.Module):
    """Convolutional classifier for single-channel images.

    The feature extractor is three stages of 3x3 Conv -> BatchNorm -> ReLU
    units (2, 2 and 1 units, with 64, 128 and 256 channels), each stage
    closed by a 2x2 max-pool. An adaptive average pool fixes the feature map
    at 6x6 before a two-layer fully connected head with dropout.

    Args:
        num_classes: Number of output logits.
    """

    # (in_channels, out_channels) for every conv unit, grouped by stage.
    _STAGES = (
        ((1, 64), (64, 64)),
        ((64, 128), (128, 128)),
        ((128, 256),),
    )

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels):
        """Return the layers of one 3x3 Conv -> BatchNorm -> ReLU unit."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]

    def __init__(self, num_classes):
        super().__init__()

        # Build one flat layer list so the Sequential indices (and therefore
        # the state_dict keys) are features.0 ... features.17.
        layers = []
        for stage in self._STAGES:
            for in_channels, out_channels in stage:
                layers.extend(self._conv_bn_relu(in_channels, out_channels))
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.features = nn.Sequential(*layers)

        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        head = [
            nn.Dropout(0.5),
            nn.Linear(256 * 6 * 6, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Map a batch of images to a batch of class logits."""
        pooled = self.avgpool(self.features(x))
        return self.classifier(torch.flatten(pooled, 1))

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RobustCNN(num_classes=config["num_classes"]).to(device)

# Loss, Optimizer, and Scheduler
# 'balanced' class weights up-weight the rarer emotions in the loss.
balanced_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_df['emotion']),
    y=train_df['emotion'].to_numpy()
)
# The weight vector has one entry per class present in train_df. If a class
# were missing from the split, the vector would be shorter than the model's
# output and the loss would fail later with a less obvious error.
assert len(balanced_weights) == config["num_classes"], (
    "train_df does not contain every class; class weights would not line up "
    "with the model outputs"
)
class_weights = torch.tensor(balanced_weights, dtype=torch.float).to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=config["learning_rate"])

# Halve the learning rate when the monitored metric (mode='max', i.e. higher
# is better) has not improved for 3 epochs.
# `verbose=True` is omitted: the argument is deprecated in recent PyTorch
# releases and may be rejected by newer ones. The current learning rate is
# still logged to W&B every epoch by the training loop.
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

# Training Loop
def _run_epoch(loader, training):
    """Run one full pass over `loader` and return (mean loss, accuracy).

    When `training` is True the model is put in train mode and the optimizer
    is stepped on every batch. Otherwise the model is put in eval mode and
    gradients are disabled. Both metrics are averaged over
    `len(loader.dataset)`.
    """
    model.train(training)  # model.train(False) is equivalent to model.eval()
    total_loss, total_correct = 0.0, 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch loss by batch size so the final mean is
            # per-sample even when the last batch is smaller.
            total_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            total_correct += (preds == labels).sum().item()

    num_samples = len(loader.dataset)
    return total_loss / num_samples, total_correct / num_samples


wandb.watch(model, log="all")
best_val_acc = 0.0

for epoch in range(config["epochs"]):
    train_loss, train_acc = _run_epoch(train_loader, training=True)
    val_loss, val_acc = _run_epoch(val_loader, training=False)

    # Step the scheduler based on validation accuracy
    scheduler.step(val_acc)

    # Log metrics to W&B
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "train_accuracy": train_acc,
        "val_loss": val_loss,
        "val_accuracy": val_acc,
        "learning_rate": optimizer.param_groups[0]['lr']
    })

    print(f"Epoch {epoch+1:02d}: Train Acc={train_acc:.4f}, Val Acc={val_acc:.4f}, Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}")

    # Keep only the checkpoint with the highest validation accuracy so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model.pth")
        wandb.save("best_model.pth") # Save best model to W&B
        print(f"New best model saved with validation accuracy: {val_acc:.4f}")


# Finish W&B run
wandb.finish()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlchik22[0m ([33mlchik22-free-uni[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch 01: Train Acc=0.2186, Val Acc=0.3002, Train Loss=1.9795, Val Loss=1.7807
New best model saved with validation accuracy: 0.3002
Epoch 02: Train Acc=0.2982, Val Acc=0.3382, Train Loss=1.7941, Val Loss=1.6008
New best model saved with validation accuracy: 0.3382
Epoch 03: Train Acc=0.3764, Val Acc=0.4479, Train Loss=1.6299, Val Loss=1.4763
New best model saved with validation accuracy: 0.4479
Epoch 04: Train Acc=0.4109, Val Acc=0.4709, Train Loss=1.5313, Val Loss=1.4314
New best model saved with validation accuracy: 0.4709
Epoch 05: Train Acc=0.4393, Val Acc=0.4828, Train Loss=1.4735, Val Loss=1.2984
New best model saved with validation accuracy: 0.4828
Epoch 06: Train Acc=0.4557, Val Acc=0.5026, Train Loss=1.4345, Val Loss=1.3099
New best model saved with validation accuracy: 0.5026
Epoch 07: Train Acc=0.4676, Val Acc=0.5378, Train Loss=1.3871, Val Loss=1.2936
New best model saved with validation accuracy: 0.5378
Epoch 08: Train Acc=0.4795, Val Acc=0.5468, Train Loss=1.3693, Val Lo

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
learning_rate,████████████████▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
train_accuracy,▁▂▄▄▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇████████████████
train_loss,█▇▆▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▂▄▄▅▆▆▆▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇██████████████
val_loss,█▆▅▅▄▃▃▃▃▂▂▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,50.0
learning_rate,6e-05
train_accuracy,0.63275
train_loss,0.91012
val_accuracy,0.65552
val_loss,1.00354
