# Production Training on Google Colab - Option B

## Target: 78-85% Accuracy

### Features:
- **4 datasets**: FER2013, UTKFace, RAF-DB, AffectNet (96K images; RAF-DB and AffectNet are optional and are skipped if they cannot be downloaded)
- **Backbone**: EfficientNet-B2 (pretrained)
- **Loss**: Focal Loss + Label Smoothing
- **Augmentation**: Advanced (Albumentations + Mixup/Cutmix)
- **Regularization**: Dropout 0.5 + Weight Decay 1e-4
- **Training**: 100 epochs with early stopping

### Estimated Time:
- T4 GPU: 10-12 hours
- V100 GPU: 4-6 hours
- A100 GPU: 2-3 hours

### Pre-Run Checklist:
- [ ] GPU enabled (Runtime > Change runtime type > T4 GPU)
- [ ] kaggle.json file ready (from https://www.kaggle.com/settings)
- [ ] Google Drive access ready (Cell 8 mounts it to save the results)

---

## IMPORTANT:
Run cells in order. Do NOT skip cells.

In [None]:
# ============================================================
# CELL 1: CHECK GPU
# ============================================================
# Report the installed PyTorch build and, when CUDA is usable, the GPU at
# device index 0 (name and total memory).

import torch
import torch.cuda as cuda

gpu_ready = cuda.is_available()

print("PyTorch version:", torch.__version__)
print("CUDA available:", gpu_ready)

if not gpu_ready:
    print("WARNING: No GPU available! Training will be very slow.")
else:
    print("CUDA version:", torch.version.cuda)
    print("GPU device:", cuda.get_device_name(0))
    # total_memory is reported in bytes; convert to GiB for display.
    total_gib = cuda.get_device_properties(0).total_memory / 1024**3
    print(f"GPU memory: {total_gib:.2f} GB")

In [None]:
# ============================================================
# CELL 2: CLONE REPOSITORY
# ============================================================

import os
from pathlib import Path

repo_url = "https://github.com/khoiabc2020/age-gender-emotion-detection.git"
repo_dir = Path("/content/repo")

if repo_dir.exists():
    print("[INFO] Repository exists, pulling latest changes...")
    !cd /content/repo && git pull
else:
    print("[INFO] Cloning repository...")
    !git clone {repo_url} /content/repo

# Change to project directory
%cd /content/repo/training_experiments
print("\n[OK] Repository ready!")
print(f"Working directory: {os.getcwd()}")

In [None]:
# ============================================================
# CELL 3: SETUP KAGGLE API
# ============================================================
# Ask the user to upload their Kaggle API token and install it as
# ~/.kaggle/kaggle.json with owner-only permissions.

from google.colab import files
import os
from pathlib import Path

print("[INFO] Please upload your kaggle.json file")
print("[INFO] Get it from: https://www.kaggle.com/settings\n")

# files.upload() returns {filename: file content as bytes}.
uploaded = files.upload()

# Browsers rename repeated downloads (e.g. "kaggle (1).json"), so accept any
# uploaded "kaggle*.json" file, preferring an exact "kaggle.json" match.
token_candidates = [
    name for name in uploaded
    if name.lower().startswith('kaggle') and name.lower().endswith('.json')
]
if 'kaggle.json' in uploaded:
    token_name = 'kaggle.json'
elif token_candidates:
    token_name = token_candidates[0]
else:
    token_name = None

if token_name is not None:
    kaggle_dir = Path.home() / '.kaggle'
    kaggle_dir.mkdir(parents=True, exist_ok=True)

    # Write the token from the uploaded bytes so the result does not depend
    # on the current working directory or on the uploaded file's exact name.
    token_path = kaggle_dir / 'kaggle.json'
    token_path.write_bytes(uploaded[token_name])
    token_path.chmod(0o600)  # the token is a credential: owner read/write only

    # The upload also leaves a copy in the working directory; remove it so
    # the credential is not left lying around next to the project files.
    Path(token_name).unlink(missing_ok=True)

    print("\n[OK] Kaggle API configured!")
else:
    print("\n[ERROR] kaggle.json not found!")

In [None]:
# ============================================================
# CELL 4: DOWNLOAD 4 DATASETS (PRODUCTION)
# ============================================================

print("=" * 60)
print("DOWNLOADING 4 DATASETS FOR PRODUCTION TRAINING")
print("Total: ~1.5GB | Time: ~20-30 minutes")
print("=" * 60)

# Install kagglehub if needed
%pip install -q kagglehub

import kagglehub
import json
from pathlib import Path

dataset_paths = {}

# 1. FER2013 - Emotion Recognition Dataset
print("\n[1/4] Downloading FER2013 (Emotion)...")
print("      Size: ~60MB | Time: ~3 minutes")
fer2013_path = kagglehub.dataset_download("msambare/fer2013")
dataset_paths['fer2013'] = fer2013_path
print(f"      [OK] FER2013: {fer2013_path}")

# 2. UTKFace - Age & Gender Dataset
print("\n[2/4] Downloading UTKFace (Age/Gender)...")
print("      Size: ~500MB | Time: ~5 minutes")
utkface_path = kagglehub.dataset_download("jangedoo/utkface-new")
dataset_paths['utkface'] = utkface_path
print(f"      [OK] UTKFace: {utkface_path}")

# 3. RAF-DB - High-Quality Emotion Dataset (Optional)
print("\n[3/4] Downloading RAF-DB (High-Quality Emotion)...")
print("      Size: ~200MB | Time: ~5 minutes")
try:
    rafdb_datasets = [
        "shuvoalok/raf-db-dataset",
        "alex1233213/raf-db"
    ]
    rafdb_path = None
    for dataset in rafdb_datasets:
        try:
            rafdb_path = kagglehub.dataset_download(dataset)
            dataset_paths['rafdb'] = rafdb_path
            print(f"      [OK] RAF-DB: {rafdb_path}")
            break
        except:
            continue
    if rafdb_path is None:
        print(f"      [WARN] RAF-DB not available, continuing without it")
except Exception as e:
    print(f"      [WARN] RAF-DB error: {e}")

# 4. AffectNet - Large-scale Emotion Dataset (Optional)
print("\n[4/4] Downloading AffectNet subset...")
print("      Size: ~250MB | Time: ~5 minutes")
try:
    affectnet_datasets = [
        "noamsegal/affectnet-training-data",
        "tom99763/affectnet-cnn-validation"
    ]
    affectnet_path = None
    for dataset in affectnet_datasets:
        try:
            affectnet_path = kagglehub.dataset_download(dataset)
            dataset_paths['affectnet'] = affectnet_path
            print(f"      [OK] AffectNet: {affectnet_path}")
            break
        except:
            continue
    if affectnet_path is None:
        print(f"      [WARN] AffectNet not available, continuing without it")
except Exception as e:
    print(f"      [WARN] AffectNet error: {e}")

# Save paths to JSON for production training
paths_file = '/content/dataset_paths.json'
with open(paths_file, 'w') as f:
    json.dump(dataset_paths, f, indent=2)

# Also save legacy .txt format for compatibility
legacy_file = '/content/dataset_paths.txt'
with open(legacy_file, 'w') as f:
    f.write(f"FER2013: {dataset_paths['fer2013']}\n")
    f.write(f"UTKFace: {dataset_paths['utkface']}\n")

print("\n" + "=" * 60)
print("[OK] DATASET DOWNLOAD COMPLETE")
print("=" * 60)
print(f"\nTotal datasets: {len(dataset_paths)}")
for name, path in dataset_paths.items():
    print(f"  - {name.upper()}: {path}")
print(f"\n[INFO] Paths saved to: {paths_file}")
print("=" * 60)

In [None]:
# ============================================================
# CELL 5: INSTALL PRODUCTION DEPENDENCIES
# ============================================================

print("=" * 60)
print("INSTALLING PRODUCTION DEPENDENCIES")
print("=" * 60)

print("\n[INFO] Installing packages for production training...")
print("[INFO] This includes: timm, albumentations, imgaug, etc.")
print("[INFO] Time: ~2-3 minutes\n")

%pip install -q timm albumentations imgaug tensorboard onnx onnxscript onnxruntime torchmetrics opencv-python

print("\n[OK] All production dependencies installed!")
print("=" * 60)

In [None]:
# ============================================================
# CELL 6: RUN PRODUCTION TRAINING
# ============================================================

import os
from pathlib import Path
import torch

print("=" * 60)
print("PRODUCTION TRAINING - OPTION B")
print("Target: 78-85% Accuracy")
print("=" * 60)

# Display configuration
print("\n[CONFIG] Training Configuration:")
print("  Backbone: EfficientNet-B2")
print("  Epochs: 100 (with early stopping)")
print("  Batch Size: 64")
print("  Learning Rate: 0.0001")
print("  Optimizer: AdamW")
print("  Loss: Focal Loss + Label Smoothing")
print("  Augmentation: Advanced (Albumentations + Mixup/Cutmix)")
print("  Regularization: Dropout 0.5 + Weight Decay 1e-4")

# Estimate time
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    if 'T4' in gpu_name:
        print("\n[INFO] Using T4 GPU - Estimated time: 10-12 hours")
    elif 'V100' in gpu_name:
        print("\n[INFO] Using V100 GPU - Estimated time: 4-6 hours")
    elif 'A100' in gpu_name:
        print("\n[INFO] Using A100 GPU - Estimated time: 2-3 hours")

# Verify files
if not os.path.exists('/content/dataset_paths.json'):
    print("[ERROR] Dataset paths file not found!")
    print("[INFO] Please run Cell 4 first")
    raise FileNotFoundError("Dataset paths required")

print("\n[START] Starting production training...")
print("=" * 60)
print("\n")

# Pull latest code
%cd /content/repo
!git pull
%cd /content/repo/training_experiments

# Run production training (choose one script)
!python train_production.py \
    --data_paths /content/dataset_paths.json \
    --epochs 100 \
    --batch_size 64 \
    --lr 0.0001 \
    --patience 15 \
    --save_dir /content/checkpoints_production

print("\n" + "=" * 60)
print("[OK] TRAINING COMPLETE!")
print("=" * 60)

In [None]:
# ============================================================
# CELL 7: EVALUATE RESULTS
# ============================================================
# Read training_results.json from the checkpoint directory, print the best
# accuracy / epoch figures and compare the accuracy with the 78% target.

import json
from pathlib import Path

rule = "=" * 60

print(rule)
print("TRAINING RESULTS")
print(rule)

# Load results
results_file = Path('/content/checkpoints_production/training_results.json')

if not results_file.exists():
    print("\n[WARN] Results file not found")
else:
    with open(results_file) as f:
        results = json.load(f)

    # Missing keys fall back to 0 / 'N/A' instead of raising.
    best_acc = results.get('best_accuracy', 0)

    print(f"\n[SUCCESS] Training Completed!")
    print(f"\nBest Validation Accuracy: {best_acc:.2f}%")
    print(f"Best Epoch: {results.get('best_epoch', 'N/A')}")
    print(f"Total Epochs: {results.get('total_epochs', 'N/A')}")

    # Check if target achieved: thresholds are tried from highest to lowest
    # and the first one reached decides the message.
    verdicts = (
        (78, ("\n[OK] TARGET ACHIEVED! (78-85%)", "Model is production-ready!")),
        (75, ("\n[OK] Good accuracy, close to target",)),
    )
    for floor, messages in verdicts:
        if best_acc >= floor:
            for message in messages:
                print(message)
            break
    else:
        print("\n[WARN] Below target")

    print(f"\n[INFO] Model saved to:")
    print(f"  - /content/checkpoints_production/best_model.pth")

print(rule)

In [None]:
# ============================================================
# CELL 8: SAVE TO GOOGLE DRIVE
# ============================================================
# Mount Google Drive and copy the training artifacts into a new
# timestamped folder under MyDrive/SmartRetailAI_Models.

from google.colab import drive
import shutil
from pathlib import Path
from datetime import datetime

# Mount Drive
drive.mount('/content/drive')

# One destination folder per run, named after the current date and time.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
drive_dir = Path('/content/drive/MyDrive/SmartRetailAI_Models') / f'production_{timestamp}'
drive_dir.mkdir(parents=True, exist_ok=True)

rule = "=" * 60
print(rule)
print("SAVING TO GOOGLE DRIVE")
print(rule)

# Each entry: (source path on the runtime, file name to use on Drive)
files_to_copy = [
    ('/content/checkpoints_production/best_model.pth', 'best_model_production.pth'),
    ('/content/checkpoints_production/training_results.json', 'training_results.json'),
    ('/content/dataset_paths.json', 'dataset_paths.json'),
]

copied = []
total_size = 0

for src, dst_name in files_to_copy:
    src_path = Path(src)
    if not src_path.exists():
        # Missing artifacts are reported and skipped, not treated as errors.
        print(f"  [WARN] {dst_name} - not found")
        continue

    dst_path = drive_dir / dst_name
    shutil.copy2(src_path, dst_path)

    # Size of the copy as written to Drive, in MiB.
    size = dst_path.stat().st_size / 2**20
    total_size += size
    copied.append(dst_name)
    print(f"  [OK] {dst_name} ({size:.1f} MB)")

print(f"\n[OK] Saved {len(copied)} files ({total_size:.1f} MB total)")
print(f"\n[INFO] Location: {drive_dir}")
print(rule)