# FaceNet Training - Kaggle

Notebook huấn luyện FaceNet trên Kaggle với GPU miễn phí.

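## Chuẩn bị:
1. Upload dataset `CelebA_Aligned_Balanced` lên Kaggle Datasets
2. Add dataset vào notebook này (tên dataset trên Kaggle phải khớp với `KAGGLE_DATASET_NAME` trong cell cấu hình bên dưới)
3. Bật GPU: Settings > Accelerator > GPU P100/T4
4. Bật Internet cho notebook trong Settings (cần để clone repository và cài đặt dependencies)
5. (Tuỳ chọn) Thêm secret `GITHUB_TOKEN` trong Add-ons > Secrets nếu repository là private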

In [None]:
# Detect the runtime environment
import os
import sys

# The presence of the KAGGLE_KERNEL_RUN_TYPE environment variable is used as
# the marker that this code is running inside a Kaggle kernel.
IS_KAGGLE = os.environ.get('KAGGLE_KERNEL_RUN_TYPE') is not None
print(f"Kaggle environment: {IS_KAGGLE}")

# Outside Kaggle the notebook only warns; nothing is aborted here.
if IS_KAGGLE is False:
    print("WARNING: Notebook này được thiết kế cho Kaggle!")

In [None]:
# Kaggle path configuration
ROOT = "/kaggle/working/FaceRecognition"
CHECKPOINT_DIR = "/kaggle/working/checkpoints/facenet"

# Dataset location - change the name to match your own dataset on Kaggle
KAGGLE_DATASET_NAME = "celeba-aligned-balanced"
DATA_DIR = "/kaggle/input/" + KAGGLE_DATASET_NAME

# Create the checkpoint directory up front (no error if it already exists).
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Echo the resolved locations, one per line.
print(
    f"ROOT: {ROOT}",
    f"DATA_DIR: {DATA_DIR}",
    f"CHECKPOINT_DIR: {CHECKPOINT_DIR}",
    sep="\n",
)

In [None]:
# === CHECKPOINT DATASET CONFIGURATION ===
# Name of a Kaggle dataset (under /kaggle/input) that holds earlier ``.pth``
# checkpoints. Leave empty to train from scratch.
CHECKPOINT_DATASET_NAME = ""

import shutil
import glob


def restore_checkpoints(source_dir, target_dir):
    """Copy every ``*.pth`` file found under ``source_dir`` into ``target_dir``.

    The search is recursive. Files are copied flat (by base name) and a file
    whose name already exists in ``target_dir`` is left untouched, so an
    existing checkpoint is never overwritten.

    Args:
        source_dir: Directory to search for ``.pth`` files.
        target_dir: Directory receiving the copies; created when at least one
            ``.pth`` file is found.

    Returns:
        The base names of the files that were actually copied, in copy order.
        Empty when ``source_dir`` is missing, holds no ``.pth`` file, or every
        file was already present in ``target_dir``.
    """
    if not os.path.exists(source_dir):
        # Previously this case was silent, which made a mistyped dataset name
        # indistinguishable from a successful restore.
        print(f"[WARN] Khong tim thay checkpoint dataset: {source_dir}")
        return []

    print("[OK] Tim thay checkpoint dataset")
    # Sorted so that the result is deterministic when two files in different
    # sub-folders share a base name (the first one wins).
    pth_files = sorted(glob.glob(os.path.join(source_dir, "**/*.pth"), recursive=True))
    if not pth_files:
        print(f"[WARN] Khong co file .pth trong: {source_dir}")
        return []

    os.makedirs(target_dir, exist_ok=True)
    copied = []
    for pth_file in pth_files:
        file_name = os.path.basename(pth_file)
        dest_path = os.path.join(target_dir, file_name)
        if os.path.exists(dest_path):
            continue
        shutil.copy(pth_file, dest_path)
        print(f"[COPY] {file_name}")
        copied.append(file_name)
    return copied


if CHECKPOINT_DATASET_NAME:
    checkpoint_input_dir = f"/kaggle/input/{CHECKPOINT_DATASET_NAME}"
    restore_checkpoints(checkpoint_input_dir, CHECKPOINT_DIR)
else:
    print("[INFO] Training tu dau (khong co checkpoint)")

In [None]:
# Check the Kaggle dataset: list everything under /kaggle/input, then confirm
# that DATA_DIR exists and show its contents.
print("=== KAGGLE INPUT DATASETS ===")
!ls -la /kaggle/input/

if os.path.exists(DATA_DIR):
    print(f"\n[OK] Dataset found at: {DATA_DIR}")
    # IPython substitutes {DATA_DIR} with the Python variable's value before
    # handing the command to the shell.
    !ls -la {DATA_DIR}
else:
    # Only reports the problem; nothing is raised here, so later cells still run.
    print(f"\n[ERROR] Dataset not found at: {DATA_DIR}")

In [None]:
# GitHub token configuration
# Best-effort lookup of the "GITHUB_TOKEN" Kaggle secret: any failure
# (including kaggle_secrets not being importable) falls back to the public URL.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    GITHUB_TOKEN = user_secrets.get_secret("GITHUB_TOKEN")
    print("[OK] Da lay GITHUB_TOKEN")
except Exception:
    GITHUB_TOKEN = None
    print("[INFO] Su dung public URL")

# With a (non-empty) token the URL carries it in front of the host; otherwise
# the plain https URL is used.
# NOTE(review): REPO_URL then contains the secret - avoid printing it.
_credentials = f"{GITHUB_TOKEN}@" if GITHUB_TOKEN else ""
REPO_URL = f"https://{_credentials}github.com/sin0235/FaceRecognition.git"

In [None]:
# Clone the repository into ROOT, or update the existing checkout when ROOT
# is already present.
if os.path.exists(ROOT):
    print("Repository da ton tai, dang pull updates...")
    %cd {ROOT}
    if GITHUB_TOKEN:
        # Re-point origin at the token-bearing URL before pulling.
        # NOTE(review): this presumably persists the token in the checkout's
        # git config - confirm that is acceptable.
        !git remote set-url origin {REPO_URL}
    !git pull
else:
    print(f"Dang clone repository...")
    !git clone {REPO_URL} {ROOT}
    %cd {ROOT}

# Both branches finish with a %cd into ROOT; print the resulting working
# directory and its contents as a sanity check.
print(f"\nWorking directory: {os.getcwd()}")
!ls -la

In [None]:
# Add ROOT to the Python path
# ROOT is placed at the front of sys.path so it is searched first; nothing is
# done when it is already listed.
if sys.path.count(ROOT) == 0:
    sys.path[:0] = [ROOT]
    print(f"Da them {ROOT} vao Python path")

In [None]:
# Install dependencies (torch is NOT reinstalled)
print("Cai dat dependencies...")
# --no-deps installs facenet-pytorch alone, without pulling in its own
# dependencies - presumably to keep the preinstalled torch untouched.
!pip install -q facenet-pytorch --no-deps
# Remaining packages are installed normally (-q keeps pip's output short).
!pip install -q opencv-python-headless Pillow scikit-learn tqdm pyyaml
print("\nHoan tat cai dat!")

In [None]:
# Check the GPU
import torch

print("=== GPU INFO ===")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # Report the name and total memory (in units of 1e9 bytes) of device 0.
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"  Device: {torch.cuda.get_device_name(0)}")
    print(f"  Memory: {gpu_props.total_memory / 1e9:.1f} GB")

In [None]:
# Check the training data
def _identity_folders(image_dir):
    """Return the names of the immediate sub-directories of ``image_dir``."""
    return [
        entry
        for entry in os.listdir(image_dir)
        if os.path.isdir(os.path.join(image_dir, entry))
    ]


train_img_dir = os.path.join(DATA_DIR, "train")
val_img_dir = os.path.join(DATA_DIR, "val")

# Fall back to the nested layout when "train" is not directly under DATA_DIR.
if not os.path.exists(train_img_dir):
    _nested_root = os.path.join(DATA_DIR, "CelebA_Aligned_Balanced")
    train_img_dir = os.path.join(_nested_root, "train")
    val_img_dir = os.path.join(_nested_root, "val")

print("=== KIEM TRA DU LIEU ===")

# Each sub-directory of the split folder is counted as one identity.
if not os.path.exists(train_img_dir):
    print(f"[ERROR] Train folder not found")
else:
    train_identities = _identity_folders(train_img_dir)
    print(f"[OK] Train: {len(train_identities)} identities")

if not os.path.exists(val_img_dir):
    print(f"[ERROR] Val folder not found")
else:
    val_identities = _identity_folders(val_img_dir)
    print(f"[OK] Val: {len(val_identities)} identities")

## Training FaceNet với Hard Negative Mining

Sử dụng script `train_facenet.py` với `--mining hard` để tự động:
- Mine hard triplets mỗi batch dựa trên embeddings hiện tại
- Tạo embeddings discriminative hơn (similarity giữa identities khác nhau thấp hơn)
- Logging đầy đủ metrics (loss, accuracy, distance, GPU memory, epoch time)

In [None]:
# Override config paths for Kaggle
import yaml

# Locate the directory that contains the train/val folders: either DATA_DIR
# itself or its "CelebA_Aligned_Balanced" sub-folder, checked in that order.
for _candidate in (DATA_DIR, os.path.join(DATA_DIR, 'CelebA_Aligned_Balanced')):
    if os.path.exists(os.path.join(_candidate, 'train')):
        data_root = _candidate
        break
else:
    # Neither layout matched; stop here instead of training on a bad path.
    raise FileNotFoundError(f"Cannot find train/val folders in {DATA_DIR}")

print(f"Data root: {data_root}")
print(f"Checkpoint dir: {CHECKPOINT_DIR}")

In [None]:
# Override the config file paths for the Kaggle environment
config_path = os.path.join(ROOT, 'configs/facenet_kaggle.yaml')
with open(config_path, 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Point the dataset section at the train/val folders under data_root.
config['dataset'].update(
    train_data_root=os.path.join(data_root, 'train'),
    val_data_root=os.path.join(data_root, 'val'),
)
# The 'path' section is replaced as a whole with the Kaggle output locations.
config['path'] = dict(
    checkpoint_dir=CHECKPOINT_DIR,
    logs_dir=os.path.join(CHECKPOINT_DIR, 'logs'),
)

# Write the patched config to a temporary file
temp_config_path = '/kaggle/working/facenet_kaggle_temp.yaml'
with open(temp_config_path, 'w', encoding='utf-8') as f:
    yaml.dump(config, f)

print(f"[OK] Created temp config: {temp_config_path}")
print(f"\nConfig overview:")
# (label, config section, key) for each value shown in the overview; values
# are looked up one at a time so output stops at the first missing key.
for _label, _section, _key in (
    ("Epochs", 'training', 'num_epochs'),
    ("Batch size", 'training', 'batch_size'),
    ("LR", 'training', 'learning_rate'),
    ("Margin", 'model', 'margin'),
    ("Scheduler step", 'training', 'scheduler_step'),
    ("Scheduler gamma", 'training', 'scheduler_gamma'),
    ("Patience", 'training', 'patience'),
):
    print(f"  {_label}: {config[_section][_key]}")

In [None]:
# Train FaceNet with HARD NEGATIVE MINING
print("="*60)
print("BAT DAU TRAINING FACENET - HARD NEGATIVE MINING")
print("="*60)

# Launch the training script with the temporary config and "--mining hard".
# The script path is relative - presumably this relies on the working
# directory being the repository root (TODO confirm).
# IPython substitutes {temp_config_path} before the shell command runs.
!python models/facenet/train_facenet.py \
    --config {temp_config_path} \
    --mining hard

In [None]:
# Visualize training history
import matplotlib.pyplot as plt
import json

history_path = os.path.join(CHECKPOINT_DIR, 'logs', 'training_history.json')


def _plot_train_val(ax, history, train_key, val_key, ylabel, title):
    """Plot the train and validation curves of one metric on ``ax``.

    A key missing from ``history`` is drawn as an empty series rather than
    raising ``KeyError``, so one absent metric does not abort the whole figure.
    """
    ax.plot(history.get(train_key, []), label='Train')
    ax.plot(history.get(val_key, []), label='Val')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)


def _plot_optional_curve(ax, series, label, color, ylabel, title,
                         empty_text, empty_title):
    """Plot ``series`` on ``ax`` with a fixed [0, 1] y-range.

    When ``series`` is missing or empty, a centred placeholder text is shown
    instead of a curve.
    """
    if series:
        ax.plot(series, label=label, color=color, linewidth=2)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()
        ax.grid(True)
        ax.set_ylim([0, 1])
    else:
        ax.text(0.5, 0.5, empty_text, ha='center', va='center')
        ax.set_title(empty_title)


if os.path.exists(history_path):
    with open(history_path, 'r', encoding='utf-8') as f:
        history = json.load(f)

    fig, axes = plt.subplots(2, 3, figsize=(18, 10))

    # Top row: loss, triplet accuracy (fast metric), verification accuracy
    # (realistic metric).
    _plot_train_val(axes[0, 0], history, 'train_loss', 'val_loss',
                    'Loss', 'Triplet Loss')
    _plot_train_val(axes[0, 1], history, 'train_acc', 'val_triplet_acc',
                    'Accuracy', 'Triplet Accuracy (d(a,p) < d(a,n))')
    _plot_optional_curve(
        axes[0, 2], history.get('val_ver_acc'),
        label='Verification Acc', color='green', ylabel='Accuracy',
        title='Verification Accuracy (Realistic)',
        empty_text='No verification data', empty_title='Verification Accuracy',
    )

    # Bottom row: positive distance, negative distance, verification threshold.
    _plot_train_val(axes[1, 0], history, 'train_pos_dist', 'val_pos_dist',
                    'Distance', 'Positive Distance (same identity)')
    _plot_train_val(axes[1, 1], history, 'train_neg_dist', 'val_neg_dist',
                    'Distance', 'Negative Distance (different identity)')
    _plot_optional_curve(
        axes[1, 2], history.get('val_ver_threshold'),
        label='Optimal Threshold', color='purple', ylabel='Threshold',
        title='Verification Threshold (Cosine Similarity)',
        empty_text='No threshold data', empty_title='Verification Threshold',
    )

    plt.tight_layout()
    # Save the figure next to the checkpoints so it ends up in the zip archive
    # of CHECKPOINT_DIR as well.
    plt.savefig(os.path.join(CHECKPOINT_DIR, 'training_curves.png'), dpi=150)
    plt.show()

    # Print summary
    print(f"\n{'='*60}")
    print("TRAINING SUMMARY")
    print(f"{'='*60}")
    print(f"Mining strategy: {history.get('mining_strategy', 'N/A')}")
    print(f"Total time: {history.get('total_time_minutes', 0):.2f} minutes")
    print(f"Best val loss: {history.get('best_val_loss', 0):.4f}")

    # Show final metrics
    if history.get('val_triplet_acc'):
        final_triplet_acc = history['val_triplet_acc'][-1]
        print(f"Final triplet acc: {final_triplet_acc:.4f} (fast metric)")

    if history.get('val_ver_acc'):
        final_ver_acc = history['val_ver_acc'][-1]
        # `or [0.5]` also covers a key that is present but holds an empty
        # list; `.get(key, [0.5])[-1]` would raise IndexError in that case.
        final_threshold = (history.get('val_ver_threshold') or [0.5])[-1]
        print(f"Final verification acc: {final_ver_acc:.4f} @ threshold={final_threshold:.2f} (realistic)")

    print(f"{'='*60}")
else:
    print(f"[ERROR] History file not found: {history_path}")

## Download Checkpoint

In [None]:
# Show the checkpoint files
print("=== CHECKPOINT FILES ===")
!ls -lh {CHECKPOINT_DIR}

# Print where the best/last checkpoints are expected.
# NOTE(review): the file names are assumed to match what the training script
# writes - this cell does not check that they exist.
print("\n=== Download ===")
print(f"Best model: {CHECKPOINT_DIR}/facenet_best.pth")
print(f"Last model: {CHECKPOINT_DIR}/facenet_last.pth")

In [None]:
# Zip the checkpoint folder for download
import shutil

zip_name = "facenet_checkpoints"
# Archive base path WITHOUT the extension; the ".zip" suffix is added by
# make_archive (see the message printed below).
zip_path = f"/kaggle/working/{zip_name}"

# Archive the contents of CHECKPOINT_DIR in zip format.
shutil.make_archive(zip_path, "zip", CHECKPOINT_DIR)

print(f"[OK] Da tao file zip: {zip_path}.zip")
print(f"\nDownload file nay tu panel Output ben phai.")
# List the zip files in the working directory with their sizes.
!ls -lh /kaggle/working/*.zip