In [1]:
# -*- coding: utf-8 -*-

"""
TRANSFORMER V1: MULTI-SEED TRAINING (5 SEEDS)
Caches entire AVLips dataset once, then trains 5 models with different seeds
Each seed samples 2000 real + 2000 fake from cached data
Optimized for Google Colab Pro (53GB RAM, 22.5GB VRAM)
"""

# ============================================================================
# STEP 1: MOUNT DRIVE & IMPORTS
# ============================================================================

from google.colab import drive
drive.mount('/content/drive')

import os
import cv2
import time
import torch
import zipfile
import librosa
import numpy as np
import pandas as pd
import torch.nn as nn
from pathlib import Path
from tqdm import tqdm
import torch.optim as optim
from torch.optim import lr_scheduler
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torchvision.models as models
from torch.cuda.amp import autocast, GradScaler
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score
import warnings

warnings.filterwarnings('ignore')
torch.backends.cudnn.benchmark = True
print("✅ Libraries imported successfully")
print(f"🔧 PyTorch: {torch.__version__}, CUDA: {torch.cuda.is_available()}")

# ============================================================================
# STEP 2: EXTRACT AVLIPS DATASET
# ============================================================================

print("\n" + "="*80)
print("📦 EXTRACTING AVLIPS DATASET")
print("="*80)

zip_path = "/content/drive/MyDrive/CSE400 codes - 144/AVLips.zip"
extract_path = '/content/AVLips_data'

if not os.path.exists(extract_path):
    print(f"⏳ Extracting...")
    start_time = time.time()
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print(f"✅ Extracted in {time.time()-start_time:.1f} seconds")
else:
    print(f"✅ Already extracted")

# ============================================================================
# STEP 3: CONFIGURATION
# ============================================================================

class Config:
    def __init__(self):
        self.data_dir = "/content/AVLips_data/AVLips"
        self.model_save_dir = "/content/models_multiseed/"
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        os.makedirs(self.model_save_dir, exist_ok=True)

        # Visual Stream
        self.vis_image_size = (128, 128)
        self.vis_num_frames = 16
        self.vis_cnn_feature_dim = 576
        self.vis_transformer_d_model = 256
        self.vis_transformer_nhead = 8
        self.vis_transformer_layers = 4
        self.vis_transformer_dropout = 0.1

        # Audio Stream
        self.aud_sample_rate = 16000
        self.aud_num_chunks = 5
        self.aud_chunk_duration = 1.0
        self.aud_n_mels = 128
        self.aud_cnn_feature_dim = 576
        self.aud_transformer_d_model = 256
        self.aud_transformer_nhead = 8
        self.aud_transformer_layers = 4
        self.aud_transformer_dropout = 0.1

        # Training
        self.batch_size = 64
        self.accumulation_steps = 4
        self.epochs = 20  # Reduced for faster multi-seed training
        self.learning_rate = 5e-5
        self.weight_decay = 0.05
        self.patience = 5
        self.gradient_clip = 1.0

        # Multi-seed config
        self.seeds = [42, 123, 456, 789, 2024]
        self.samples_per_class_per_seed = 2000  # 2000 real + 2000 fake per seed

config = Config()
print(f"✅ Config loaded: {config.device}")
print(f"🌱 Seeds: {config.seeds}")
print(f"📊 Samples per seed: {config.samples_per_class_per_seed * 2} (2000 real + 2000 fake)")

# ============================================================================
# STEP 4: DATA PROCESSING
# ============================================================================

def process_visual_stream(video_path: str, config: Config):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return None

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames < config.vis_num_frames:
        return None

    frame_indices = np.linspace(0, total_frames - 1, config.vis_num_frames, dtype=int)
    frames = []
    face_detector = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    for i in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if not ret:
            continue

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_detector.detectMultiScale(gray, 1.1, 4)

        if len(faces) > 0:
            (x, y, w, h) = faces[0]
            mouth_crop = frame[y + int(h * 0.6):y + h, x + int(w * 0.25):x + int(w * 0.75)]
            if mouth_crop.size > 0:
                resized_crop = cv2.resize(mouth_crop, config.vis_image_size)
                resized_crop_rgb = cv2.cvtColor(resized_crop, cv2.COLOR_BGR2RGB)
                frames.append(resized_crop_rgb)

    cap.release()
    return np.stack(frames) if len(frames) == config.vis_num_frames else None

def process_audio_stream(video_path: str, config: Config):
    try:
        parts = Path(video_path).parts
        audio_filename = Path(video_path).stem + ".wav"
        label_folder = parts[-2]
        base_data_dir = str(Path(video_path).parent.parent)
        audio_path = os.path.join(base_data_dir, "wav", label_folder, audio_filename)

        y, sr = librosa.load(audio_path, sr=config.aud_sample_rate)
        total_samples = int(config.aud_chunk_duration * config.aud_num_chunks * sr)

        if len(y) < total_samples:
            y = np.pad(y, (0, total_samples - len(y)), mode='constant')
        else:
            y = y[:total_samples]

        samples_per_chunk = int(config.aud_chunk_duration * sr)
        mel_list = []

        for i in range(config.aud_num_chunks):
            chunk = y[i*samples_per_chunk : (i+1)*samples_per_chunk]
            mel = librosa.feature.melspectrogram(y=chunk, sr=sr, n_mels=config.aud_n_mels)
            mel_db = librosa.power_to_db(mel, ref=np.max)
            mel_db = (mel_db - mel_db.mean()) / (mel_db.std() + 1e-9)
            mel_list.append(torch.tensor(mel_db, dtype=torch.float32))

        return torch.stack(mel_list, axis=0)
    except Exception:
        return None

# ============================================================================
# STEP 5: DATASET CLASSES
# ============================================================================

class DualStreamDataset(Dataset):
    def __init__(self, file_paths, labels, config):
        self.file_paths = file_paths
        self.labels = labels
        self.config = config

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        try:
            visual_frames_hwc = process_visual_stream(self.file_paths[idx], self.config)
            if visual_frames_hwc is None:
                return None

            visual_frames_tchw = visual_frames_hwc.transpose(0, 3, 1, 2)
            audio_mels = process_audio_stream(self.file_paths[idx], self.config)
            if audio_mels is None:
                return None

            return (visual_frames_tchw, audio_mels.unsqueeze(1)), torch.tensor(self.labels[idx], dtype=torch.float32)
        except Exception:
            return None

class RAMCachedDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        visual_frames_np, audio_tensor = self.data[idx]
        label = self.labels[idx]

        if self.transform:
            augmented_frames = []
            for frame_np in visual_frames_np:
                frame_hwc = frame_np.transpose(1, 2, 0)
                augmented_frames.append(self.transform(frame_hwc))
            visual_tensor = torch.stack(augmented_frames)
        else:
            visual_tensor = torch.from_numpy(visual_frames_np).float()

        return (visual_tensor, audio_tensor), label

# ============================================================================
# STEP 6: TRANSFORMER MODEL
# ============================================================================

class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, max_len: int = 5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        batch_size, seq_len, _ = x.shape
        return x + self.pe[:seq_len, :].unsqueeze(0).expand(batch_size, -1, -1)

class VisualStream_Transformer(nn.Module):
    def __init__(self, config):
        super().__init__()
        mobilenet = models.mobilenet_v3_small(weights=models.MobileNet_V3_Small_Weights.DEFAULT)
        self.cnn_features = mobilenet.features
        self.avgpool = mobilenet.avgpool
        self.proj = nn.Linear(config.vis_cnn_feature_dim, config.vis_transformer_d_model)
        self.proj_dropout = nn.Dropout(0.3)
        self.pos_encoding = PositionalEncoding(config.vis_transformer_d_model, max_len=config.vis_num_frames)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.vis_transformer_d_model, nhead=config.vis_transformer_nhead,
            dim_feedforward=config.vis_transformer_d_model * 4, dropout=config.vis_transformer_dropout,
            batch_first=True, norm_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, config.vis_transformer_layers)
        self.layer_norm = nn.LayerNorm(config.vis_transformer_d_model)
        self.out_dim = config.vis_transformer_d_model

    def forward(self, x):
        b, t, c, h, w = x.shape
        x = x.view(b * t, c, h, w)
        features = self.avgpool(self.cnn_features(x)).view(b, t, -1)
        projected = self.proj_dropout(self.proj(features))
        encoded = self.pos_encoding(projected)
        transformer_out = self.transformer(encoded)
        return self.layer_norm(transformer_out[:, -1, :])

class AudioStream_Transformer(nn.Module):
    def __init__(self, config):
        super().__init__()
        mobilenet = models.mobilenet_v3_small(weights=models.MobileNet_V3_Small_Weights.DEFAULT)
        self.cnn_features = mobilenet.features
        self.avgpool = mobilenet.avgpool
        self.proj = nn.Linear(config.aud_cnn_feature_dim, config.aud_transformer_d_model)
        self.proj_dropout = nn.Dropout(0.3)
        self.pos_encoding = PositionalEncoding(config.aud_transformer_d_model, max_len=config.aud_num_chunks)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.aud_transformer_d_model, nhead=config.aud_transformer_nhead,
            dim_feedforward=config.aud_transformer_d_model * 4, dropout=config.aud_transformer_dropout,
            batch_first=True, norm_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, config.aud_transformer_layers)
        self.layer_norm = nn.LayerNorm(config.aud_transformer_d_model)
        self.out_dim = config.aud_transformer_d_model

    def forward(self, x):
        b, t, c, h, w = x.shape
        x = x.view(b * t, c, h, w).repeat(1, 3, 1, 1)
        features = self.avgpool(self.cnn_features(x)).view(b, t, -1)
        projected = self.proj_dropout(self.proj(features))
        encoded = self.pos_encoding(projected)
        transformer_out = self.transformer(encoded)
        return self.layer_norm(transformer_out[:, -1, :])

class FusionModel_Transformer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.visual_stream = VisualStream_Transformer(config)
        self.audio_stream = AudioStream_Transformer(config)
        fusion_input_dim = self.visual_stream.out_dim + self.audio_stream.out_dim
        self.fusion_head = nn.Sequential(
            nn.Linear(fusion_input_dim, 256), nn.ReLU(), nn.Dropout(0.6), nn.Linear(256, 1)
        )

    def forward(self, visual_input, audio_input):
        visual_features = self.visual_stream(visual_input)
        audio_features = self.audio_stream(audio_input)
        return self.fusion_head(torch.cat((visual_features, audio_features), dim=1))

# ============================================================================
# STEP 7: TRAINING FUNCTIONS
# ============================================================================

class LabelSmoothingBCELoss(nn.Module):
    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, pred, target):
        target = target * (1 - self.smoothing) + 0.5 * self.smoothing
        return F.binary_cross_entropy_with_logits(pred, target)

def train_one_epoch(model, loader, optimizer, criterion, scaler, config):
    model.train()
    total_loss = 0
    for i, ((visual_data, audio_data), labels) in enumerate(tqdm(loader, desc="Training")):
        visual_data = visual_data.to(config.device, non_blocking=True)
        audio_data = audio_data.to(config.device, non_blocking=True)
        labels = labels.to(config.device, non_blocking=True).unsqueeze(1).float()

        with autocast():
            outputs = model(visual_data, audio_data)
            loss = criterion(outputs, labels) / config.accumulation_steps

        scaler.scale(loss).backward()

        if (i + 1) % config.accumulation_steps == 0:
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.gradient_clip)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad(set_to_none=True)

        total_loss += loss.item() * config.accumulation_steps

    return total_loss / len(loader)

def validate_one_epoch(model, loader, criterion, config):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for (visual_data, audio_data), labels in tqdm(loader, desc="Validating"):
            visual_data = visual_data.to(config.device, non_blocking=True)
            audio_data = audio_data.to(config.device, non_blocking=True)
            labels = labels.to(config.device, non_blocking=True).unsqueeze(1).float()

            with autocast():
                loss = criterion(model(visual_data, audio_data), labels)

            total_loss += loss.item()

    return total_loss / len(loader)

# ============================================================================
# STEP 8: CACHE ENTIRE AVLIPS DATASET (ONCE!)
# ============================================================================

print("\n" + "="*80)
print("🗄️  CACHING ENTIRE AVLIPS DATASET INTO RAM (THIS TAKES ~30-40 MIN)")
print("="*80)

real_dir = os.path.join(config.data_dir, "0_real")
fake_dir = os.path.join(config.data_dir, "1_fake")

all_real_files = [os.path.join(real_dir, f) for f in os.listdir(real_dir) if f.endswith('.mp4')]
all_fake_files = [os.path.join(fake_dir, f) for f in os.listdir(fake_dir) if f.endswith('.mp4')]

print(f"Found: {len(all_real_files)} real, {len(all_fake_files)} fake videos")

def collate_fn_skip_errors(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch) if batch else (None, None)

def cache_all_data(files, labels, desc):
    dataset = DualStreamDataset(files, labels, config)
    loader = DataLoader(dataset, batch_size=config.batch_size, num_workers=os.cpu_count(), collate_fn=collate_fn_skip_errors)
    cached_data, cached_labels = [], []

    for data, batch_labels in tqdm(loader, desc=f"Caching {desc}"):
        if data is not None:
            visual_batch, audio_batch = data
            for i in range(visual_batch.shape[0]):
                cached_data.append((visual_batch[i].numpy(), audio_batch[i]))
                cached_labels.append(batch_labels[i].item())

    return cached_data, cached_labels

# Cache ALL real videos
all_real_cached, all_real_labels = cache_all_data(all_real_files, [0]*len(all_real_files), "ALL REAL")
print(f"✅ Cached {len(all_real_cached)} real videos")

# Cache ALL fake videos
all_fake_cached, all_fake_labels = cache_all_data(all_fake_files, [1]*len(all_fake_files), "ALL FAKE")
print(f"✅ Cached {len(all_fake_cached)} fake videos")

print(f"\n🎉 FULL DATASET CACHED! Real: {len(all_real_cached)}, Fake: {len(all_fake_cached)}")

# ============================================================================
# STEP 9: MULTI-SEED TRAINING LOOP
# ============================================================================

print("\n" + "="*80)
print(f"🌱 TRAINING {len(config.seeds)} MODELS WITH DIFFERENT SEEDS")
print("="*80)

train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

all_results = []

for seed_idx, seed in enumerate(config.seeds):
    print(f"\n{'='*80}")
    print(f"🌱 SEED {seed_idx+1}/{len(config.seeds)}: {seed}")
    print(f"{'='*80}")

    # Set seed for reproducibility
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # Sample 2000 real + 2000 fake from cached data
    real_indices = np.random.choice(len(all_real_cached), min(config.samples_per_class_per_seed, len(all_real_cached)), replace=False)
    fake_indices = np.random.choice(len(all_fake_cached), min(config.samples_per_class_per_seed, len(all_fake_cached)), replace=False)

    sampled_real_data = [all_real_cached[i] for i in real_indices]
    sampled_fake_data = [all_fake_cached[i] for i in fake_indices]
    sampled_real_labels = [0] * len(sampled_real_data)
    sampled_fake_labels = [1] * len(sampled_fake_data)

    # Combine
    all_sampled_data = sampled_real_data + sampled_fake_data
    all_sampled_labels = sampled_real_labels + sampled_fake_labels

    print(f"📊 Sampled: {len(sampled_real_data)} real + {len(sampled_fake_data)} fake = {len(all_sampled_data)} total")

    # Train/val/test split
    train_indices, temp_indices = train_test_split(range(len(all_sampled_data)), test_size=0.3, random_state=seed,
                                                    stratify=all_sampled_labels)
    val_indices, test_indices = train_test_split(temp_indices, test_size=0.5, random_state=seed,
                                                  stratify=[all_sampled_labels[i] for i in temp_indices])

    train_data = [all_sampled_data[i] for i in train_indices]
    train_labels = torch.tensor([all_sampled_labels[i] for i in train_indices])
    val_data = [all_sampled_data[i] for i in val_indices]
    val_labels = torch.tensor([all_sampled_labels[i] for i in val_indices])
    test_data = [all_sampled_data[i] for i in test_indices]
    test_labels = torch.tensor([all_sampled_labels[i] for i in test_indices])

    print(f"Split: Train={len(train_data)}, Val={len(val_data)}, Test={len(test_data)}")

    # Create datasets
    train_dataset = RAMCachedDataset(train_data, train_labels, transform=train_transform)
    val_dataset = RAMCachedDataset(val_data, val_labels, transform=val_test_transform)
    test_dataset = RAMCachedDataset(test_data, test_labels, transform=val_test_transform)

    train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=2, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False, num_workers=2, pin_memory=True)

    # Build model
    model = FusionModel_Transformer(config).to(config.device)
    optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    criterion = LabelSmoothingBCELoss(smoothing=0.1)
    scaler = GradScaler()
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, min_lr=1e-6)

    model_path = os.path.join(config.model_save_dir, f'transformer_seed{seed}_best.pth')
    best_val_loss = float('inf')
    epochs_no_improve = 0

    # Training loop
    print(f"🚀 Training for {config.epochs} epochs...")
    for epoch in range(config.epochs):
        print(f"\n--- Epoch {epoch+1}/{config.epochs} ---")

        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, scaler, config)
        val_loss = validate_one_epoch(model, val_loader, criterion, config)

        print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), model_path)
            print(f"🏆 Best model saved!")
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= config.patience:
                print(f"🛑 Early stopping")
                break

    # Evaluate
    model.load_state_dict(torch.load(model_path))
    model.eval()

    all_labels, all_preds = [], []
    with torch.no_grad():
        for (visual_data, audio_data), labels in tqdm(test_loader, desc="Testing"):
            visual_data = visual_data.to(config.device)
            audio_data = audio_data.to(config.device)
            outputs = model(visual_data, audio_data)
            all_preds.extend(torch.sigmoid(outputs).cpu().numpy())
            all_labels.extend(labels.numpy())

    all_preds = np.array(all_preds).flatten()
    all_labels = np.array(all_labels).flatten()
    preds_binary = (all_preds > 0.5).astype(int)

    accuracy = (preds_binary == all_labels).mean()
    auc_score = roc_auc_score(all_labels, all_preds)

    print(f"\n✅ SEED {seed} RESULTS:")
    print(f"   Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"   AUC:      {auc_score:.4f} ({auc_score*100:.2f}%)")

    all_results.append({'seed': seed, 'accuracy': accuracy, 'auc': auc_score})

# ============================================================================
# FINAL SUMMARY
# ============================================================================

print("\n" + "="*80)
print("📊 MULTI-SEED TRAINING COMPLETE!")
print("="*80)

df = pd.DataFrame(all_results)
print("\n" + df.to_string(index=False))

print(f"\nAccuracy: {df['accuracy'].mean()*100:.2f}% ± {df['accuracy'].std()*100:.2f}%")
print(f"AUC:      {df['auc'].mean()*100:.2f}% ± {df['auc'].std()*100:.2f}%")

df.to_csv('/content/transformer_multiseed_results.csv', index=False)
print("\n✅ Saved: /content/transformer_multiseed_results.csv")
print("="*80)


Mounted at /content/drive
✅ Libraries imported successfully
🔧 PyTorch: 2.8.0+cu126, CUDA: True

📦 EXTRACTING AVLIPS DATASET
⏳ Extracting...
✅ Extracted in 235.1 seconds
✅ Config loaded: cuda
🌱 Seeds: [42, 123, 456, 789, 2024]
📊 Samples per seed: 4000 (2000 real + 2000 fake)

🗄️  CACHING ENTIRE AVLIPS DATASET INTO RAM (THIS TAKES ~30-40 MIN)
Found: 3396 real, 4206 fake videos


Caching ALL REAL: 100%|██████████| 54/54 [33:39<00:00, 37.39s/it]


✅ Cached 1519 real videos


Caching ALL FAKE: 100%|██████████| 66/66 [41:25<00:00, 37.66s/it]


✅ Cached 2210 fake videos

🎉 FULL DATASET CACHED! Real: 1519, Fake: 2210

🌱 TRAINING 5 MODELS WITH DIFFERENT SEEDS

🌱 SEED 1/5: 42
📊 Sampled: 1519 real + 2000 fake = 3519 total
Split: Train=2463, Val=528, Test=528
Downloading: "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_small-047dcff4.pth


100%|██████████| 9.83M/9.83M [00:00<00:00, 137MB/s]


🚀 Training for 20 epochs...

--- Epoch 1/20 ---


Training: 100%|██████████| 39/39 [05:56<00:00,  9.14s/it]
Validating: 100%|██████████| 9/9 [00:06<00:00,  1.30it/s]


Train Loss: 0.7008, Val Loss: 0.6869
🏆 Best model saved!

--- Epoch 2/20 ---


Training: 100%|██████████| 39/39 [00:33<00:00,  1.15it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.24it/s]


Train Loss: 0.6676, Val Loss: 0.6753
🏆 Best model saved!

--- Epoch 3/20 ---


Training: 100%|██████████| 39/39 [00:34<00:00,  1.14it/s]
Validating: 100%|██████████| 9/9 [00:03<00:00,  2.25it/s]


Train Loss: 0.6160, Val Loss: 0.6832

--- Epoch 4/20 ---


Training: 100%|██████████| 39/39 [00:33<00:00,  1.15it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.24it/s]


Train Loss: 0.5162, Val Loss: 0.7700

--- Epoch 5/20 ---


Training: 100%|██████████| 39/39 [00:34<00:00,  1.13it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.17it/s]


Train Loss: 0.4400, Val Loss: 0.8669

--- Epoch 6/20 ---


Training: 100%|██████████| 39/39 [00:34<00:00,  1.13it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.22it/s]


Train Loss: 0.3831, Val Loss: 0.8489

--- Epoch 7/20 ---


Training: 100%|██████████| 39/39 [00:33<00:00,  1.15it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.24it/s]


Train Loss: 0.3456, Val Loss: 0.9316
🛑 Early stopping


Testing: 100%|██████████| 9/9 [00:52<00:00,  5.86s/it]



✅ SEED 42 RESULTS:
   Accuracy: 0.5758 (57.58%)
   AUC:      0.5827 (58.27%)

🌱 SEED 2/5: 123
📊 Sampled: 1519 real + 2000 fake = 3519 total
Split: Train=2463, Val=528, Test=528
🚀 Training for 20 epochs...

--- Epoch 1/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.11it/s]


Train Loss: 0.6970, Val Loss: 0.6842
🏆 Best model saved!

--- Epoch 2/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.11it/s]


Train Loss: 0.6752, Val Loss: 0.6737
🏆 Best model saved!

--- Epoch 3/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.08it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.07it/s]


Train Loss: 0.6309, Val Loss: 0.6339
🏆 Best model saved!

--- Epoch 4/20 ---


Training: 100%|██████████| 39/39 [00:36<00:00,  1.07it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.12it/s]


Train Loss: 0.5396, Val Loss: 0.5930
🏆 Best model saved!

--- Epoch 5/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.06it/s]


Train Loss: 0.4415, Val Loss: 0.6784

--- Epoch 6/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.08it/s]


Train Loss: 0.3819, Val Loss: 0.9185

--- Epoch 7/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.06it/s]


Train Loss: 0.3530, Val Loss: 0.8967

--- Epoch 8/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.3197, Val Loss: 0.6868

--- Epoch 9/20 ---


Training: 100%|██████████| 39/39 [00:36<00:00,  1.08it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.09it/s]


Train Loss: 0.2971, Val Loss: 0.7159
🛑 Early stopping


Testing: 100%|██████████| 9/9 [00:04<00:00,  2.09it/s]



✅ SEED 123 RESULTS:
   Accuracy: 0.7064 (70.64%)
   AUC:      0.8054 (80.54%)

🌱 SEED 3/5: 456
📊 Sampled: 1519 real + 2000 fake = 3519 total
Split: Train=2463, Val=528, Test=528
🚀 Training for 20 epochs...

--- Epoch 1/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.6931, Val Loss: 0.6879
🏆 Best model saved!

--- Epoch 2/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.11it/s]


Train Loss: 0.6653, Val Loss: 0.6661
🏆 Best model saved!

--- Epoch 3/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.6115, Val Loss: 0.6290
🏆 Best model saved!

--- Epoch 4/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.5031, Val Loss: 0.7341

--- Epoch 5/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.4222, Val Loss: 0.7773

--- Epoch 6/20 ---


Training: 100%|██████████| 39/39 [00:36<00:00,  1.07it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.07it/s]


Train Loss: 0.3724, Val Loss: 0.8441

--- Epoch 7/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.09it/s]


Train Loss: 0.3357, Val Loss: 0.8058

--- Epoch 8/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.06it/s]


Train Loss: 0.3207, Val Loss: 0.6905
🛑 Early stopping


Testing: 100%|██████████| 9/9 [00:04<00:00,  2.09it/s]



✅ SEED 456 RESULTS:
   Accuracy: 0.6667 (66.67%)
   AUC:      0.7382 (73.82%)

🌱 SEED 4/5: 789
📊 Sampled: 1519 real + 2000 fake = 3519 total
Split: Train=2463, Val=528, Test=528
🚀 Training for 20 epochs...

--- Epoch 1/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.6956, Val Loss: 0.6863
🏆 Best model saved!

--- Epoch 2/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.6684, Val Loss: 0.6856
🏆 Best model saved!

--- Epoch 3/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.11it/s]


Train Loss: 0.6053, Val Loss: 0.6459
🏆 Best model saved!

--- Epoch 4/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.08it/s]


Train Loss: 0.5091, Val Loss: 0.6459

--- Epoch 5/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.4297, Val Loss: 0.7994

--- Epoch 6/20 ---


Training: 100%|██████████| 39/39 [00:36<00:00,  1.07it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.3617, Val Loss: 0.6655

--- Epoch 7/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.3376, Val Loss: 0.6124
🏆 Best model saved!

--- Epoch 8/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.08it/s]


Train Loss: 0.3047, Val Loss: 0.6341

--- Epoch 9/20 ---


Training: 100%|██████████| 39/39 [00:36<00:00,  1.08it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.08it/s]


Train Loss: 0.2876, Val Loss: 0.5460
🏆 Best model saved!

--- Epoch 10/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.06it/s]


Train Loss: 0.2699, Val Loss: 0.5110
🏆 Best model saved!

--- Epoch 11/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.08it/s]


Train Loss: 0.2574, Val Loss: 0.4539
🏆 Best model saved!

--- Epoch 12/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.2539, Val Loss: 0.4478
🏆 Best model saved!

--- Epoch 13/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.09it/s]


Train Loss: 0.2450, Val Loss: 0.4184
🏆 Best model saved!

--- Epoch 14/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.08it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.06it/s]


Train Loss: 0.2381, Val Loss: 0.4213

--- Epoch 15/20 ---


Training: 100%|██████████| 39/39 [00:36<00:00,  1.07it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.2358, Val Loss: 0.4311

--- Epoch 16/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.09it/s]


Train Loss: 0.2303, Val Loss: 0.4601

--- Epoch 17/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.08it/s]


Train Loss: 0.2254, Val Loss: 0.3616
🏆 Best model saved!

--- Epoch 18/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.2243, Val Loss: 0.3177
🏆 Best model saved!

--- Epoch 19/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.09it/s]


Train Loss: 0.2182, Val Loss: 0.3250

--- Epoch 20/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.08it/s]


Train Loss: 0.2186, Val Loss: 0.4054


Testing: 100%|██████████| 9/9 [00:04<00:00,  2.06it/s]



✅ SEED 789 RESULTS:
   Accuracy: 0.9318 (93.18%)
   AUC:      0.9820 (98.20%)

🌱 SEED 5/5: 2024
📊 Sampled: 1519 real + 2000 fake = 3519 total
Split: Train=2463, Val=528, Test=528
🚀 Training for 20 epochs...

--- Epoch 1/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.6871, Val Loss: 0.6897
🏆 Best model saved!

--- Epoch 2/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.10it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.05it/s]


Train Loss: 0.6587, Val Loss: 0.6701
🏆 Best model saved!

--- Epoch 3/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.09it/s]


Train Loss: 0.5853, Val Loss: 0.6079
🏆 Best model saved!

--- Epoch 4/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.08it/s]


Train Loss: 0.4847, Val Loss: 0.7311

--- Epoch 5/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


Train Loss: 0.4227, Val Loss: 0.8947

--- Epoch 6/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.09it/s]


Train Loss: 0.3684, Val Loss: 0.8178

--- Epoch 7/20 ---


Training: 100%|██████████| 39/39 [00:36<00:00,  1.08it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.09it/s]


Train Loss: 0.3467, Val Loss: 0.7528

--- Epoch 8/20 ---


Training: 100%|██████████| 39/39 [00:35<00:00,  1.09it/s]
Validating: 100%|██████████| 9/9 [00:04<00:00,  2.08it/s]


Train Loss: 0.3196, Val Loss: 0.8325
🛑 Early stopping


Testing: 100%|██████████| 9/9 [00:04<00:00,  2.07it/s]


✅ SEED 2024 RESULTS:
   Accuracy: 0.6496 (64.96%)
   AUC:      0.7183 (71.83%)

📊 MULTI-SEED TRAINING COMPLETE!

 seed  accuracy      auc
   42  0.575758 0.582675
  123  0.706439 0.805380
  456  0.666667 0.738202
  789  0.931818 0.982018
 2024  0.649621 0.718333

Accuracy: 70.61% ± 13.48%
AUC:      76.53% ± 14.57%

✅ Saved: /content/transformer_multiseed_results.csv



