In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image

In [None]:


# ---------------------------------------------------------------------------
# Configuration: module-level constants read by the cells below.
# ---------------------------------------------------------------------------

# Filesystem layout; every path is resolved relative to DATA_DIR.
DATA_DIR = "./"
TRAIN_DIR, TEST_DIR, CSV_PATH = (
    os.path.join(DATA_DIR, relative_path)
    for relative_path in ("train/train/", "test/test/", "train.csv")
)

# Seed handed to the NumPy / PyTorch RNGs and to the stratified K-fold splitter.
SEED = 42

# Cross-validation and optimisation settings.
N_SPLITS = 5        # number of stratified folds
EPOCHS = 15         # upper bound on epochs per fold (early stopping may end sooner)
BATCH_SIZE = 16     # kept small for stability
LR = 1e-4           # initial learning rate given to the optimizer
IMG_SIZE = 320      # images are resized to IMG_SIZE x IMG_SIZE

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

In [None]:


# Seed the global NumPy and PyTorch random number generators.
np.random.seed(SEED)
torch.manual_seed(SEED)

print(f"Using device: {DEVICE}")

# Read the training manifest and derive the two columns the dataset class uses:
#   label_idx - integer code for each TARGET value (assigned by LabelEncoder)
#   image_id  - copy of the ID column, joined onto the image directory later
# `le` stays at module level; presumably it is needed later to map integer
# predictions back to TARGET values - confirm against the inference code.
le = LabelEncoder()
df = pd.read_csv(CSV_PATH).assign(
    label_idx=lambda frame: le.fit_transform(frame['TARGET']),
    image_id=lambda frame: frame['ID'],
)
num_classes = df['label_idx'].nunique()

print(f"Number of classes: {num_classes}")
print("Class distribution:", df['TARGET'].value_counts(), sep="\n")

In [None]:

# Simple but effective dataset
class ImprovedDataset(Dataset):
    """Map-style dataset that yields ``(image, label)`` pairs listed in a DataFrame.

    Every row must provide an ``image_id`` (joined onto ``img_dir`` to locate
    the file) and a ``label_idx`` value, which is returned unchanged as the label.
    """

    def __init__(self, df, img_dir, transform=None):
        """Store the manifest, the image directory and an optional transform.

        Args:
            df: DataFrame with ``image_id`` and ``label_idx`` columns.
            img_dir: Directory that ``image_id`` values are relative to.
            transform: Optional callable applied to the loaded PIL image.
        """
        self.transform = transform
        self.img_dir = img_dir
        # __getitem__ indexes by position, so work on a copy with a clean
        # 0..n-1 index; the caller's frame is left untouched.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        """Return the number of rows in the manifest."""
        return self.df.shape[0]

    def _read_image(self, img_path):
        """Open ``img_path`` as RGB, substituting a grey image on any failure."""
        try:
            return Image.open(img_path).convert("RGB")
        except Exception as e:
            # Best effort: report the problem and keep the epoch running with
            # a uniform mid-grey 224x224 placeholder instead of raising.
            print(f"Error loading {img_path}: {e}")
            return Image.new('RGB', (224, 224), color=(128, 128, 128))

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        row = self.df.iloc[idx]
        image = self._read_image(os.path.join(self.img_dir, row['image_id']))

        if self.transform:
            image = self.transform(image)

        return image, row['label_idx']


In [None]:


# Enhanced but simple transforms
def get_train_transforms(img_size=320):
    """Build the augmentation pipeline applied to training images.

    Args:
        img_size: Side length, in pixels, of the square the image is resized to.

    Returns:
        A ``transforms.Compose`` that resizes, randomly flips / rotates /
        colour-jitters / shifts-and-scales the PIL image, converts it to a
        tensor, normalises each channel and finally applies random erasing.
    """
    square = (img_size, img_size)

    # Steps that run on the PIL image.
    pil_steps = [
        transforms.Resize(square),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.3),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    ]

    # Steps that run after conversion to a tensor. The mean/std values are the
    # channel statistics conventionally paired with torchvision's pretrained
    # backbones; random erasing comes last because it runs on the tensor.
    tensor_steps = [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.RandomErasing(p=0.2),
    ]

    return transforms.Compose(pil_steps + tensor_steps)

def get_val_transforms(img_size=320):
    """Build the deterministic preprocessing pipeline for validation images.

    Args:
        img_size: Side length, in pixels, of the square the image is resized to.

    Returns:
        A ``transforms.Compose`` that resizes the image, converts it to a
        tensor and normalises each channel; no random augmentation is applied.
    """
    resize = transforms.Resize((img_size, img_size))
    to_tensor = transforms.ToTensor()
    # Same channel statistics as the training pipeline uses.
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    return transforms.Compose([resize, to_tensor, normalize])


In [None]:

# Model selection - simplified to avoid dependency issues
def get_model(model_name='resnext', num_classes=20):
    """Create a pretrained torchvision backbone with a fresh classification head.

    Args:
        model_name: ``'resnext'`` selects ResNeXt50 (32x4d), ``'wide_resnet'``
            selects Wide ResNet50-2; any other value falls back to ResNet50.
        num_classes: Number of outputs of the replacement ``fc`` layer.

    Returns:
        The model with its final fully connected layer replaced by an
        ``nn.Linear`` of ``num_classes`` outputs.
    """
    # Choose the constructor and the banner printed once the model is ready.
    if model_name == 'resnext':
        build, banner = models.resnext50_32x4d, "✅ Using ResNeXt50"
    elif model_name == 'wide_resnet':
        build, banner = models.wide_resnet50_2, "✅ Using Wide ResNet50"
    else:
        # Default: ResNet50
        build, banner = models.resnet50, "✅ Using ResNet50"

    model = build(pretrained=True)
    # All three architectures expose their classifier as `.fc`, so one
    # replacement path covers every branch.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    print(banner)

    return model


In [None]:

# Class-weighted loss
class WeightedFocalLoss(nn.Module):
    """Focal loss for multi-class logits with optional per-class weights.

    For a sample with true class ``y`` and predicted probability ``p`` for it:

        loss = alpha * w[y] * (1 - p) ** gamma * (-log p)

    and the batch loss is the plain mean over samples.

    Args:
        class_weights: Optional 1-D tensor with one weight per class, or
            ``None`` for unweighted focal loss.
        alpha: Global scale factor applied to every sample.
        gamma: Focusing exponent; ``gamma=0`` reduces to (weighted)
            cross-entropy averaged over the batch.
    """

    def __init__(self, class_weights=None, alpha=1, gamma=2):
        super().__init__()
        self.class_weights = class_weights
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        """Return the mean focal loss of ``inputs`` (logits) against ``targets``.

        Args:
            inputs: Raw, unnormalised class scores with classes on dim 1.
            targets: Integer class indices.
        """
        # The modulating factor needs the true-class probability, so it is
        # derived from the UNWEIGHTED cross-entropy. Taking exp(-weighted_ce)
        # would give p ** w[y], which distorts (1 - p) ** gamma.
        ce_loss = nn.functional.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss

        if self.class_weights is not None:
            # The class weight is applied as its own multiplicative factor.
            # Matching device/dtype here spares callers from moving the
            # weight tensor themselves.
            weights = self.class_weights.to(device=focal_loss.device, dtype=focal_loss.dtype)
            focal_loss = focal_loss * weights[targets]

        return focal_loss.mean()


In [None]:

# Calculate class weights
def get_class_weights(df, num_classes=None):
    """Compute inverse-frequency ("balanced") class weights, indexed by class id.

    Each class that occurs in ``df`` gets
    ``len(df) / (n_present_classes * count)``. The returned tensor always has
    one entry per class index in ``range(num_classes)``, so position ``i`` is
    the weight of class ``i`` even when some classes are missing from ``df``
    (for example a rare class that fell entirely into the validation fold).
    A missing class is treated as if it had a single sample, which gives it
    the largest finite weight instead of a division by zero.

    Args:
        df: DataFrame with an integer ``label_idx`` column.
        num_classes: Total number of classes. When omitted it is inferred as
            ``max(label_idx) + 1``; pass it explicitly if the highest class
            ids might be missing from ``df``.

    Returns:
        A 1-D ``float32`` tensor of length ``num_classes``.

    Raises:
        ValueError: If a label is negative or not smaller than ``num_classes``.
    """
    labels = df['label_idx']
    total_samples = len(df)

    if total_samples:
        if labels.min() < 0:
            raise ValueError("label_idx must be non-negative")
        highest_label = int(labels.max())
        if num_classes is None:
            num_classes = highest_label + 1
        elif highest_label >= num_classes:
            raise ValueError(
                f"label_idx {highest_label} is out of range for num_classes={num_classes}"
            )
    else:
        # Nothing to count: return neutral weights (empty when the number of
        # classes is unknown as well).
        return torch.ones(num_classes or 0, dtype=torch.float32)

    # Reindexing over the full class range keeps the weights aligned with
    # class ids; absent classes show up with a count of 0.
    class_counts = (
        labels.value_counts()
        .reindex(range(num_classes), fill_value=0)
        .to_numpy()
    )
    present_classes = int((class_counts > 0).sum())
    safe_counts = class_counts.clip(min=1)  # avoid dividing by zero

    class_weights = total_samples / (present_classes * safe_counts)
    return torch.tensor(class_weights, dtype=torch.float32)


In [None]:

# Training function
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network to train; switched to training mode here.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy)`` averaged over all samples seen.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()
    running_loss, n_correct, n_seen = 0, 0, 0

    progress = tqdm(loader, desc="Training")
    for step, (images, targets) in enumerate(progress):
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(images)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()

        # Cap the global gradient norm at 1.0 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        batch_size = images.size(0)
        loss_value = batch_loss.item()
        # The criterion returns a batch mean, so weight it by the batch size
        # to keep the epoch average exact when the last batch is smaller.
        running_loss += loss_value * batch_size
        n_correct += (logits.argmax(1) == targets).sum().item()
        n_seen += batch_size

        # Refresh the progress-bar statistics only every 50th batch.
        if step % 50 == 0:
            stats = {
                'Loss': f'{loss_value:.4f}',
                'Acc': f'{n_correct/n_seen:.4f}',
                'Batch': f'{step}/{len(loader)}',
            }
            progress.set_postfix(stats)

    return running_loss / n_seen, n_correct / n_seen

In [None]:

# Validation function
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating its parameters.

    Args:
        model: Network to evaluate; switched to eval mode here.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy, micro_f1)`` over all samples seen.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum, hits, seen = 0, 0, 0
    collected_preds = []
    collected_labels = []

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, targets in tqdm(loader, desc="Validating", leave=False):
            images = images.to(device)
            targets = targets.to(device)

            logits = model(images)
            n_batch = images.size(0)
            # Weight the batch-mean loss by the batch size for an exact average.
            loss_sum += criterion(logits, targets).item() * n_batch

            predicted = logits.argmax(1)
            hits += (predicted == targets).sum().item()
            seen += n_batch

            # Keep every prediction/label so F1 is computed over the full set.
            collected_preds.extend(predicted.cpu().numpy())
            collected_labels.extend(targets.cpu().numpy())

    # NOTE(review): with exactly one predicted class per sample, micro-averaged
    # F1 coincides with accuracy - confirm micro is the intended metric.
    micro_f1 = f1_score(collected_labels, collected_preds, average="micro")
    return loss_sum / seen, hits / seen, micro_f1


In [None]:


# Main training loop
def _make_loader(frame, transform, shuffle):
    """Wrap ``frame`` in an ImprovedDataset and a single-process DataLoader."""
    return DataLoader(
        ImprovedDataset(frame, TRAIN_DIR, transform),
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        num_workers=0,  # Set to 0 to avoid multiprocessing issues
        pin_memory=(DEVICE == 'cuda'),
    )


def _train_fold(model_name, fold, train_df, val_df, patience=5):
    """Train ``model_name`` on one fold and return its best validation F1.

    The weights of the best epoch are written to
    ``{model_name}_fold{fold}_best.pth``. Training stops early once the
    validation F1 has failed to improve for ``patience`` consecutive epochs.
    """
    train_loader = _make_loader(train_df, get_train_transforms(IMG_SIZE), shuffle=True)
    val_loader = _make_loader(val_df, get_val_transforms(IMG_SIZE), shuffle=False)

    # Model setup
    model = get_model(model_name, num_classes).to(DEVICE)

    # Optimizer and scheduler (the scheduler is stepped once per epoch)
    optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-6)

    # Loss with class weights computed from this fold's training split only
    class_weights = get_class_weights(train_df).to(DEVICE)
    criterion = WeightedFocalLoss(class_weights=class_weights, alpha=1, gamma=2)

    checkpoint_path = f"{model_name}_fold{fold}_best.pth"
    best_f1 = 0
    # Tracks whether a checkpoint exists yet. Without it, a fold whose F1 is
    # 0.0 in every epoch would never pass the strict `>` test and would leave
    # no weights file behind.
    checkpoint_written = False
    patience_counter = 0

    print("Starting training...")
    for epoch in range(EPOCHS):
        print(f"\nEpoch {epoch+1}/{EPOCHS}")

        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE)
        val_loss, val_acc, val_f1 = validate(model, val_loader, criterion, DEVICE)

        # Step scheduler
        scheduler.step()

        print(f"Train - Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
        print(f"Val   - Loss: {val_loss:.4f}, Acc: {val_acc:.4f}, F1: {val_f1:.4f}")

        # Save best model (the first epoch always counts as an improvement)
        if val_f1 > best_f1 or not checkpoint_written:
            best_f1 = val_f1
            patience_counter = 0
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_written = True
            print(f"✅ New best F1: {best_f1:.4f}")
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"⏰ Early stopping at epoch {epoch+1}")
                break

    return best_f1


def train_models():
    """Cross-validate each architecture and report per-fold and mean F1.

    For every model name, the module-level ``df`` is split into ``N_SPLITS``
    stratified folds. Each fold is trained from a freshly created model, its
    best weights are saved to disk by ``_train_fold``, and the mean and
    standard deviation of the per-fold best F1 are printed at the end.
    """
    # Use reliable torchvision models only
    model_names = ['resnext', 'wide_resnet']

    for model_name in model_names:
        print(f"\n{'='*60}")
        print(f"Training {model_name}")
        print(f"{'='*60}")

        # The same seed gives every architecture identical fold assignments.
        skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
        fold_scores = []

        for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['label_idx'])):
            print(f"\n🔥 Fold {fold+1}/{N_SPLITS}")

            train_df = df.iloc[train_idx].reset_index(drop=True)
            val_df = df.iloc[val_idx].reset_index(drop=True)

            print(f"Train samples: {len(train_df)}, Val samples: {len(val_df)}")

            # Running the fold in a helper lets its model and optimizer go
            # out of scope as soon as the fold finishes.
            best_f1 = _train_fold(model_name, fold, train_df, val_df)

            fold_scores.append(best_f1)
            print(f"\n📊 Fold {fold+1} completed - Best F1: {best_f1:.4f}")

        avg_score = np.mean(fold_scores)
        std_score = np.std(fold_scores)
        print(f"\n🎯 {model_name} Results:")
        print(f"Fold scores: {[f'{score:.4f}' for score in fold_scores]}")
        print(f"Average F1: {avg_score:.4f} ± {std_score:.4f}")

if __name__ == "__main__":
    # Entry point: run the full cross-validated training, then print where
    # the checkpoints went. The braces in the last message are literal text
    # describing the file-name pattern, not f-string placeholders.
    print("🚀 Starting improved training...")
    train_models()
    for closing_line in (
        "\n🎉 Training completed!",
        "📁 Model files saved with format: {model_name}_fold{fold}_best.pth",
    ):
        print(closing_line)