In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

from sklearn.model_selection import train_test_split

import albumentations as A
from albumentations.pytorch import ToTensorV2

try:
    import albumentations, tqdm
except ImportError:
    !pip install --quiet albumentations tqdm
    import albumentations, tqdm


  check_for_updates()


In [2]:
from sklearn.model_selection import StratifiedKFold

# 1. Load the CSV of image IDs and labels
labels = pd.read_csv(
    '/kaggle/input/soil-classification-dataset-2025/train_labels.csv'
)

# 2. Add the full file path for each image
labels['file_path'] = (
    '/kaggle/input/soil-classification-dataset-2025/train/'
    + labels['image_id']
)

# 3. Prepare Stratified K-Fold splits (5 folds)
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

folds = []
for train_idx, val_idx in skf.split(labels, labels['soil_type']):
    train_df_fold = labels.iloc[train_idx].reset_index(drop=True)
    val_df_fold   = labels.iloc[val_idx].reset_index(drop=True)
    folds.append((train_df_fold, val_df_fold))

print(f"Found {len(labels)} images across {labels['soil_type'].nunique()} classes.")
print(f"Prepared {n_splits} stratified folds.")


Found 1222 images across 4 classes.
Prepared 5 stratified folds.


In [3]:
# Cell 3: Model, Optimizer, Scheduler, Loss

# 0. Ensure RangerAdaBelief is installed
!pip install --quiet ranger-adabelief

import torch
from torchvision import models
from ranger_adabelief import RangerAdaBelief
from torch.optim.lr_scheduler import CyclicLR

# 1. Device and model (using the new weights API)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
weights = models.ResNet50_Weights.DEFAULT
model = models.resnet50(weights=weights).to(device)

# 2. Optimizer with weight decay (regularization)
optimizer = RangerAdaBelief(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-3,
    weight_decay=1e-4
)

# 3. Scheduler (Cyclical Learning Rate)
scheduler = CyclicLR(
    optimizer,
    base_lr=1e-5,
    max_lr=1e-3,
    step_size_up=2000,
    mode='triangular2',
    cycle_momentum=False
)

# 4. Loss with label smoothing
criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)


Ranger optimizer loaded. 
Gradient Centralization usage = True
GC applied to both conv and fc layers


In [10]:
# Cell 4: Transforms, Dataset, and DataLoaders

# 1. Define Albumentations transforms
train_transforms = A.Compose([
    A.Resize(224, 224),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=30, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.GaussianBlur(p=0.3),
    A.CoarseDropout(
        num_holes_range=(1, 8),
        hole_height_range=(1, 16),
        hole_width_range=(1, 16),
        p=0.5
    ),
    A.Normalize(),
    ToTensorV2()
])

val_transforms = A.Compose([
    A.Resize(224, 224),
    A.Normalize(),
    ToTensorV2()
])

# 2. Custom Dataset class
class SoilDataset(Dataset):
    def __init__(self, df, transforms=None):
        self.df = df.reset_index(drop=True)
        self.transforms = transforms
        self.label_map = {
            'Alluvial soil': 0,
            'Black Soil': 1,
            'Clay soil': 2,
            'Red soil': 3
        }

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = Image.open(row['file_path']).convert('RGB')
        image = np.array(image)
        if self.transforms:
            image = self.transforms(image=image)['image']
        label = self.label_map[row['soil_type']]
        return image, label

# 3. Select the first fold for a quick setup
train_df, val_df = folds[0]

# 4. Create datasets and dataloaders
train_dataset = SoilDataset(train_df, transforms=train_transforms)
val_dataset   = SoilDataset(val_df,   transforms=val_transforms)

# Using num_workers=0 to avoid multiprocessing issues
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4,pin_memory=True)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4,pin_memory=True)

# 5. Quick check
print(f"Train batches: {len(train_loader)}")
print(f"Validation batches: {len(val_loader)}")


Train batches: 31
Validation batches: 8


In [11]:
from sklearn.metrics import precision_score, recall_score, f1_score

def train_one_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0
    all_preds = []
    all_labels = []
    
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        
    epoch_loss = running_loss / len(dataloader.dataset)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')
    print(f"Train Loss: {epoch_loss:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

def validate_one_epoch(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
    epoch_loss = running_loss / len(dataloader.dataset)
    f1_per_class = f1_score(all_labels, all_preds, average=None)
    for idx, score in enumerate(f1_per_class):
        print(f"F1 Score for class {idx}: {score:.4f}")


In [15]:
from ranger_adabelief import RangerAdaBelief
from torch.optim.lr_scheduler import CyclicLR
import gc

num_epochs = 55
best_fold_models = []

for fold, (train_df_fold, val_df_fold) in enumerate(folds, 1):
    print(f"\n===== Fold {fold}/5 =====")
    
    # Clear memory before starting a new fold
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()
    
    # Prepare loaders (using your successful configuration)
    train_loader = DataLoader(
        SoilDataset(train_df_fold, train_transforms),
        batch_size=32, shuffle=True, num_workers=4, pin_memory=True
    )
    val_loader = DataLoader(
        SoilDataset(val_df_fold, val_transforms),
        batch_size=32, shuffle=False, num_workers=4, pin_memory=True
    )
    
    # Model - use weights parameter but equivalent to pretrained=True
    model_fold = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
    
    # Freeze early layers (exactly as in your successful code)
    for name, param in model_fold.named_parameters():
        param.requires_grad = not (name.startswith('layer1') or name.startswith('layer2'))
    
    # Custom head with higher dropout for better generalization
    in_features = model_fold.fc.in_features
    model_fold.fc = torch.nn.Sequential(
        torch.nn.Dropout(p=0.5),
        torch.nn.Linear(in_features, 512),
        torch.nn.ReLU(),
        torch.nn.Dropout(p=0.3),
        torch.nn.Linear(512, 4)
    )
    model_fold = model_fold.to(device)
    
    # Optimizer with slightly lower learning rate to prevent overfitting
    optimizer_fold = RangerAdaBelief(
        filter(lambda p: p.requires_grad, model_fold.parameters()),
        lr=8e-4,  # Slightly lower than original
        weight_decay=1e-4,
        eps=1e-8,  # For numerical stability
        betas=(0.9, 0.999)
    )
    
    # Scheduler (same as your successful version)
    scheduler = CyclicLR(
        optimizer_fold,
        base_lr=1e-5,
        max_lr=1e-3,
        step_size_up=2000,
        mode='triangular2',
        cycle_momentum=False
    )
    
    best_min_f1 = 0.0
    best_state = None
    
    for epoch in range(1, num_epochs + 1):
        print(f"Epoch {epoch}/{num_epochs}")
        
        # Training
        model_fold.train()
        run_loss = 0
        train_preds, train_labels = [], []
        
        for imgs, lbls in train_loader:
            imgs, lbls = imgs.to(device), lbls.to(device)
            optimizer_fold.zero_grad()
            outputs = model_fold(imgs)
            loss = criterion(outputs, lbls)
            loss.backward()
            
            # Gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model_fold.parameters(), max_norm=1.0)
            
            optimizer_fold.step()
            scheduler.step()  # Keep stepping on every batch as in original code
            
            run_loss += loss.item() * imgs.size(0)
            train_preds.extend(outputs.argmax(1).cpu().numpy())
            train_labels.extend(lbls.cpu().numpy())
        
        # Calculate training metrics once per epoch
        train_f1 = f1_score(train_labels, train_preds, average='weighted')
        print(f"  Train F1: {train_f1:.4f}")
        
        # Validation
        model_fold.eval()
        val_preds, val_labels, val_loss = [], [], 0
        
        with torch.no_grad():
            for imgs, lbls in val_loader:
                imgs, lbls = imgs.to(device), lbls.to(device)
                outputs = model_fold(imgs)
                val_loss += criterion(outputs, lbls).item() * imgs.size(0)
                val_preds.extend(outputs.argmax(1).cpu().numpy())
                val_labels.extend(lbls.cpu().numpy())
        
        # Calculate F1 scores
        f1_per_class = f1_score(val_labels, val_preds, average=None)
        min_f1 = f1_per_class.min()
        avg_f1 = f1_score(val_labels, val_preds, average='weighted')
        
        print(f"  Fold {fold} Val Min F1: {min_f1:.4f}, Avg F1: {avg_f1:.4f}")
        
        # Save best model based on minimum class F1 score
        if min_f1 > best_min_f1:
            best_min_f1 = min_f1
            best_state = {k: v.cpu().detach().clone() for k, v in model_fold.state_dict().items()}
            print(f"  New best for fold {fold}: {best_min_f1:.4f}")
    
    # Save best model
    ckpt_path = f"/kaggle/working/resnet50_fold{fold}_best.pth"
    torch.save(best_state, ckpt_path)
    best_fold_models.append(ckpt_path)
    print(f"Saved fold {fold} model to {ckpt_path}")
    
    # Clean up to prevent memory issues
    del model_fold, optimizer_fold, scheduler, best_state
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()



===== Fold 1/5 =====
Ranger optimizer loaded. 
Gradient Centralization usage = True
GC applied to both conv and fc layers
Epoch 1/55
  Train F1: 0.2334
  Fold 1 Val Min F1: 0.0000, Avg F1: 0.1978
Epoch 2/55
  Train F1: 0.2575
  Fold 1 Val Min F1: 0.0351, Avg F1: 0.2326
  New best for fold 1: 0.0351
Epoch 3/55
  Train F1: 0.2915
  Fold 1 Val Min F1: 0.0000, Avg F1: 0.2576
Epoch 4/55
  Train F1: 0.2725
  Fold 1 Val Min F1: 0.0000, Avg F1: 0.3119
Epoch 5/55
  Train F1: 0.2817
  Fold 1 Val Min F1: 0.0000, Avg F1: 0.4474
Epoch 6/55
  Train F1: 0.3489
  Fold 1 Val Min F1: 0.0000, Avg F1: 0.5249
Epoch 7/55
  Train F1: 0.4577
  Fold 1 Val Min F1: 0.0930, Avg F1: 0.5971
  New best for fold 1: 0.0930
Epoch 8/55
  Train F1: 0.5069
  Fold 1 Val Min F1: 0.2174, Avg F1: 0.6537
  New best for fold 1: 0.2174
Epoch 9/55
  Train F1: 0.5371
  Fold 1 Val Min F1: 0.2553, Avg F1: 0.6831
  New best for fold 1: 0.2553
Epoch 10/55
  Train F1: 0.5795
  Fold 1 Val Min F1: 0.4151, Avg F1: 0.7263
  New best for f

In [19]:
# Cell 7: Professional Ensemble Inference with F1 Score Evaluation
import torch.nn.functional as F
from tqdm import tqdm
from sklearn.metrics import f1_score, classification_report

# Define TEST_DIR
TEST_DIR = '/kaggle/input/soil-classification-dataset-2025/test'
classes = ['Alluvial soil', 'Black Soil', 'Clay soil', 'Red soil']

# Test-time augmentations (TTA)
tta_transforms = [
    A.Compose([A.Resize(224, 224), A.Normalize(), ToTensorV2()]),
    A.Compose([A.Resize(224, 224), A.HorizontalFlip(p=1.0), A.Normalize(), ToTensorV2()]),
    A.Compose([A.Resize(256, 256), A.CenterCrop(224, 224), A.Normalize(), ToTensorV2()]),
    A.Compose([A.Resize(224, 224), A.RandomBrightnessContrast(p=1.0), A.Normalize(), ToTensorV2()])
]

# Load models with correct architecture
ensemble = []
for path in best_fold_models:
    m = models.resnet50(weights=None)
    # Match the exact architecture used in training
    in_features = m.fc.in_features
    m.fc = torch.nn.Sequential(
        torch.nn.Dropout(p=0.5),
        torch.nn.Linear(in_features, 512),
        torch.nn.ReLU(),
        torch.nn.Dropout(p=0.3),
        torch.nn.Linear(512, 4)
    )
    m.load_state_dict(torch.load(path))
    m = m.to(device).eval()
    ensemble.append(m)

print(f"Loaded {len(ensemble)} models for ensemble prediction")

# Batch processing for efficiency
batch_size = 8
test_files = sorted(os.listdir(TEST_DIR))
all_preds = []

# Process in batches with progress bar
for i in tqdm(range(0, len(test_files), batch_size), desc="Predicting"):
    batch_files = test_files[i:i+batch_size]
    batch_preds = []
    
    for fname in batch_files:
        img_path = os.path.join(TEST_DIR, fname)
        img = np.array(Image.open(img_path).convert('RGB'))
        
        # Apply Test-Time Augmentation (TTA)
        tta_outputs = []
        for transform in tta_transforms:
            img_t = transform(image=img)['image'].unsqueeze(0).to(device)
            
            # Ensemble predictions
            model_outputs = []
            for model in ensemble:
                with torch.no_grad():
                    logits = model(img_t)
                    scaled_logits = logits / 1.5  # Temperature scaling
                    model_outputs.append(F.softmax(scaled_logits, dim=1))
            
            # Average the predictions from all models for this augmentation
            avg_output = torch.stack(model_outputs).mean(0)
            tta_outputs.append(avg_output)
        
        # Average predictions across all augmentations
        final_output = torch.stack(tta_outputs).mean(0)
        pred_class = classes[final_output.argmax(1).item()]
        
        batch_preds.append((fname, pred_class))
    
    all_preds.extend(batch_preds)

# Create submission
submission = pd.DataFrame(all_preds, columns=['image_id', 'soil_type'])

# Save submission
submission.to_csv('submission.csv', index=False)
print(f"Saved submission.csv with {len(submission)} predictions")

# Calculate F1 scores on validation data
print("\nEvaluating ensemble F1 scores on validation data...")

# Create a combined validation dataset from all folds
val_images = []
val_labels = []

for _, val_df_fold in folds:
    val_dataset = SoilDataset(val_df_fold, val_transforms)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0)
    
    # Collect validation data
    for images, labels in val_loader:
        val_images.append(images)
        val_labels.append(labels)

# Concatenate all validation data
val_images = torch.cat(val_images)
val_labels = torch.cat(val_labels).numpy()

# Get ensemble predictions
ensemble_preds = []
with torch.no_grad():
    for i in range(0, len(val_images), 32):
        batch = val_images[i:i+32].to(device)
        
        # Aggregate predictions from all models
        batch_preds = []
        for model in ensemble:
            outputs = model(batch)
            batch_preds.append(torch.softmax(outputs, dim=1))
        
        # Average predictions from all models
        avg_pred = torch.stack(batch_preds).mean(0)
        predicted_classes = avg_pred.argmax(dim=1).cpu().numpy()
        ensemble_preds.extend(predicted_classes)

# Calculate and print F1 scores for each class
f1_per_class = f1_score(val_labels, ensemble_preds, average=None)

print("\nEnsemble F1 Scores by Class:")
for i, class_name in enumerate(classes):
    print(f"{class_name}: {f1_per_class[i]:.4f}")

# Also print overall weighted F1 score
weighted_f1 = f1_score(val_labels, ensemble_preds, average='weighted')
print(f"\nWeighted F1 Score: {weighted_f1:.4f}")

# Print detailed classification report
print("\nDetailed Classification Report:")
print(classification_report(val_labels, ensemble_preds, target_names=classes))


Loaded 5 models for ensemble prediction


Predicting: 100%|██████████| 43/43 [00:43<00:00,  1.02s/it]


Saved submission.csv with 341 predictions

Evaluating ensemble F1 scores on validation data...

Ensemble F1 Scores by Class:
Alluvial soil: 0.9991
Black Soil: 0.9978
Clay soil: 1.0000
Red soil: 1.0000

Weighted F1 Score: 0.9992

Detailed Classification Report:
               precision    recall  f1-score   support

Alluvial soil       1.00      1.00      1.00       528
   Black Soil       1.00      1.00      1.00       231
    Clay soil       1.00      1.00      1.00       199
     Red soil       1.00      1.00      1.00       264

     accuracy                           1.00      1222
    macro avg       1.00      1.00      1.00      1222
 weighted avg       1.00      1.00      1.00      1222

