In [1]:
# Core libraries
import os
import pandas as pd
import numpy as np
import shutil

# PyTorch and vision tools
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import resnet34, ResNet34_Weights
from PIL import Image

# Sklearn tools for evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score


## Loading and Preparing the Training Data

- Loaded the CSV file containing the training labels.
- Created a label mapping to convert soil type names into numeric labels for modeling.
- Defined the directory path containing training images.
- Since image filenames may have different extensions (`.jpg`, `.jpeg`, `.png`), implemented a function to correctly resolve the actual filename for each image.
- Applied this function to map each image ID to its resolved filename.
- Removed any rows where the image file could not be found to ensure data consistency.


In [2]:
# Load the training labels CSV
train_df = pd.read_csv('/kaggle/input/soil-classification/soil_classification-2025/train_labels.csv')

# Map soil type strings to numeric labels
label_map = {
    'Alluvial soil': 0,
    'Black Soil': 1,
    'Clay soil': 2,
    'Red soil': 3
}
train_df['label'] = train_df['soil_type'].map(label_map)

# Series.map leaves NaN for any soil_type that is not a key of label_map.
# Stop here with a readable message instead of letting NaN / float labels
# reach the stratified split and the loss function.
unmapped_soil_types = train_df.loc[train_df['label'].isna(), 'soil_type'].unique()
if len(unmapped_soil_types) > 0:
    raise ValueError(
        f"soil_type values missing from label_map: {sorted(map(str, unmapped_soil_types))}"
    )
train_df['label'] = train_df['label'].astype('int64')

# Define the training image directory
train_dir = '/kaggle/input/soil-classification/soil_classification-2025/train'

# Index the directory listing by lower-cased filename so image IDs can be
# matched to the real files case-insensitively (handles .jpg/.jpeg/.png extensions)
train_files = os.listdir(train_dir)
train_files_lower = {f.lower(): f for f in train_files}

def resolve_image_file(image_id):
    """Return the on-disk training filename for ``image_id``, or ``None``.

    Any extension on ``image_id`` is discarded; the remaining stem is combined
    with ``.jpg``, ``.jpeg`` and ``.png`` (in that order) and each candidate is
    looked up, lower-cased, in ``train_files_lower``. The first hit wins and
    the filename is returned with its original casing.
    """
    stem, _ = os.path.splitext(image_id)
    candidate_keys = (f"{stem}{suffix}".lower() for suffix in ('.jpg', '.jpeg', '.png'))
    return next(
        (train_files_lower[key] for key in candidate_keys if key in train_files_lower),
        None,
    )

# Replace each image_id with the filename that actually exists on disk;
# rows whose image could not be located are discarded and the index renumbered.
resolved_names = train_df['image_id'].apply(resolve_image_file)
train_df = (
    train_df.assign(image_id=resolved_names)
    .dropna(subset=['image_id'])
    .reset_index(drop=True)
)


## Data Transformation and Custom Dataset

- Set the target image size to 224x224 pixels.
- Defined normalization parameters (the ImageNet per-channel mean and standard deviation) to match the statistics the pretrained model was trained with.
- Created data augmentation pipeline for training, including resizing, random horizontal flips, rotations, and color jitter to improve model robustness.
- Defined a simpler transformation for validation and testing without augmentation.
- Implemented a custom `SoilDataset` class to:
  - Load images from disk.
  - Apply transformations.
  - Return image-label pairs for model training.


In [3]:
# Side length (pixels) every image is resized to, and the per-channel
# normalization applied after tensor conversion
IMG_SIZE = 224
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Validation/test pipeline: deterministic resize -> tensor -> normalize
transform_val = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        normalize,
    ]
)

# Training pipeline: same resize, then random flip / rotation / colour jitter
# before tensor conversion and normalization
transform_train = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        normalize,
    ]
)

# Custom dataset class for loading soil images
class SoilDataset(Dataset):
    """Labelled image dataset backed by a DataFrame.

    Args:
        df: frame with an ``image_id`` column (filename inside ``img_dir``)
            and a ``label`` column.
        img_dir: directory that contains the image files.
        transform: optional callable applied to the RGB PIL image; when it is
            falsy the PIL image is returned unchanged.
    """

    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Number of rows (samples) in the backing frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        row = self.df.iloc[idx]
        image_path = os.path.join(self.img_dir, row['image_id'])
        # Open inside a context manager so the file handle is released as soon
        # as the pixel data has been copied by convert(), instead of waiting
        # for garbage collection.
        with Image.open(image_path) as source:
            image = source.convert("RGB")
        label = row['label']
        if self.transform:
            image = self.transform(image)
        return image, label


## Train-Validation Split and Model Setup

- Split the data into training and validation sets with stratification to maintain label distribution.
- Created custom dataset objects and corresponding dataloaders with batch size 32.
- Loaded a pretrained ResNet34 model and replaced its final fully connected layer to output predictions for 4 soil classes.
- Set up the device to use GPU if available for faster training.
- Defined the loss function as Cross-Entropy Loss suitable for multi-class classification.
- Used the Adam optimizer with a learning rate of 0.0001.
- Added a learning rate scheduler to reduce the learning rate by half every 5 epochs to improve convergence.


In [4]:
# Seed PyTorch's global RNG so the training shuffle order and later
# torch-driven randomness are repeatable across Restart & Run All.
# (This does not by itself guarantee bit-identical results on GPU.)
SEED = 42
torch.manual_seed(SEED)

# Stratified 80/20 split so both parts keep the label distribution.
# NOTE: this rebinds train_df to the training part only; re-run the loading
# cell before re-running this one, otherwise the split is applied twice.
train_df, val_df = train_test_split(
    train_df, stratify=train_df['label'], test_size=0.2, random_state=SEED
)

# Create dataset and dataloaders (augmentation only on the training side)
train_ds = SoilDataset(train_df, train_dir, transform=transform_train)
val_ds = SoilDataset(val_df, train_dir, transform=transform_val)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False)

# Choose the compute device up front: CUDA when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ResNet34 initialised from the default pretrained weights
weights = ResNet34_Weights.DEFAULT
model = resnet34(weights=weights)

# Swap the final fully connected layer for a 4-output head (one per soil
# class), then move the whole network to the chosen device
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=4)
model = model.to(device)

# Cross-entropy loss, Adam at lr=1e-4, and a step schedule that halves the
# learning rate every 5 scheduler steps
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)


Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 210MB/s]


## Model Training and Evaluation

- Defined a `train_model` function that trains the model for a specified number of epochs.
- In each epoch:
  - Set the model to training mode and computed the training loss.
  - Performed backpropagation and optimizer steps.
  - Updated the learning rate scheduler.
- After each epoch, switched to evaluation mode:
  - Predicted on the validation set.
  - Calculated per-class F1 scores to assess performance on each soil type.
  - Printed training loss and validation F1 scores for monitoring.
- Trained the model for 15 epochs.
- Saved the final trained model weights to `best_model.pth` for later use.


In [5]:
def train_model(model, epochs):
    """Train ``model`` and leave it holding its best validation weights.

    Uses the notebook-level ``train_loader``, ``val_loader``, ``criterion``,
    ``optimizer``, ``scheduler`` and ``device``. Every epoch runs one
    optimisation pass over ``train_loader``, steps the scheduler once, then
    evaluates on ``val_loader`` and prints the mean training loss, the
    per-class F1 scores and their minimum.

    The epoch with the highest minimum per-class F1 is remembered, and its
    weights are loaded back into ``model`` before returning, so a subsequent
    ``torch.save(model.state_dict(), ...)`` stores the best epoch rather than
    simply the last one.

    Args:
        model: network to optimise in place; expected to already be on ``device``.
        epochs: number of passes over ``train_loader``.

    Returns:
        None. ``model`` is modified in place.
    """
    best_min_f1 = float("-inf")
    best_epoch = None
    best_state = None

    for epoch in range(epochs):
        # ---- Training phase ----
        model.train()
        running_loss = 0.0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # One scheduler step per epoch, after that epoch's optimizer steps
        scheduler.step()

        # ---- Validation phase ----
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for images, labels in val_loader:
                outputs = model(images.to(device))
                all_preds.extend(outputs.argmax(dim=1).cpu().tolist())
                all_labels.extend(labels.tolist())

        f1 = f1_score(all_labels, all_preds, average=None)
        min_f1 = min(f1)
        # max(..., 1) avoids a ZeroDivisionError when train_loader has no batches
        mean_loss = running_loss / max(len(train_loader), 1)
        print(f"\nEpoch {epoch+1}/{epochs}")
        print(f"  ➤ Train Loss: {mean_loss:.4f}")
        print(f"  ➤ Per-class F1: {f1}")
        print(f"  ➤ Min F1: {min_f1:.4f}")

        # Snapshot the weights whenever the worst-class F1 improves. The
        # tensors are cloned because state_dict() returns references that
        # later optimizer steps would overwrite.
        if min_f1 > best_min_f1:
            best_min_f1 = min_f1
            best_epoch = epoch + 1
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

    if best_state is not None:
        model.load_state_dict(best_state)
        print(f"\nRestored weights from epoch {best_epoch} (min F1 {best_min_f1:.4f})")

# Run the full training schedule
NUM_EPOCHS = 15
train_model(model, epochs=NUM_EPOCHS)

# Persist the weights the model holds once training has returned
trained_weights = model.state_dict()
torch.save(trained_weights, "best_model.pth")



Epoch 1/15
  ➤ Train Loss: 0.4024
  ➤ Per-class F1: [0.95327103 0.91489362 0.94736842 0.98039216]
  ➤ Min F1: 0.9149

Epoch 2/15
  ➤ Train Loss: 0.1459
  ➤ Per-class F1: [0.96650718 0.95652174 0.93975904 0.98039216]
  ➤ Min F1: 0.9398

Epoch 3/15
  ➤ Train Loss: 0.1204
  ➤ Per-class F1: [0.94339623 0.9375     0.91358025 0.92783505]
  ➤ Min F1: 0.9136

Epoch 4/15
  ➤ Train Loss: 0.0828
  ➤ Per-class F1: [0.94       0.96774194 0.87640449 0.98076923]
  ➤ Min F1: 0.8764

Epoch 5/15
  ➤ Train Loss: 0.0607
  ➤ Per-class F1: [0.96039604 0.94845361 0.94117647 0.98039216]
  ➤ Min F1: 0.9412

Epoch 6/15
  ➤ Train Loss: 0.0469
  ➤ Per-class F1: [0.98076923 0.9787234  0.96296296 0.99029126]
  ➤ Min F1: 0.9630

Epoch 7/15
  ➤ Train Loss: 0.0318
  ➤ Per-class F1: [0.98550725 0.98924731 0.97560976 1.        ]
  ➤ Min F1: 0.9756

Epoch 8/15
  ➤ Train Loss: 0.0282
  ➤ Per-class F1: [0.98550725 0.98924731 0.97560976 1.        ]
  ➤ Min F1: 0.9756

Epoch 9/15
  ➤ Train Loss: 0.0353
  ➤ Per-class F1: [0.

## Preparing Test Data

- Copied test images from the input directory to the working directory to allow image format conversions.
- Loaded the CSV file containing test image IDs.
- Converted unsupported image formats (`.webp`, `.gif`) to `.jpg` for compatibility with the model.
- Implemented a function to resolve the correct image filename considering possible extensions (`.jpg`, `.jpeg`, `.png`).
- Applied this function to ensure all test image IDs map to valid image files, dropping any missing files to maintain data integrity.


In [6]:
# Copy test files to working directory to allow .jpg conversion
# (the input directory is copied once; an existing working copy is reused)
test_dir = '/kaggle/input/soil-classification/soil_classification-2025/test'
test_ids_path = '/kaggle/input/soil-classification/soil_classification-2025/test_ids.csv'
working_test_dir = '/kaggle/working/test'

if not os.path.exists(working_test_dir):
    shutil.copytree(test_dir, working_test_dir)

# Load test image IDs
test_df = pd.read_csv(test_ids_path)

# Handle .webp/.gif files → .jpg
test_files = os.listdir(working_test_dir)

for fname in test_files:
    base, ext = os.path.splitext(fname)
    if ext.lower() not in ('.webp', '.gif'):
        continue
    new_path = os.path.join(working_test_dir, base + ".jpg")
    if os.path.exists(new_path):
        # Already converted on a previous run
        continue
    try:
        # Context manager releases the source file handle once the RGB copy
        # has been written.
        with Image.open(os.path.join(working_test_dir, fname)) as img:
            img.convert("RGB").save(new_path, format="JPEG")
        print(f"Converted {fname} → {base}.jpg")
    except Exception as e:
        # Best effort: report and carry on, but do not leave a partially
        # written .jpg behind; the existence check above would trust it on
        # the next run.
        if os.path.exists(new_path):
            os.remove(new_path)
        print(f"Failed to convert {fname}: {e}")

# Build the case-insensitive filename index only after conversion, so the
# freshly written .jpg files are part of it.
test_files_lower = {f.lower(): f for f in os.listdir(working_test_dir)}

# Resolve file paths in test folder
def resolve_file(image_id):
    """Return the filename in ``working_test_dir`` to use for ``image_id``, or ``None``.

    The extension of ``image_id`` is dropped and the stem is tried with
    ``.jpg``, ``.jpeg`` and ``.png`` in that order. For each candidate the
    case-insensitive index ``test_files_lower`` is consulted first (returning
    the listed filename); failing that, the candidate is returned as-is if a
    file of exactly that name exists in ``working_test_dir``.
    """
    stem = os.path.splitext(image_id)[0]
    for suffix in ('.jpg', '.jpeg', '.png'):
        candidate = stem + suffix
        listed_name = test_files_lower.get(candidate.lower())
        if listed_name is not None:
            return listed_name
        if os.path.exists(os.path.join(working_test_dir, candidate)):
            return candidate
    return None

# Attach the resolved on-disk filename to every test ID; IDs with no matching
# file are removed and the index renumbered.
resolved_test_names = test_df['image_id'].apply(resolve_file)
test_df = (
    test_df.assign(resolved_image=resolved_test_names)
    .dropna(subset=['resolved_image'])
    .reset_index(drop=True)
)


Converted img_91cbc6e5.gif → img_91cbc6e5.jpg
Converted img_f22972ea.webp → img_f22972ea.jpg


## Test Dataset and DataLoader for Inference

- Defined the test transformation pipeline, which is the same as the validation transforms (resize + normalization).
- Created a custom `TestDataset` class to:
  - Load and transform test images.
  - Return the transformed image along with its original image ID.
- Initialized the test dataset and corresponding dataloader for batch inference with a batch size of 32.


In [7]:
# Inference reuses the validation pipeline unchanged (resize, tensor
# conversion, normalization), so no random augmentation is applied.
test_transform = transform_val

# Create dataset for test inference
class TestDataset(Dataset):
    """Unlabelled image dataset used for inference.

    Args:
        df: frame with a ``resolved_image`` column (filename inside
            ``img_dir``) and an ``image_id`` column (returned with each image).
        img_dir: directory that contains the image files.
        transform: callable applied to the RGB PIL image.
    """

    def __init__(self, df, img_dir, transform):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Number of rows (samples) in the backing frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(transformed_image, image_id)`` for the row at position ``idx``."""
        row = self.df.iloc[idx]
        path = os.path.join(self.img_dir, row['resolved_image'])
        # Open inside a context manager so the file handle is released as soon
        # as the pixel data has been copied by convert(), instead of waiting
        # for garbage collection.
        with Image.open(path) as source:
            image = source.convert("RGB")
        image = self.transform(image)
        return image, row['image_id']

# Wrap the resolved test frame in a dataset and iterate it in file order,
# 32 images per batch
test_dataset = TestDataset(
    df=test_df,
    img_dir=working_test_dir,
    transform=test_transform,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


## Model Inference on Test Data

- Re-created the ResNet34 architecture and replaced the final layer to match the 4 soil classes.
- Loaded the saved trained weights (`best_model.pth`) into the model.
- Set the model to evaluation mode and moved it to the appropriate device (GPU/CPU).
- Created a reverse mapping from numeric labels back to soil type names.
- Ran inference on the test dataset:
  - Predicted the soil class for each test image.
  - Stored the predictions along with their corresponding image IDs for submission.


In [8]:
# Rebuild the architecture only: weights=None skips initialising pretrained
# weights, since every parameter is replaced by the checkpoint just below.
model = resnet34(weights=None)
model.fc = nn.Linear(model.fc.in_features, 4)

# map_location lets a checkpoint written on GPU be loaded on a CPU-only host
# (and vice versa) by placing the tensors directly on the active device.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model = model.to(device)
model.eval()

# Label decoding map, derived from label_map so the encoder used for training
# and the decoder used here cannot drift apart
inv_label_map = {index: name for name, index in label_map.items()}

# Predict test labels: one (image_id, soil type name) tuple per test image,
# in loader order
predictions = []
with torch.no_grad():
    for images, image_ids in test_loader:
        outputs = model(images.to(device))
        # .tolist() yields plain Python ints, matching the dict's key type
        preds = outputs.argmax(dim=1).tolist()
        predictions.extend(
            (image_id, inv_label_map[pred])
            for image_id, pred in zip(image_ids, preds)
        )


## Creating Submission File

- Created a DataFrame from the list of predictions containing image IDs and their predicted soil types.
- Saved the DataFrame to a CSV file named `submission.csv` without the index column.
- Printed confirmation showing the number of rows generated in the submission file.


In [9]:
# Tabulate the (image_id, soil_type) predictions and write them out without
# the DataFrame index
submission_columns = ["image_id", "soil_type"]
submission = pd.DataFrame(data=predictions, columns=submission_columns)
submission.to_csv("submission.csv", index=False)

row_count = len(submission)
print("submission.csv generated with", row_count, "rows.")


✅ submission.csv generated with 341 rows.
