# 🧠 Soil Image Classification Challenge - 5 Epochs Submission
**Competition**: Annam.ai @ IIT Ropar

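This notebook fine-tunes a pretrained ResNet-18 CNN for 5 epochs to classify each soil image into one of four categories (Alluvial, Black, Clay, or Red soil) and writes the test-set predictions to `submission.csv`.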

In [1]:
# SECTION 1: Setup
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torchvision import models
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder

# Seed the random number generators once, up front, so that data shuffling,
# random augmentation and the initialisation of the new classifier head are
# repeatable after "Restart Kernel -> Run All". GPU kernels can still introduce
# small run-to-run differences, so treat this as best-effort reproducibility.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [2]:
# SECTION 2: Paths and Labels
# Dataset root; the image folders and the CSV index files all live under it.
DATA_DIR = "soil_classification-2025"
TRAIN_DIR, TEST_DIR = (os.path.join(DATA_DIR, split) for split in ("train", "test"))

# Index files: the training CSV provides a `soil_type` column (encoded below);
# the test CSV lists the images to predict.
train_labels_df = pd.read_csv(os.path.join(DATA_DIR, "train_labels.csv"))
test_ids_df = pd.read_csv(os.path.join(DATA_DIR, "test_ids.csv"))

# Learn the soil-type -> integer mapping, then store the encoded class index
# next to the original string label.
label_encoder = LabelEncoder().fit(train_labels_df['soil_type'])
train_labels_df['label'] = label_encoder.transform(train_labels_df['soil_type'])


In [3]:
# SECTION 3: Custom Dataset
class SoilDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame from a directory.

    Args:
        df: DataFrame with an ``image_id`` column holding file names relative
            to ``img_dir``. When ``test`` is False it must also contain a
            ``label`` column of integer class indices.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to each RGB PIL image.
        test: When True, items are ``(image, image_id)``; otherwise they are
            ``(image, label)``.
    """

    def __init__(self, df, img_dir, transform=None, test=False):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.test = test

    def __len__(self):
        """Return the number of rows (images) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at positional index ``idx``.

        Returns:
            ``(image, image_id)`` in test mode, else ``(image, label)`` where
            ``label`` is a plain Python ``int``.
        """
        # Fetch the row once instead of re-indexing the frame per column.
        row = self.df.iloc[idx]
        img_id = row['image_id']
        img_path = os.path.join(self.img_dir, img_id)

        # convert() produces an independent image, so the source file can be
        # closed as soon as the conversion is done.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        if self.test:
            return image, img_id

        # Cast to a Python int so the default collate function always builds
        # an int64 tensor (what CrossEntropyLoss expects for class indices),
        # whatever integer width the label column happens to use.
        return image, int(row['label'])


In [4]:
# SECTION 4: Transforms and Dataloaders
# Preprocessing settings shared by the training and test pipelines.
IMAGE_SIZE = (224, 224)
NORM_MEAN = [0.485, 0.456, 0.406]  # per-channel mean used for normalisation
NORM_STD = [0.229, 0.224, 0.225]   # per-channel std used for normalisation
BATCH_SIZE = 32

# Training pipeline: resize, apply light augmentation (random horizontal flip
# and a random rotation of up to 10 degrees), then convert to a normalised tensor.
transform_train = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Test pipeline: same resize and normalisation, but no random augmentation.
transform_test = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Training batches are reshuffled every epoch.
train_dataset = SoilDataset(
    df=train_labels_df, img_dir=TRAIN_DIR, transform=transform_train
)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Test batches keep the CSV order so predictions line up with the image ids.
test_dataset = SoilDataset(
    df=test_ids_df, img_dir=TEST_DIR, transform=transform_test, test=True
)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [5]:
# SECTION 5: Model
# Size the classifier head from the fitted encoder instead of a literal, so
# the number of output units always matches the classes that
# `label_encoder.inverse_transform` can decode.
NUM_CLASSES = len(label_encoder.classes_)

# `pretrained=True` is deprecated in newer torchvision releases in favour of
# the `weights=` argument; IMAGENET1K_V1 selects the same checkpoint. Fall
# back to the legacy flag on versions that predate the weights enums.
_resnet18_weights = getattr(models, "ResNet18_Weights", None)
if _resnet18_weights is not None:
    model = models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Replace the final fully connected layer with a fresh soil-type head.
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(device)




In [6]:
# SECTION 6: Training
# Training hyper-parameters, kept in one place so the loop, the progress bar
# and the log line cannot drift apart.
NUM_EPOCHS = 5
LEARNING_RATE = 1e-4

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    model.train()  # enable training-mode behaviour (dropout, batch-norm updates)
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        images, labels = images.to(device), labels.to(device)

        # Standard optimisation step: clear stale gradients, forward pass,
        # back-propagate the loss, then update the weights.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses; guard against an empty loader so the
    # report cannot fail with a ZeroDivisionError.
    num_batches = max(len(train_loader), 1)
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {running_loss/num_batches:.4f}")


Epoch 1/5: 100%|██████████| 39/39 [05:48<00:00,  8.93s/it]


Epoch [1/5], Loss: 0.3885


Epoch 2/5: 100%|██████████| 39/39 [05:01<00:00,  7.72s/it]


Epoch [2/5], Loss: 0.1086


Epoch 3/5: 100%|██████████| 39/39 [05:14<00:00,  8.07s/it]


Epoch [3/5], Loss: 0.0667


Epoch 4/5: 100%|██████████| 39/39 [05:31<00:00,  8.51s/it]


Epoch [4/5], Loss: 0.0489


Epoch 5/5: 100%|██████████| 39/39 [05:18<00:00,  8.16s/it]

Epoch [5/5], Loss: 0.0416





In [7]:
# SECTION 7: Prediction
# Switch to inference behaviour before scoring the test set.
model.eval()
predictions, image_ids = [], []

# Gradients are not needed for prediction, so disable autograd tracking.
with torch.no_grad():
    for images, ids in tqdm(test_loader, desc="Predicting"):
        outputs = model(images.to(device))
        # The predicted class is the index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        predictions.extend(predicted.cpu().numpy())
        image_ids.extend(ids)


Predicting: 100%|██████████| 11/11 [00:38<00:00,  3.54s/it]


In [8]:
# SECTION 8: Submission
# Map the predicted class indices back to their soil-type names.
predicted_labels = label_encoder.inverse_transform(predictions)

# One row per test image, in the order the test loader produced them.
submission_df = pd.DataFrame({"image_id": image_ids}).assign(
    soil_type=predicted_labels
)
submission_df.to_csv("submission.csv", index=False)
print("submission.csv saved.")


submission.csv saved.
