Import Libraries

In [9]:
import torch
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from PIL import Image
import os
from tqdm import tqdm

In [None]:
Config

In [11]:
TEST_DIR = "../data/test"
TEST_IDS_CSV = "../data/test_ids.csv"
BATCH_SIZE = 32
NUM_CLASSES = 4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CLASSES = ['Alluvial soil', 'Black Soil', 'Clay soil', 'Red soil']

In [None]:
Image Transformation

In [13]:
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [None]:
Test Dataset

In [15]:
class TestSoilDataset(Dataset):
    def __init__(self, img_dir, test_ids_csv, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        self.test_ids_df = pd.read_csv(test_ids_csv)
        self.image_ids = self.test_ids_df['image_id'].tolist()

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        img_name = self.image_ids[idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, img_name

test_dataset = TestSoilDataset(TEST_DIR, TEST_IDS_CSV, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
Load Model

In [21]:
model = models.resnet18(pretrained=False)
model.fc = torch.nn.Linear(model.fc.in_features, NUM_CLASSES)
model.load_state_dict(torch.load("../models/best_model.pth", map_location=DEVICE))
model = model.to(DEVICE)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
Prediction

In [23]:
predictions = []
with torch.no_grad():
    for images, image_ids in tqdm(test_loader, desc="Predicting"):
        images = images.to(DEVICE)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        preds = preds.cpu().numpy()
        for img_id, pred in zip(image_ids, preds):
            predictions.append((img_id, CLASSES[pred]))

Predicting: 100%|██████████████████████████████████████████████████████████████████████| 11/11 [00:24<00:00,  2.25s/it]


In [None]:
Prediction to CSV

In [25]:
submission_df = pd.DataFrame(predictions, columns=["image_id", "soil_type"])
submission_df.to_csv("submission.csv", index=False)

print("Inference complete. Submission saved as submission.csv")

Inference complete. Submission saved as submission.csv


In [33]:
import os
import json

save_dir = "../docs/cards"
os.makedirs(save_dir, exist_ok=True)  

metrics = {
    "num_predictions": len(predictions),
    "classes": CLASSES,
    "sample_predictions": predictions[:10]
}

json_path = os.path.join(save_dir, "ml-metrics.json")
with open(json_path, "w") as f:
    json.dump(metrics, f, indent=4)

print(f"Metrics saved to {json_path}")


Metrics saved to ../docs/cards\ml-metrics.json
