# 🔍 Inference - Soil Detection Challenge

This notebook loads the trained soil classifier and runs inference on the test set provided in `test_ids.csv`.

---


In [None]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from torchvision import models

from src.preprocessing import TestSoilDataset, get_transforms
from src.postprocessing import predict_with_threshold


In [None]:
# Artefacts used by this notebook: the trained checkpoint that is loaded and
# the CSV file the predictions are written to.
MODEL_PATH = "./model_best.pth"
OUTPUT_CSV = "./submission.csv"

# Test inputs: the `test` directory and the test CSV both sit under BASE_DIR.
# NOTE(review): the notebook introduction refers to `test_ids.csv`, while this
# cell reads `test.csv` -- confirm the file name against the dataset.
BASE_DIR = "./data/soil_competition-2025"
TEST_CSV = os.path.join(BASE_DIR, "test.csv")
TEST_DIR = os.path.join(BASE_DIR, "test")

# Table describing the test set; its rows drive both the dataset and the
# final submission file.
test_df = pd.read_csv(TEST_CSV)


In [None]:
# get_transforms() returns a pair; only the second element (the validation
# transform) is needed here and is reused unchanged for the test set.
_, val_transform = get_transforms()

test_dataset = TestSoilDataset(test_df, TEST_DIR, val_transform)

# shuffle=False keeps the batches in dataset order, so the collected
# predictions can later be paired with the rows of test_df.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)


In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the architecture only; the weights come from the checkpoint below.
# resnet18() is called without arguments on purpose: torchvision deprecated
# the `pretrained=` keyword in favour of `weights=`, and the no-argument call
# yields the same non-pretrained network under either API.
model = models.resnet18()

# Swap the final fully connected layer for a single-output linear layer
# followed by a sigmoid. The Sequential layout determines the parameter names
# (`fc.0.weight`, `fc.0.bias`) that load_state_dict looks up in the checkpoint.
model.fc = torch.nn.Sequential(
    torch.nn.Linear(model.fc.in_features, 1),
    torch.nn.Sigmoid()
)

# map_location lets a checkpoint saved on another device load onto `device`.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source, and consider `weights_only=True` where the installed torch
# version supports it.
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model = model.to(device)

# Evaluation mode: dropout and batch-norm layers use their inference behaviour.
model.eval()


In [None]:
# One flat array of model outputs per batch, in loader order.
batch_probs = []

# Inference only, so gradient tracking is switched off for the whole loop.
with torch.no_grad():
    for batch in test_loader:
        scores = model(batch.to(device))
        # Move the outputs to host memory and drop any extra dimensions.
        batch_probs.append(scores.cpu().numpy().reshape(-1))

# Stitch the per-batch arrays into one 1-D array. An empty loader still
# produces an empty array instead of failing in np.concatenate.
all_probs = np.concatenate(batch_probs) if batch_probs else np.array([])


In [None]:
# Decision threshold handed to predict_with_threshold together with the
# model outputs collected above.
BEST_THRESHOLD = 0.5  # Use the one from training (you can import or hardcode here)

predictions = predict_with_threshold(all_probs, BEST_THRESHOLD)

# Two-column submission: the `image` column copied from the test table and the
# thresholded prediction for each row.
submission_columns = {
    "image": test_df["image"],
    "label": predictions,
}
submission_df = pd.DataFrame(submission_columns)

# index=False keeps the DataFrame index out of the written file.
submission_df.to_csv(OUTPUT_CSV, index=False)
print(f"Submission saved to {OUTPUT_CSV}")
