# In [None]:
# inference.ipynb

import os
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import resnet18, ResNet18_Weights

from sklearn.metrics import f1_score, classification_report
import joblib

# Paths and Data Loading
# Everything lives under a single dataset root; image folders and CSV files
# are resolved relative to it.
base_path = "/kaggle/input/soil-classification-1/soil_classification-2025"

train_csv_path = os.path.join(base_path, "train_labels.csv")
test_ids_path = os.path.join(base_path, "test_ids.csv")

train_img_dir = os.path.join(base_path, "train")
test_img_dir = os.path.join(base_path, "test")

# Read each CSV and immediately normalise its column names: the dataset class
# below looks images up through the 'image' column, and the label encoder is
# applied to the 'label' column.
train_df = pd.read_csv(train_csv_path).rename(
    columns={'image_id': 'image', 'soil_type': 'label'}
)
test_ids = pd.read_csv(test_ids_path).rename(columns={'image_id': 'image'})

# Load label encoder
# The encoder is restored from disk (not refit) and used to attach an integer
# class id to every training row.
label_encoder = joblib.load('label_encoder.pkl')
train_df['label_enc'] = label_encoder.transform(train_df['label'])

# Split train and validation
from sklearn.model_selection import train_test_split

# Stratified 80/20 split with a fixed seed so the same rows are selected on
# every run. Only the validation part is used in this notebook.
# NOTE(review): presumably these settings mirror the training notebook so the
# validation rows were not seen during training - confirm.
split_options = dict(
    test_size=0.2,
    stratify=train_df['label_enc'],
    random_state=42,
)
_, val_split = train_test_split(train_df, **split_options)

# Transformations
# Per-channel normalisation statistics (RGB order). These are the values
# torchvision documents for its ImageNet-pretrained classification models.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Deterministic preprocessing: fixed-size resize, tensor conversion, then
# channel-wise normalisation. No random augmentation is applied.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

# Dataset class
class SoilDataset(Dataset):
    """Image dataset driven by a DataFrame of file names.

    Args:
        df: DataFrame with an 'image' column holding file names relative to
            ``img_dir``. When ``is_test`` is False it must also have a
            'label_enc' column with the encoded class id.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB PIL image.
        is_test: If True, items are ``(image, file_name)``; otherwise
            ``(image, label_enc)``.
    """

    def __init__(self, df, img_dir, transform=None, is_test=False):
        # Work on a private, 0..n-1 indexed copy so positional lookups are
        # stable and the caller's frame is never modified.
        self.df = df.reset_index(drop=True).copy()
        self.img_dir = img_dir
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, convert and transform the sample at position ``idx``."""
        # Fetch the row once instead of re-slicing the frame per column.
        row = self.df.iloc[idx]
        img_name = row['image']
        img_path = os.path.join(self.img_dir, img_name)

        # Image.open is lazy and keeps the file open; convert() forces the
        # pixel data to be read into a new image, after which the context
        # manager releases the underlying file handle deterministically.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        if self.is_test:
            return image, img_name
        return image, row['label_enc']

# Validation rows come from the training CSV, so their images are read from
# the training image directory; test images have their own directory and no
# labels.
val_dataset = SoilDataset(val_split, train_img_dir, transform=transform)
test_dataset = SoilDataset(test_ids, test_img_dir, transform=transform, is_test=True)

# Both loaders use identical settings. shuffle=False keeps batches in
# DataFrame row order.
loader_options = dict(batch_size=32, shuffle=False, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, **loader_options)
test_loader = DataLoader(test_dataset, **loader_options)

# Device and Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Build the bare ResNet-18 architecture without pretrained weights: every
# parameter and buffer is overwritten by the fine-tuned checkpoint loaded
# below, so fetching the ImageNet weights would only cost a download (and
# fail outright in an environment without internet access or a warm cache).
model = resnet18(weights=None)

# Replace the classification head so its output size matches the number of
# classes known to the label encoder; this must happen before loading the
# checkpoint so the 'fc' tensor shapes line up.
model.fc = nn.Linear(model.fc.in_features, len(label_encoder.classes_))

# map_location lets a checkpoint saved on GPU be restored on a CPU-only host.
state_dict = torch.load('resnet18_soil_model.pth', map_location=device)
model.load_state_dict(state_dict)

model = model.to(device)
# Inference mode for layers such as batch norm (use running statistics).
model.eval()

# Validation
# Score the model on the held-out split and collect ground-truth and predicted
# class ids as plain Python ints.
y_true, y_pred = [], []
with torch.no_grad():
    for images, labels in val_loader:
        # Only the images are needed on the device; the labels are compared
        # on the host, so they are not copied to the device and back.
        outputs = model(images.to(device))
        preds = torch.argmax(outputs, dim=1)
        y_true.extend(labels.tolist())
        y_pred.extend(preds.cpu().tolist())

print("Validation F1 Score:", f1_score(y_true, y_pred, average='weighted'))

# Pass the full list of class ids explicitly so each row of the report stays
# aligned with its name in target_names even if some class happens to be
# absent from both the validation labels and the predictions.
class_ids = list(range(len(label_encoder.classes_)))
print(classification_report(
    y_true,
    y_pred,
    labels=class_ids,
    target_names=label_encoder.classes_,
))

# Test predictions and submission
# Walk the test loader once, keeping file names and predicted class ids in two
# parallel lists (same order as the loader yields them).
image_ids, encoded_preds = [], []
with torch.no_grad():
    for batch, names in test_loader:
        logits = model(batch.to(device))
        image_ids.extend(names)
        encoded_preds.extend(logits.argmax(dim=1).cpu().numpy())

# Translate the integer class ids back to soil-type names and assemble the
# two-column submission table.
submission_df = pd.DataFrame({
    'image_id': image_ids,
    'soil_type': label_encoder.inverse_transform(encoded_preds),
})

submission_df.to_csv('submission-1.csv', index=False)
print(submission_df.head())
