In [2]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [12]:
class CFG:
    img_size    = 512
    batch_size  = 16
    model_name  = 'tf_efficientnet_b4_ns'
    weights_path= '/kaggle/input/soil_model-3/pytorch/default/1/best_fold3.pth'
    test_dir    = '/kaggle/input/soil-classification-part-2/soil_competition-2025/test'
    test_csv    = '/kaggle/input/soil-classification-part-2/soil_competition-2025/test_ids.csv'
    device      = 'cuda' if torch.cuda.is_available() else 'cpu'
    threshold   = 0.5

In [13]:
test_transform = A.Compose([
    A.Resize(CFG.img_size, CFG.img_size),
    A.Normalize(),
    ToTensorV2()
])

In [14]:
class SoilTestDataset(Dataset):
    def __init__(self, df, img_dir, transform=None):
        self.df       = df.reset_index(drop=True)
        self.img_dir  = img_dir
        self.transform= transform
        # build stem→filename map
        self.stem2file = {}
        for fname in os.listdir(self.img_dir):
            stem,_ = os.path.splitext(fname)
            self.stem2file[stem] = fname

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_id = str(self.df.loc[idx, 'image_id']).strip()
        stem   = os.path.splitext(img_id)[0]
        fname  = self.stem2file.get(stem)
        if fname is None:
            raise FileNotFoundError(f"No file for id {img_id}")
        path   = os.path.join(self.img_dir, fname)
        img    = np.array(Image.open(path).convert('RGB'))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, img_id

In [15]:
model = timm.create_model(CFG.model_name, pretrained=False, num_classes=1)
state = torch.load(CFG.weights_path, map_location=CFG.device)
model.load_state_dict(state)
model.to(CFG.device)
model.eval()

EfficientNet(
  (conv_stem): Conv2dSame(3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNormAct2d(
    48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn1): BatchNormAct2d(
          48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (aa): Identity()
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNo

In [16]:
test_df = pd.read_csv(CFG.test_csv)
test_ds = SoilTestDataset(test_df, CFG.test_dir, transform=test_transform)
test_loader = DataLoader(test_ds, batch_size=CFG.batch_size, shuffle=False, num_workers=4)

In [17]:
preds = []
ids   = []
with torch.no_grad():
    for imgs, img_ids in test_loader:
        imgs = imgs.to(CFG.device)
        logits = model(imgs)
        probs  = torch.sigmoid(logits).cpu().numpy().flatten()
        preds.extend((probs > CFG.threshold).astype(int).tolist())
        ids.extend(img_ids)

In [19]:
submission = pd.DataFrame({
    'image_id': ids,
    'label': preds
})
submission.to_csv('submission.csv', index=False)
print("Created submissions.csv")

Created submissions.csv


In [20]:
from IPython.display import FileLink
display(FileLink("submission.csv"))