In [1]:
import sys

# Put the vendored timm sources at the front of sys.path so that the
# `import timm` below resolves to this copy.
sys.path.insert(0, "/kaggle/input/csiro-timm-latest/pytorch-image-models-1.0.22")

import timm

# Report which timm build was actually imported.
print("version:", timm.__version__)
print("file:", timm.__file__)



version: 1.0.22
file: /kaggle/input/csiro-timm-latest/pytorch-image-models-1.0.22/timm/__init__.py


In [2]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
tqdm.pandas()

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import timm
from pytorch_lightning import LightningModule
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

from sklearn.model_selection import KFold, GroupKFold, StratifiedGroupKFold

from types import SimpleNamespace

  data = fetch_version_info()


In [3]:
# Show the first ten timm model names matching the "*dino*" pattern.
timm.list_models("*dino*")[:10]

['vit_7b_patch16_dinov3',
 'vit_base_patch14_dinov2',
 'vit_base_patch14_reg4_dinov2',
 'vit_base_patch16_dinov3',
 'vit_base_patch16_dinov3_qkvb',
 'vit_giant_patch14_dinov2',
 'vit_giant_patch14_reg4_dinov2',
 'vit_huge_plus_patch16_dinov3',
 'vit_huge_plus_patch16_dinov3_qkvb',
 'vit_large_patch14_dinov2']

In [4]:
DATA_ROOT = '/kaggle/input/csiro-biomass/'

# Load train.csv and split sample_id ("<prefix>__<suffix>") into two columns.
# DATA_ROOT already ends with "/", so join by plain concatenation (as the
# image paths below do) instead of inserting a second slash.
train_df = pd.read_csv(DATA_ROOT + 'train.csv')
train_df[['sample_id_prefix', 'sample_id_suffix']] = train_df.sample_id.str.split('__', expand=True)

# Build agg_train_df: one row per group of `cols`, one column per target_name.
cols = ['sample_id_prefix', 'image_path', 'Sampling_Date', 'State', 'Species', 'Pre_GSHH_NDVI', 'Height_Ave_cm']
agg_train_df = (
    train_df
    # Select the value columns explicitly so apply() does not operate on the
    # grouping columns (that is what triggered the pandas warning on this line).
    .groupby(cols)[['target_name', 'target']]
    .apply(lambda df: df.set_index('target_name').target)
    .reset_index()
)
# Drop the leftover "target_name" label on the column index.
agg_train_df.columns.name = None

# Decode every training image once and keep it in memory as an RGB PIL image.
agg_train_df['image'] = agg_train_df.image_path.progress_apply(
    lambda path: Image.open(DATA_ROOT + path).convert('RGB')
)


  agg_train_df = train_df.groupby(cols).apply(lambda df: df.set_index('target_name').target)


  0%|          | 0/357 [00:00<?, ?it/s]

In [5]:
# 画像サイズ確認
agg_train_df['image_size'] = agg_train_df.image.apply(lambda x: x.size)
agg_train_df['image_size'].value_counts()

# ターゲット合計確認
np.isclose(agg_train_df[['Dry_Green_g', 'Dry_Clover_g']].sum(axis=1),
           agg_train_df['GDM_g'], atol=1e-4).mean()

np.isclose(agg_train_df[['GDM_g', 'Dry_Dead_g']].sum(axis=1),
           agg_train_df['Dry_Total_g'], atol=1e-4).mean()


0.9971988795518207

In [6]:
# test.csv
# Read test.csv and split sample_id ("<prefix>__<suffix>") into two columns.
test_df = pd.read_csv(DATA_ROOT + 'test.csv')
test_df[['sample_id_prefix', 'sample_id_suffix']] = (
    test_df['sample_id'].str.split('__', expand=True)
)

# Inference frame: keep the first row for each sample_id_prefix.
agg_test_df = test_df.drop_duplicates(subset=['sample_id_prefix']).copy()

# Decode each test image once and keep it in memory as an RGB PIL image.
agg_test_df['image'] = agg_test_df['image_path'].progress_apply(
    lambda rel_path: Image.open(DATA_ROOT + rel_path).convert('RGB')
)


  0%|          | 0/1 [00:00<?, ?it/s]

In [25]:
class InferenceDataset(Dataset):
    """Dataset that yields the left and right halves of each stored image.

    Args:
        df: DataFrame with an ``image`` column; each entry must expose
            ``.size`` (width, height) and ``.crop(box)``.
        transforms: optional callable invoked as ``transforms(image=array)``
            whose result is indexed with ``["image"]``. When falsy, the raw
            crops are returned unchanged.
    """

    def __init__(self, df, transforms):
        self.transforms = transforms
        # Positional access below relies on a clean 0..n-1 index.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return ``(left, right)`` for the image at position ``idx``."""
        image = self.df["image"].iloc[idx]
        width, height = image.size
        split_x = width // 2

        # Split vertically down the middle: [0, split_x) and [split_x, width).
        halves = (
            image.crop((0, 0, split_x, height)),
            image.crop((split_x, 0, width, height)),
        )

        if not self.transforms:
            return halves

        # Left is transformed before right, one crop at a time.
        left_out, right_out = (
            self.transforms(image=np.array(half))["image"] for half in halves
        )
        return left_out, right_out


In [26]:
# Check that the safetensors file loaded by TimmEncoder is present.
!ls /kaggle/input/model-dinov3-base/model.safetensors

/kaggle/input/model-dinov3-base/model.safetensors


In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
from safetensors.torch import load_file

class TimmEncoder(nn.Module):
    """Two-view regressor: a shared timm encoder followed by three heads.

    The left and right images go through the same encoder, their features are
    concatenated, and three independent heads predict one value each.

    Args:
        cfg: configuration object. Reads ``cfg.model.backbone``,
            ``cfg.task.slice_depth`` (used as ``in_chans``),
            ``cfg.model.dropout`` and ``cfg.model.freeze_backbone``.
            ``cfg.model.backbone_weights`` is optional: a path to a
            safetensors state dict for the encoder. When it is missing or
            ``None``, ``DEFAULT_BACKBONE_WEIGHTS`` is used.
    """

    # Encoder weights used when cfg.model.backbone_weights is not provided.
    DEFAULT_BACKBONE_WEIGHTS = "/kaggle/input/model-dinov3-base/model.safetensors"

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        self.encoder = timm.create_model(
            cfg.model.backbone,
            in_chans=cfg.task.slice_depth,
            pretrained=False,
            # drop_path_rate=cfg.model.drop_path_rate,
            features_only=False,
            num_classes=0,
            global_pool="",  # pooling is done manually in forward()
        )

        # The weights path is configurable instead of hard-coded; the default
        # keeps the previous behaviour for configs that do not set it.
        weights_path = (
            getattr(cfg.model, "backbone_weights", None)
            or self.DEFAULT_BACKBONE_WEIGHTS
        )
        state_dict = load_file(weights_path)
        self.encoder.load_state_dict(state_dict)
        self.dropout = nn.Dropout(cfg.model.dropout)

        # One small MLP head per target; the input is the concatenation of the
        # left and right features, hence ``num_features * 2``.
        def make_head():
            return nn.Sequential(
                nn.Linear(self.encoder.num_features * 2, 8),
                nn.ReLU(inplace=True),
                self.dropout,  # the same Dropout module is shared by all heads
                nn.Linear(8, 1),
            )

        self.head_green = make_head()
        self.head_clover = make_head()
        self.head_dead = make_head()

        # Softplus keeps every prediction strictly positive.
        self.softplus = nn.Softplus(beta=1.0)

        if cfg.model.freeze_backbone:
            for p in self.encoder.parameters():
                p.requires_grad = False

    def forward(self, left_img: torch.Tensor, right_img: torch.Tensor):
        """Encode both views, concatenate the features, and run the three heads.

        Args:
            left_img: batch for the left view, passed to the encoder as-is.
            right_img: batch for the right view, passed to the encoder as-is.

        Returns:
            Tensor with one column per head, in the order
            ``[green, clover, dead]``.
        """
        left_feat = self.encoder.forward_features(left_img)
        right_feat = self.encoder.forward_features(right_img)

        # Some backbones (e.g. ViT-style) return a 3-D tensor here; average
        # over dim 1 to get one feature vector per sample.
        if left_feat.ndim == 3:
            left_feat = left_feat.mean(dim=1)
            right_feat = right_feat.mean(dim=1)

        combined = torch.cat([left_feat, right_feat], dim=1)

        green = self.softplus(self.head_green(combined))
        clover = self.softplus(self.head_clover(combined))
        dead = self.softplus(self.head_dead(combined))

        out = torch.cat([green, clover, dead], dim=1)
        return out

    def set_grad_checkpointing(self, enable: bool = True):
        """Forward the gradient-checkpointing switch to the encoder."""
        self.encoder.set_grad_checkpointing(enable)


In [28]:
def get_model_from_cfg(cfg):
    """Instantiate the network selected by ``cfg.model.arch``.

    Args:
        cfg: configuration object; only ``"timm_encoder"`` is a supported
            value for ``cfg.model.arch``.

    Returns:
        A ``TimmEncoder`` built from ``cfg``.

    Raises:
        ValueError: if ``cfg.model.arch`` names an unknown architecture.
    """
    arch = cfg.model.arch
    if arch != "timm_encoder":
        raise ValueError(f"Unknown model architecture: {arch}")
    return TimmEncoder(cfg)

In [29]:

def get_loss(cfg):
    """Create the loss module for ``cfg`` (always a ``MyLoss`` instance)."""
    loss_module = MyLoss(cfg)
    return loss_module

class MyLoss(nn.Module):
    """Smooth-L1 regression loss with optional terms on derived sums.

    ``cfg.loss.use_gdm`` and ``cfg.loss.use_total`` (both default to False
    when absent) add extra Smooth-L1 terms on the sum of the first two
    columns and on the sum of all columns respectively.
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        # Base criterion, shared by every term.
        self.criterion = nn.SmoothL1Loss(reduction="mean")

        # Optional extensions, switched on through the config.
        loss_cfg = cfg.loss
        self.use_gdm = getattr(loss_cfg, "use_gdm", False)
        self.use_total = getattr(loss_cfg, "use_total", False)

    def forward(self, y_pred, y_true):
        """Compute the loss terms.

        Args:
            y_pred (Tensor[float]): (batch_size, 3)
                [Dry_Green_g, Dry_Clover_g, Dry_Dead_g]
            y_true (Tensor[float]): (batch_size, 3)

        Returns:
            dict: ``"loss_reg"`` (base regression loss) and ``"loss"`` (sum of
            all enabled terms), plus ``"loss_gdm"`` and/or
            ``"loss_total_biomass"`` when the matching option is enabled.
        """
        # Shape check.
        assert y_pred.shape == y_true.shape, \
            f"y_pred: {y_pred.shape}, y_true: {y_true.shape}"

        base_loss = self.criterion(y_pred, y_true)

        # Optional terms, collected in the order they are added to the total.
        result = {}
        if self.use_gdm:
            # GDM = Green + Clover
            result["loss_gdm"] = self.criterion(
                y_pred[:, 0] + y_pred[:, 1],
                y_true[:, 0] + y_true[:, 1],
            )
        if self.use_total:
            # Total = Green + Clover + Dead
            result["loss_total_biomass"] = self.criterion(
                y_pred.sum(dim=1),
                y_true.sum(dim=1),
            )

        combined = base_loss
        for extra_term in tuple(result.values()):
            combined = combined + extra_term

        result["loss_reg"] = base_loss
        result["loss"] = combined
        return result


def main():
    """Placeholder entry point; intentionally does nothing."""
    return None


# Script-style guard; main() is a no-op, so running it has no effect.
if __name__ == '__main__':
    main()


In [30]:
from pathlib import Path
import numpy as np
from pytorch_lightning.core.module import LightningModule
from timm.utils import ModelEmaV2
from timm.optim import create_optimizer_v2
from timm.scheduler import create_scheduler_v2
import torch

from timm.utils import ModelEmaV3



class MyModel(LightningModule):
    """Lightning wrapper around the two-view network and its loss.

    Args:
        cfg: configuration object. Reads ``cfg.model`` (``ema``,
            ``ema_decay``, ``ema_update_after_step``), ``cfg.opt``,
            ``cfg.scheduler`` and ``cfg.trainer.max_epochs``; the whole
            object is also passed to ``get_model_from_cfg`` and ``get_loss``.
        mode: when ``"test"`` no EMA copy of the model is created.
    """

    def __init__(self, cfg, mode="train"):
        super().__init__()
        self.preds = None
        self.gts = None

        self.cfg = cfg
        self.mode = mode

        self.model = get_model_from_cfg(cfg)

        # Buffers filled by validation_step and consumed once per epoch.
        self.val_outputs = []
        self.val_targets = []

        if mode != "test" and cfg.model.ema:
            self.model_ema = ModelEmaV3(
                self.model,
                decay=cfg.model.ema_decay,
                update_after_step=cfg.model.ema_update_after_step,
            )

        self.loss = get_loss(cfg)

    @staticmethod
    def _section_kwargs(section):
        """Return a config section in a form that can be expanded with ``**``.

        Mapping-like sections (anything exposing ``keys``) are returned
        unchanged; attribute containers such as ``SimpleNamespace`` are
        converted with ``vars()``, because ``**`` does not accept them.
        """
        return section if hasattr(section, "keys") else vars(section)

    def forward(self, left_img, right_img):
        """Run the wrapped network on one pair of image batches."""
        return self.model(left_img, right_img)

    def training_step(self, batch, batch_idx):
        """Compute and log the loss terms for one training batch."""
        left_img, right_img, targets = batch
        targets = targets.float()

        outputs = self(left_img, right_img)
        loss_dict = self.loss(outputs, targets)

        self.log_dict(
            loss_dict,
            on_step=False,
            on_epoch=True,
            prog_bar=True,
            sync_dist=True,
        )
        return loss_dict["loss"]

    def on_train_batch_end(self, out, batch, batch_idx):
        """Update the EMA weights after each batch when EMA is enabled."""
        if self.cfg.model.ema:
            self.model_ema.update(self.model)

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` and buffer predictions/targets for epoch metrics."""
        left_img, right_img, targets = batch
        targets = targets.float()

        outputs = self(left_img, right_img)
        loss_dict = self.loss(outputs, targets)

        self.log("val_loss", loss_dict["loss"], prog_bar=True, sync_dist=True)

        self.val_outputs.append(outputs.detach())
        self.val_targets.append(targets.detach())

        return loss_dict

    def on_validation_epoch_end(self):
        """Compute and log the R2 metrics over everything buffered this epoch."""
        outputs = torch.cat(self.val_outputs).cpu().numpy()
        targets = torch.cat(self.val_targets).cpu().numpy()

        # NOTE(review): calc_metric is not defined in the visible part of this
        # notebook; it must be provided before validation is run.
        weighted_r2, r2_scores = calc_metric(outputs, targets)

        # Log the aggregate metric.
        self.log("val_weighted_r2", weighted_r2, prog_bar=True)

        # Log each per-target score as well. (A copy-pasted second pass over
        # this loop and the clear() calls below has been removed.)
        for i, r2 in enumerate(r2_scores):
            self.log(f"val_r2_target_{i}", r2)

        # Reset the buffers for the next epoch.
        self.val_outputs.clear()
        self.val_targets.clear()

    def configure_optimizers(self):
        """Build the timm optimizer and epoch-level scheduler from ``cfg``."""
        # cfg.opt / cfg.scheduler may be mappings or SimpleNamespace objects;
        # normalise them so ``**`` expansion works for both.
        optimizer = create_optimizer_v2(
            model_or_params=self.model,
            **self._section_kwargs(self.cfg.opt)
        )

        scheduler, _ = create_scheduler_v2(
            optimizer=optimizer,
            num_epochs=self.cfg.trainer.max_epochs,
            **self._section_kwargs(self.cfg.scheduler)
        )

        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "interval": "epoch",
                "monitor": "val_weighted_r2",
            },
        }

    def lr_scheduler_step(self, scheduler, metric):
        """Step the scheduler with the current epoch number."""
        scheduler.step(epoch=self.current_epoch)
        # Per-update alternative, currently disabled:
        # scheduler.step_update(num_updates=self.global_step)


In [31]:
from types import SimpleNamespace

# Experiment configuration, built in one expression: one nested namespace per
# section, in the order task, model, data, trainer, test, opt, scheduler,
# loss, wandb.
cfg = SimpleNamespace(
    # --- task ---
    task=SimpleNamespace(
        img_size=224, img_depth=16, fixed_depth=16, slice_depth=3,
        pretrain=False, dirname="train_npzs",
    ),
    # --- model ---
    model=SimpleNamespace(
        freeze_end_epoch=0, arch="timm_encoder",
        in_channels=16, out_channels=1, depth=4, base_filters=64,
        dropout=0.1, use_batchnorm=True, activation="relu",
        swa=False, freeze_backbone=False,
        backbone="vit_base_patch16_dinov3_qkvb",
        ema=False, resume_path=None, drop_path_rate=0.0,
        img_size=128, img_depth=16, kernel_size=5, class_num=5,
    ),
    # --- data ---
    data=SimpleNamespace(
        fold_num=5, fold_id=0, num_workers=8, batch_size=32, train_all=False,
        input_dir=None, output_dir=None, val_output_dir=None,
    ),
    # --- trainer ---
    trainer=SimpleNamespace(
        max_epochs=30, devices="auto", strategy="auto",
        check_val_every_n_epoch=5, sync_batchnorm=False,
        accelerator="gpu", precision=32, gradient_clip_val=None,
        accumulate_grad_batches=1, deterministic=True,
    ),
    # --- test ---
    test=SimpleNamespace(mode="test", output_dir="preds_results"),
    # --- opt ---
    opt=SimpleNamespace(opt="AdamW", lr=1e-3, weight_decay=0.01),
    # --- scheduler ---
    scheduler=SimpleNamespace(sched="cosine", min_lr=0.0, warmup_epochs=0),
    # --- loss ---
    loss=SimpleNamespace(mixup=0.0, cutmix=0.0),
    # --- wandb ---
    wandb=SimpleNamespace(project="csiro2025", name="exp_0", fast_dev_run=False),
)


In [32]:
# List the directory that holds the checkpoint loaded for inference.
!ls /kaggle/input/csiro-simple-exp5

exp_5_epoch029_val_loss8.5536.ckpt


In [37]:
def get_val_transforms(cfg):
    """Build the validation/inference preprocessing pipeline.

    The pipeline resizes to a ``cfg.task.img_size`` square, normalizes with
    the fixed mean/std given below, and converts the result to a tensor.

    Args:
        cfg: configuration object; only ``cfg.task.img_size`` is read.

    Returns:
        The composed albumentations transform.
    """
    side = cfg.task.img_size
    steps = [
        A.Resize(height=side, width=side, p=1),
        A.Normalize(
            p=1.0,
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
        ),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)


In [38]:
# Checkpoint to restore, and the device to run on (GPU when available).
ckpt_path = "/kaggle/input/csiro-simple-exp5/exp_5_epoch029_val_loss8.5536.ckpt"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Restore the Lightning module in test mode, then move it to the device and
# switch it to evaluation mode.
model = MyModel.load_from_checkpoint(ckpt_path, cfg=cfg, mode="test")
model.to(device).eval()

# Test images go through the validation transforms, in their original order.
test_dataset = InferenceDataset(agg_test_df, get_val_transforms(cfg))
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

def predict(model, dataloader, device):
    """Run ``model`` over every batch and return the stacked predictions.

    Args:
        model: module called as ``model(left_img, right_img)``; it is moved to
            ``device`` and put in eval mode.
        dataloader: iterable of ``(left_img, right_img)`` tensor batches.
        device: device on which inference is executed.

    Returns:
        NumPy array with the per-batch outputs concatenated along dim 0.
    """
    model.to(device)
    model.eval()

    # Gradients are not needed; each batch result is moved back to the CPU.
    with torch.no_grad():
        batch_outputs = [
            model(left_img.to(device), right_img.to(device)).cpu()
            for left_img, right_img in dataloader
        ]

    return torch.cat(batch_outputs).numpy()


# Run inference over the whole test loader; `preds` has one row per test
# image, in loader order (the loader is built with shuffle=False).
preds = predict(model, test_loader, device)



In [39]:
# Attach the three predicted columns, then derive the two summed targets.
agg_test_df[['Dry_Green_g', 'Dry_Clover_g', 'Dry_Dead_g']] = preds
agg_test_df['GDM_g'] = agg_test_df['Dry_Green_g'] + agg_test_df['Dry_Clover_g']
agg_test_df['Dry_Total_g'] = agg_test_df['GDM_g'] + agg_test_df['Dry_Dead_g']

# Reshape to long format: one row per (sample_id_prefix, target_name) pair.
cols = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']
sub_df = (
    agg_test_df
    .set_index('sample_id_prefix')[cols]
    .stack()
    .reset_index()
)
sub_df.columns = ['sample_id_prefix', 'target_name', 'target']

# Rebuild the full sample_id ("<prefix>__<target_name>") and write the file.
sub_df['sample_id'] = sub_df['sample_id_prefix'] + '__' + sub_df['target_name']

sub_df[['sample_id', 'target']].to_csv('submission.csv', index=False)


In [40]:
# Preview the first rows of the long-format submission frame.
sub_df.head()

Unnamed: 0,sample_id_prefix,target_name,target,sample_id
0,ID1001187975,Dry_Clover_g,0.007295,ID1001187975__Dry_Clover_g
1,ID1001187975,Dry_Dead_g,14.745153,ID1001187975__Dry_Dead_g
2,ID1001187975,Dry_Green_g,28.728642,ID1001187975__Dry_Green_g
3,ID1001187975,Dry_Total_g,43.481091,ID1001187975__Dry_Total_g
4,ID1001187975,GDM_g,28.735937,ID1001187975__GDM_g


In [36]:
# Show the first lines of the written submission.csv.
!head submission.csv

sample_id,target
ID1001187975__Dry_Clover_g,0.007294931
ID1001187975__Dry_Dead_g,14.745153
ID1001187975__Dry_Green_g,28.728642
ID1001187975__Dry_Total_g,43.48109
ID1001187975__GDM_g,28.735937
