In [1]:
import os

is_kaggle_notebook = os.path.exists("/kaggle/input")

# 必要パッケージをインストール
if is_kaggle_notebook:
    !pip install /kaggle/input/rdkit-2025-3-3-cp311/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl
    !pip install /kaggle/input/torch-geometric-2-6-1/torch_geometric-2.6.1-py3-none-any.whl

In [2]:
import os
import sys
import warnings
from pathlib import Path
from glob import glob

import pandas as pd

if is_kaggle_notebook:
    sys.path.append("/kaggle/input/torch-molecule-src/torch-molecule")

warnings.filterwarnings("ignore")

In [3]:
pr_number = 1

In [4]:
if is_kaggle_notebook:
    module_path = f"/kaggle/input/myproject-pr-{pr_number:04}"
    !mkdir src
    !cp -r $module_path/* src/
    src_path = "./"
else:
    src_path = "../"

sys.path.append(src_path)

from src.data import (
    add_descriptors,
    add_external_data,
    add_graph_features,
    add_count_atoms,
    load_data,
    make_smile_canonical,
)
from src.model import train_lgb_for_target, load_lgb_model
from src.utils import NULL_FOR_SUBMISSION, generate_scaffold, score
from src.utils.upload_kaggle_dataset import (
    create_kaggle_dataset_metadata,
    upload_kaggle_dataset,
)

In [8]:
exp = "exp024"
model_name = "lgb"
dataset_id = f"model-{exp}"
targets = ["Tg", "FFV", "Tc", "Density", "Rg"]

if model_name in ["gnn", "grea"]:
    extension = "pt"
elif model_name == "lgb":
    extension = "txt"

if is_kaggle_notebook:
    model_paths = list(glob(f"/kaggle/input/{dataset_id}/*.{extension}"))
    data_dir = Path("/kaggle/input")
else:
    model_paths = list(glob(f"../outputs/{exp}/model/*.{extension}"))
    data_dir = Path("../data/raw")

print(model_paths)

_, test = load_data(data_dir)
# SMILES 式を一意化
test["SMILES"] = test["SMILES"].apply(make_smile_canonical)
test = add_descriptors(test)
test = add_graph_features(test)
test = add_count_atoms(test)
features = test.drop(["id", "SMILES"], axis=1).columns


submission = pd.read_csv(
    data_dir / "neurips-open-polymer-prediction-2025/sample_submission.csv"
)

X_test = test["SMILES"].to_list()

use_model_paths = [path for path in model_paths if model_name in path]

if model_name in ["gnn", "grea"]:
    for model_path in use_model_paths:
        model = get_model(model_name)()
        
        model.load(model_path)
        sub = model.predict(X_test)["prediction"]
    
        for idx, target in enumerate(targets):
            submission[target] += sub[:, idx] / len(model_paths)
elif model_name == "lgb":
    for model_path in use_model_paths:
        model = load_lgb_model(model_path)
    
        for idx, target in enumerate(targets):
            submission[target] += model.predict(X_test) / len(model_paths)


['../outputs/exp024/model/model_2.txt', '../outputs/exp024/model/model_3.txt', '../outputs/exp024/model/model_0.txt', '../outputs/exp024/model/model_4.txt', '../outputs/exp024/model/model_1.txt']


Generating descriptors:   0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
if is_kaggle_notebook:
    submission.to_csv("submission.csv", index=False)
    print("✅ submission saved to submission.csv")