In [1]:
import os

is_kaggle_notebook = os.path.exists("/kaggle/input")

# 必要パッケージをインストール
if is_kaggle_notebook:
    !pip install /kaggle/input/rdkit-2025-3-3-cp311/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl
    !pip install /kaggle/input/torch-geometric-2-6-1/torch_geometric-2.6.1-py3-none-any.whl

In [2]:
import os
import sys
import warnings
from pathlib import Path
from glob import glob

import pandas as pd

if is_kaggle_notebook:
    sys.path.append("/kaggle/input/torch-molecule-src/torch-molecule")

warnings.filterwarnings("ignore")

In [3]:
pr_number = 1

In [4]:
if is_kaggle_notebook:
    module_path = f"/kaggle/input/myproject-pr-{pr_number:04}"
    !mkdir src
    !cp -r $module_path/* src/
    src_path = "./"
else:
    src_path = "../"

sys.path.append(src_path)

from src.data import (
    add_descriptors,
    add_external_data,
    add_graph_features,
    add_count_atoms,
    load_data,
    make_smile_canonical,
)
from src.model import train_lgb_for_target, load_lgb_model
from src.utils import NULL_FOR_SUBMISSION, generate_scaffold, score

In [9]:
exp = "exp024"
model_name = "lgb"
dataset_id = f"model-{exp}"
targets = ["Tg", "FFV", "Tc", "Density", "Rg"]

if model_name in ["gnn", "grea"]:
    extension = "pt"
elif model_name == "lgb":
    extension = "txt"

if is_kaggle_notebook:
    model_paths = list(glob(f"/kaggle/input/{dataset_id}/*.{extension}"))
    data_dir = Path("/kaggle/input")
else:
    model_paths = list(glob(f"../outputs/{exp}/model/*.{extension}"))
    data_dir = Path("../data/raw")

print(model_paths)

_, test = load_data(data_dir)
# SMILES 式を一意化
test["SMILES"] = test["SMILES"].apply(make_smile_canonical)
test = add_descriptors(test)
test = add_graph_features(test)
test = add_count_atoms(test)
features = test.drop(["id", "SMILES"], axis=1).columns

print(features)
display(test.head())

submission = pd.read_csv(
    data_dir / "neurips-open-polymer-prediction-2025/sample_submission.csv"
)

X_test = test["SMILES"].to_list()

if model_name in ["gnn", "grea"]:
    use_model_paths = [path for path in model_paths if model_name in path]
    for model_path in use_model_paths:
        model = get_model(model_name)()
        
        model.load(model_path)
        sub = model.predict(X_test)["prediction"]
    
        for idx, target in enumerate(targets):
            submission[target] += sub[:, idx] / len(model_paths)
elif model_name == "lgb":
    for idx, target in enumerate(targets):
        use_model_paths = [model_path for model_path in model_paths if target in model_path.split("/")[-1]]
        print(target)
        print(use_model_paths)        
        for model_path in use_model_paths:
            model = load_lgb_model(model_path)
            submission[target] += model.predict(test[features], num_iteration=model.best_iteration) / len(use_model_paths)

['../outputs/exp024/model/model_Tc_3.txt', '../outputs/exp024/model/model_Tc_1.txt', '../outputs/exp024/model/model_Density_1.txt', '../outputs/exp024/model/model_2.txt', '../outputs/exp024/model/model_Tc_0.txt', '../outputs/exp024/model/model_FFV_0.txt', '../outputs/exp024/model/model_Rg_0.txt', '../outputs/exp024/model/model_Rg_4.txt', '../outputs/exp024/model/model_Density_0.txt', '../outputs/exp024/model/model_Tc_4.txt', '../outputs/exp024/model/model_Rg_3.txt', '../outputs/exp024/model/model_Rg_1.txt', '../outputs/exp024/model/model_FFV_2.txt', '../outputs/exp024/model/model_Rg_2.txt', '../outputs/exp024/model/model_Tg_0.txt', '../outputs/exp024/model/model_Density_2.txt', '../outputs/exp024/model/model_3.txt', '../outputs/exp024/model/model_0.txt', '../outputs/exp024/model/model_Tg_1.txt', '../outputs/exp024/model/model_Tg_3.txt', '../outputs/exp024/model/model_Tg_4.txt', '../outputs/exp024/model/model_FFV_4.txt', '../outputs/exp024/model/model_Tc_2.txt', '../outputs/exp024/model

Generating descriptors:   0%|          | 0/3 [00:00<?, ?it/s]

Index(['MaxAbsEStateIndex', 'MaxEStateIndex', 'MinAbsEStateIndex',
       'MinEStateIndex', 'qed', 'SPS', 'MolWt', 'HeavyAtomMolWt', 'ExactMolWt',
       'NumValenceElectrons',
       ...
       'avg_shortest_path', 'num_cycle', 'num_C', 'num_c', 'num_O', 'num_N',
       'num_F', 'num_Cl', 'num_positive_ions', 'num_negative_ions'],
      dtype='object', length=1252)


Unnamed: 0,id,SMILES,MaxAbsEStateIndex,MaxEStateIndex,MinAbsEStateIndex,MinEStateIndex,qed,SPS,MolWt,HeavyAtomMolWt,...,avg_shortest_path,num_cycle,num_C,num_c,num_O,num_N,num_F,num_Cl,num_positive_ions,num_negative_ions
0,1109053969,*Oc1ccc(C=NN=Cc2ccc(Oc3ccc(C(c4ccc(*)cc4)(C(F)...,14.296609,14.296609,0.08466,-5.63114,0.133192,13.384615,540.463,522.319,...,8.673171,4.0,5,24,2,2,6,0,0,0
1,1422188626,*Oc1ccc(C(C)(C)c2ccc(Oc3ccc(C(=O)c4cccc(C(=O)c...,13.208391,13.208391,0.079396,-0.162743,0.195542,11.74359,510.589,484.381,...,8.836585,5.0,5,30,4,0,0,0,0,0
2,2032016830,*c1cccc(OCCCCCCCCOc2cccc(N3C(=O)c4ccc(-c5cccc6...,13.556487,13.556487,0.203889,-0.654083,0.137097,14.454545,586.644,556.404,...,9.912077,6.0,12,24,6,2,0,0,0,0


Tg
['../outputs/exp024/model/model_Tg_0.txt', '../outputs/exp024/model/model_Tg_1.txt', '../outputs/exp024/model/model_Tg_3.txt', '../outputs/exp024/model/model_Tg_4.txt', '../outputs/exp024/model/model_Tg_2.txt']
FFV
['../outputs/exp024/model/model_FFV_0.txt', '../outputs/exp024/model/model_FFV_2.txt', '../outputs/exp024/model/model_FFV_4.txt', '../outputs/exp024/model/model_FFV_1.txt', '../outputs/exp024/model/model_FFV_3.txt']
Tc
['../outputs/exp024/model/model_Tc_3.txt', '../outputs/exp024/model/model_Tc_1.txt', '../outputs/exp024/model/model_Tc_0.txt', '../outputs/exp024/model/model_Tc_4.txt', '../outputs/exp024/model/model_Tc_2.txt']
Density
['../outputs/exp024/model/model_Density_1.txt', '../outputs/exp024/model/model_Density_0.txt', '../outputs/exp024/model/model_Density_2.txt', '../outputs/exp024/model/model_Density_3.txt', '../outputs/exp024/model/model_Density_4.txt']
Rg
['../outputs/exp024/model/model_Rg_0.txt', '../outputs/exp024/model/model_Rg_4.txt', '../outputs/exp024/m

In [7]:
if is_kaggle_notebook:
    submission.to_csv("submission.csv", index=False)
    print("✅ submission saved to submission.csv")