In [3]:
import os

is_kaggle_notebook = os.path.exists("/kaggle/input")

# 必要パッケージをインストール
if is_kaggle_notebook:
    !pip install /kaggle/input/rdkit-2025-3-3-cp311/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl
    !pip install /kaggle/input/torch-geometric-2-6-1/torch_geometric-2.6.1-py3-none-any.whl
    !pip install /kaggle/input/mordredcommunity/mordredcommunity-2.0.6-py3-none-any.whl

In [4]:
import sys
import warnings
from pathlib import Path
from glob import glob

import pandas as pd

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# On Kaggle, add the torch-molecule source tree under /kaggle/input to the
# module search path so it can be imported.
if is_kaggle_notebook:
    sys.path += ["/kaggle/input/torch-molecule-src/torch-molecule"]

In [5]:
# Identifier of the project snapshot to load on Kaggle (presumably a pull-request
# number -- confirm). It is zero-padded to four digits when building the path
# "/kaggle/input/myproject-pr-{pr_number:04}" in the next cell.
pr_number = 1

In [6]:
if is_kaggle_notebook:
    module_path = f"/kaggle/input/myproject-pr-{pr_number:04}"
    !mkdir src
    !cp -r $module_path/* src/
    src_path = "./"
else:
    src_path = "../"

sys.path.append(src_path)

from src.data import (
    add_descriptors,
    add_external_data,
    add_graph_features,
    add_count_atoms,
    load_data,
    make_smile_canonical,
    add_maccs
)
from src.model import train_lgb_for_target, load_lgb_model
from src.utils import NULL_FOR_SUBMISSION, generate_scaffold, score

In [7]:
# --- Inference configuration -------------------------------------------------
exp = "exp029"
model_name = "lgb"
dataset_id = f"model-{exp}"
targets = ["Tg", "FFV", "Tc", "Density", "Rg"]

# File extension of the saved model files for the selected model type.
if model_name in ["gnn", "grea"]:
    extension = "pt"
elif model_name == "lgb":
    extension = "txt"
else:
    # Fail here with a clear message instead of a NameError on `extension` below.
    raise ValueError(f"Unsupported model_name: {model_name!r}")

# Locate the model files and the raw data directory for the current environment.
# sorted(): glob returns files in filesystem-dependent order; sorting keeps the
# order in which fold predictions are accumulated reproducible across runs.
if is_kaggle_notebook:
    model_paths = sorted(glob(f"/kaggle/input/{dataset_id}/*.{extension}"))
    data_dir = Path("/kaggle/input")
else:
    model_paths = sorted(glob(f"../outputs/{exp}/model/*.{extension}"))
    data_dir = Path("../data/raw")

print(model_paths)

# --- Feature generation for the test set -------------------------------------
_, test = load_data(data_dir)
# Canonicalize the SMILES strings.
test["SMILES"] = test["SMILES"].apply(make_smile_canonical)
# RDKit descriptors and Morgan fingerprints.
test = add_descriptors(test, radius=2, fp_size=1024)
test = add_maccs(test)

# Make the column names unique: the first occurrence of a name is kept as is and
# every later duplicate gets a numeric suffix (_1, _2, ...) in order of
# appearance. The features are later selected by the names stored in each model
# (model.feature_name()), so this presumably mirrors the renaming applied at
# training time -- confirm against the training code.
new_cols = []
seen = {}
for col in test.columns:
    if col in seen:
        seen[col] += 1
        new_cols.append(f"{col}_{seen[col]}")
    else:
        seen[col] = 0
        new_cols.append(col)

test.columns = new_cols
display(test.head())

# --- Submission frame ---------------------------------------------------------
submission = pd.read_csv(
    data_dir / "neurips-open-polymer-prediction-2025/sample_submission.csv"
)

# Predictions are added to the submission by row position, so both frames must
# describe the same rows in the same order.
if len(submission) != len(test):
    raise ValueError(
        f"Row count mismatch: submission has {len(submission)} rows, test has {len(test)}"
    )
if "id" in submission.columns and "id" in test.columns:
    # Compare as strings so a pure dtype difference does not trigger a false alarm.
    if submission["id"].astype(str).tolist() != test["id"].astype(str).tolist():
        raise ValueError("The id columns of the submission and the test set are not aligned")

# Start every target from zero so the accumulated average does not depend on
# whatever placeholder values the sample submission file contains.
submission[targets] = 0.0

X_test = test["SMILES"].to_list()

# --- Ensemble prediction ------------------------------------------------------
if model_name in ["gnn", "grea"]:
    use_model_paths = [path for path in model_paths if model_name in path]
    if not use_model_paths:
        raise FileNotFoundError(f"No {model_name} model files found in {model_paths}")
    # NOTE(review): `get_model` is not imported in the cells visible here; this
    # branch raises NameError unless it is defined elsewhere -- confirm.
    for model_path in use_model_paths:
        model = get_model(model_name)()

        model.load(model_path)
        sub = model.predict(X_test)["prediction"]

        # Average over the models actually used, not over every file found.
        for idx, target in enumerate(targets):
            submission[target] += sub[:, idx] / len(use_model_paths)
elif model_name == "lgb":
    for target in targets:
        # Select the files whose name contains the target as a whole
        # "_"-separated token (e.g. model_Tg_1.txt -> ["model", "Tg", "1"]), so one
        # target name can never match as a substring of another.
        use_model_paths = [
            model_path
            for model_path in model_paths
            if target in Path(model_path).stem.split("_")
        ]
        print(target)
        print(use_model_paths)
        if not use_model_paths:
            # Without this check the target column would silently stay at zero.
            raise FileNotFoundError(f"No {model_name} model files found for target {target!r}")
        for model_path in use_model_paths:
            model = load_lgb_model(model_path)
            # Use exactly the feature columns stored in the model file.
            features = model.feature_name()
            submission[target] += model.predict(test[features], num_iteration=model.best_iteration) / len(use_model_paths)

display(submission.head())

['../outputs/exp029/model/model_Tc_2.txt', '../outputs/exp029/model/model_Rg_4.txt', '../outputs/exp029/model/model_Tg_1.txt', '../outputs/exp029/model/model_Rg_0.txt', '../outputs/exp029/model/model_FFV_0.txt', '../outputs/exp029/model/model_Tc_4.txt', '../outputs/exp029/model/model_Density_4.txt', '../outputs/exp029/model/model_FFV_1.txt', '../outputs/exp029/model/model_Tg_2.txt', '../outputs/exp029/model/model_Rg_1.txt', '../outputs/exp029/model/model_Tg_4.txt', '../outputs/exp029/model/model_Density_0.txt', '../outputs/exp029/model/model_Density_3.txt', '../outputs/exp029/model/model_FFV_2.txt', '../outputs/exp029/model/model_Tc_0.txt', '../outputs/exp029/model/model_Tg_0.txt', '../outputs/exp029/model/model_FFV_3.txt', '../outputs/exp029/model/model_Rg_2.txt', '../outputs/exp029/model/model_Rg_3.txt', '../outputs/exp029/model/model_FFV_4.txt', '../outputs/exp029/model/model_Tg_3.txt', '../outputs/exp029/model/model_Tc_1.txt', '../outputs/exp029/model/model_Density_1.txt', '../outp

Generating descriptors:   0%|          | 0/3 [00:00<?, ?it/s]

Generating maccs:   0%|          | 0/3 [00:00<?, ?it/s]

Unnamed: 0,id,SMILES,MaxEStateIndex,MinEStateIndex,MaxAbsEStateIndex,MinAbsEStateIndex,qed,MolWt,HeavyAtomMolWt,ExactMolWt,...,maccs_157,maccs_158,maccs_159,maccs_160,maccs_161,maccs_162,maccs_163,maccs_164,maccs_165,maccs_166
0,1109053969,*Oc1ccc(C=NN=Cc2ccc(Oc3ccc(C(c4ccc(*)cc4)(C(F)...,14.296609,-5.63114,14.296609,0.08466,0.133192,540.463,522.319,540.127247,...,1,0,1,0,1,1,1,1,1,0
1,1422188626,*Oc1ccc(C(C)(C)c2ccc(Oc3ccc(C(=O)c4cccc(C(=O)c...,13.208391,-0.162743,13.208391,0.079396,0.195542,510.589,484.381,510.183109,...,1,0,1,1,0,1,1,1,1,0
2,2032016830,*c1cccc(OCCCCCCCCOc2cccc(N3C(=O)c4ccc(-c5cccc6...,13.556487,-0.654083,13.556487,0.203889,0.137097,586.644,556.404,586.210387,...,1,1,1,0,1,1,1,1,1,0


Tg
['../outputs/exp029/model/model_Tg_1.txt', '../outputs/exp029/model/model_Tg_2.txt', '../outputs/exp029/model/model_Tg_4.txt', '../outputs/exp029/model/model_Tg_0.txt', '../outputs/exp029/model/model_Tg_3.txt']
FFV
['../outputs/exp029/model/model_FFV_0.txt', '../outputs/exp029/model/model_FFV_1.txt', '../outputs/exp029/model/model_FFV_2.txt', '../outputs/exp029/model/model_FFV_3.txt', '../outputs/exp029/model/model_FFV_4.txt']
Tc
['../outputs/exp029/model/model_Tc_2.txt', '../outputs/exp029/model/model_Tc_4.txt', '../outputs/exp029/model/model_Tc_0.txt', '../outputs/exp029/model/model_Tc_1.txt', '../outputs/exp029/model/model_Tc_3.txt']
Density
['../outputs/exp029/model/model_Density_4.txt', '../outputs/exp029/model/model_Density_0.txt', '../outputs/exp029/model/model_Density_3.txt', '../outputs/exp029/model/model_Density_1.txt', '../outputs/exp029/model/model_Density_2.txt']
Rg
['../outputs/exp029/model/model_Rg_4.txt', '../outputs/exp029/model/model_Rg_0.txt', '../outputs/exp029/m

Unnamed: 0,id,Tg,FFV,Tc,Density,Rg
0,1109053969,161.042486,0.376797,0.184585,1.214456,22.10369
1,1422188626,162.675741,0.378588,0.223179,1.077502,19.893957
2,2032016830,97.460456,0.353535,0.238645,1.11855,19.668416


In [21]:
# Write the predictions to disk only when running on Kaggle; local runs just
# display the frame in the cell above and leave no file behind.
if is_kaggle_notebook:
    # index=False keeps the DataFrame index out of the CSV.
    submission.to_csv(path_or_buf="submission.csv", index=False)
    print("✅ submission saved to submission.csv")