In [50]:
import os

is_kaggle_notebook = os.path.exists("/kaggle/input")

# 必要パッケージをインストール
if is_kaggle_notebook:
    !pip install /kaggle/input/rdkit-2025-3-3-cp311/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl
    !pip install /kaggle/input/torch-geometric-2-6-1/torch_geometric-2.6.1-py3-none-any.whl
    !pip install /kaggle/input/mordredcommunity/mordredcommunity-2.0.6-py3-none-any.whl

In [51]:
import sys
import json
import warnings
from pathlib import Path
from glob import glob

import pandas as pd
import numpy as np

# Suppress all Python warnings for the rest of the notebook.
warnings.filterwarnings("ignore")

# On Kaggle, make the attached torch-molecule directory importable.
if is_kaggle_notebook:
    torch_molecule_dir = "/kaggle/input/torch-molecule-src/torch-molecule"
    sys.path.append(torch_molecule_dir)

In [52]:
# Number (presumably a pull-request number — TODO confirm) that is zero-padded
# to four digits and used to build the Kaggle dataset path holding the project
# sources ("/kaggle/input/myproject-pr-XXXX").
pr_number = 1

In [53]:
if is_kaggle_notebook:
    module_path = f"/kaggle/input/myproject-pr-{pr_number:04}"
    !mkdir src
    !cp -r $module_path/* src/
    src_path = "./"
else:
    src_path = "../"

sys.path.append(src_path)

from src.data import (
    add_descriptors,
    add_descriptors_mordred,
    add_external_data,
    add_graph_features,
    add_count_atoms,
    load_data,
    make_smile_canonical,
    add_maccs,
    generate_trimer
)
from src.model import train_lgb_for_target, load_lgb_model, get_model
from src.utils import NULL_FOR_SUBMISSION, generate_scaffold, score

In [79]:
# Name of the ensemble-weight set; it is interpolated into the weights.json path.
weight_name = "20250831"

# Experiments to ensemble. Each entry holds the experiment id ("exp"), the model
# type ("model") and, optionally, the "is_trimmer_cyclic" flag.
configs = [
    dict(
        exp="exp048-3",
        model="lgb",
        # is_trimmer_cyclic=True,
    ),
    # dict(
    #     exp="exp047-1",
    #     model="gnn",
    #     is_trimmer_cyclic=False,
    # ),
]


In [85]:
# Scratch expression: builds a nested dict mapping each key to {1: 1}.
# NOTE(review): looks like leftover exploration of the dict-comprehension shape
# used for the ensemble weights — consider deleting this cell.
{k: {1: 1} for k in ["a", "b"]}

{'a': {1: 1}, 'b': {1: 1}}

In [86]:
# Target names; they index both the weights dict and the submission columns.
targets = ["Tg", "FFV", "Tc", "Density", "Rg"]

# Resolve the environment-specific locations of the weights file and the data.
if is_kaggle_notebook:
    weight_path = Path(f"/kaggle/input/ensemble-{weight_name}/weights.json")
    data_dir = Path("/kaggle/input")
else:
    weight_path = Path(f"../outputs/ensemble/{weight_name}/weights.json")
    data_dir = Path("../data/raw")

# With exactly one config every target gets weight 1 for that experiment;
# otherwise the per-target weights are read from the JSON file.
if len(configs) != 1:
    with open(weight_path, "r") as weight_file:
        weights = json.load(weight_file)
else:
    single_exp = configs[0]["exp"]
    weights = {target: {single_exp: 1} for target in targets}
print(weights)

# The sample submission provides the frame that predictions are written into.
sample_submission_path = (
    data_dir / "neurips-open-polymer-prediction-2025" / "sample_submission.csv"
)
submission = pd.read_csv(sample_submission_path)

{'Tg': {'exp048-3': 1}, 'FFV': {'exp048-3': 1}, 'Tc': {'exp048-3': 1}, 'Density': {'exp048-3': 1}, 'Rg': {'exp048-3': 1}}


In [72]:
# Generate features for the test set.
_, test = load_data(data_dir)
canonical_smiles = test["SMILES"].apply(make_smile_canonical)
test["SMILES"] = canonical_smiles
test = add_maccs(test)
test = add_descriptors(test, radius=2, fp_size=1024)
test = add_descriptors_mordred(
    test, num_confs=10, ignore_3D=True, ignore_3d_stats=True
)

# Make column names unique: the first occurrence keeps its name, later
# occurrences get a "_1", "_2", ... suffix.
seen = {}
new_cols = []
for col in test.columns:
    occurrence = seen.get(col, -1) + 1
    seen[col] = occurrence
    new_cols.append(col if occurrence == 0 else f"{col}_{occurrence}")
test.columns = new_cols

# Graph features.
test = add_graph_features(test)
test = add_count_atoms(test)

# Coerce object-typed feature columns to numeric (unparseable values -> NaN).
features = test.drop(["id", "SMILES"], axis=1).columns
object_cols = [col for col in features if test[col].dtype == "object"]
for col in object_cols:
    test[col] = pd.to_numeric(test[col], errors="coerce")

# Generate the trimer SMILES via generate_trimer; when it raises ValueError the
# original SMILES is kept and the failure is counted.
trimer_smiles = []
error_cnt = 0
for smiles in test["SMILES"].values:
    try:
        trimer = generate_trimer(smiles)
    except ValueError as e:
        print(f"smiles: {smiles}, {e}")
        error_cnt += 1
        trimer = smiles
    trimer_smiles.append(trimer)
test["SMILES_trimmer_cyclic"] = trimer_smiles
print(f"error smiles count: {error_cnt}")

Generating maccs:   0%|          | 0/3 [00:00<?, ?it/s]

Generating descriptors:   0%|          | 0/3 [00:00<?, ?it/s]

mordred desc:   0%|          | 0/3 [00:00<?, ?it/s]

100%|█████████████████████████████████████████████| 3/3 [00:00<00:00, 14.16it/s]


error smiles count: 0


In [87]:
# Weighted ensemble prediction.
# For every config, the saved models of each target are loaded, their
# predictions are averaged, and the average is added to the submission column
# scaled by the ensemble weight of that (target, experiment) pair.
# NOTE(review): assumes `test` and `submission` rows are in the same order — confirm.

# File extension of the saved model files for each supported model type.
model_extensions = {"gnn": "pt", "grea": "pt", "lgb": "txt"}

# Start from zero so that re-running this cell does not accumulate predictions
# on top of a previous run (or on whatever values the sample file contains).
for target in targets:
    submission[target] = 0.0

for config in configs:
    print(config)
    exp = config["exp"]
    model_name = config["model"]
    is_trimmer_cyclic = config.get("is_trimmer_cyclic", False)

    # Fail early on an unknown model type instead of hitting a NameError (or
    # silently reusing the extension of the previous config).
    if model_name not in model_extensions:
        raise ValueError(f"Unsupported model type: {model_name!r} (exp={exp})")
    extension = model_extensions[model_name]

    dataset_id = f"model-{exp}"
    if is_kaggle_notebook:
        path_pattern = f"/kaggle/input/{dataset_id}/*.{extension}"
    else:
        path_pattern = f"../outputs/{exp}/model/*.{extension}"
    # Sorted so the printed list and the averaging order are deterministic.
    model_paths = sorted(glob(path_pattern))
    print(model_paths)

    # Use the trimer SMILES only when the config asks for it (the original
    # condition was inverted).
    smiles_col = "SMILES_trimmer_cyclic" if is_trimmer_cyclic else "SMILES"
    X_test = test[smiles_col].to_list()

    for target in targets:
        weight = weights[target][exp]
        print(weight)

        # Match the target against the file name only, so that directory names
        # cannot cause false matches.
        use_model_paths = [
            path for path in model_paths if target in Path(path).name
        ]
        if not use_model_paths:
            if weight == 0:
                # Nothing to add for this target from this experiment.
                continue
            raise FileNotFoundError(
                f"No model files found for target {target!r} of {exp!r} "
                f"(pattern: {path_pattern})"
            )

        sub = np.zeros(len(test))
        for model_path in use_model_paths:
            if model_name == "lgb":
                model = load_lgb_model(model_path)
                # Local name: do not clobber the notebook-level `features`.
                model_features = model.feature_name()
                pred = model.predict(
                    test[model_features], num_iteration=model.best_iteration
                )
            else:
                model = get_model(model_name)()
                model.load(model_path)
                pred = model.predict(X_test)["prediction"].flatten()

            # Equal-weight average over the models of this target.
            sub += pred / len(use_model_paths)
        submission[target] += sub * weight

display(submission.head())

{'exp': 'exp048-3', 'model': 'lgb'}
['../outputs/exp048-3/model/model_Tc_2.txt', '../outputs/exp048-3/model/model_Rg_4.txt', '../outputs/exp048-3/model/model_Tg_1.txt', '../outputs/exp048-3/model/model_Rg_0.txt', '../outputs/exp048-3/model/model_FFV_0.txt', '../outputs/exp048-3/model/model_Tc_4.txt', '../outputs/exp048-3/model/model_Density_4.txt', '../outputs/exp048-3/model/model_FFV_1.txt', '../outputs/exp048-3/model/model_Tg_2.txt', '../outputs/exp048-3/model/model_Rg_1.txt', '../outputs/exp048-3/model/model_Tg_4.txt', '../outputs/exp048-3/model/model_Density_0.txt', '../outputs/exp048-3/model/model_Density_3.txt', '../outputs/exp048-3/model/model_FFV_2.txt', '../outputs/exp048-3/model/model_Tc_0.txt', '../outputs/exp048-3/model/model_Tg_0.txt', '../outputs/exp048-3/model/model_FFV_3.txt', '../outputs/exp048-3/model/model_Rg_2.txt', '../outputs/exp048-3/model/model_Rg_3.txt', '../outputs/exp048-3/model/model_FFV_4.txt', '../outputs/exp048-3/model/model_Tg_3.txt', '../outputs/exp048-

Unnamed: 0,id,Tg,FFV,Tc,Density,Rg
0,1109053969,156.466519,0.376884,0.184619,1.119226,21.343859
1,1422188626,156.680937,0.377837,0.240435,1.06141,20.466604
2,2032016830,142.91914,0.35087,0.275559,1.12503,21.238744


In [21]:
# Write the submission file only when running on Kaggle.
if is_kaggle_notebook:
    submission_file = "submission.csv"
    submission.to_csv(submission_file, index=False)
    print("✅ submission saved to submission.csv")

In [69]:
# Preview only the first rows instead of dumping the whole feature frame.
test.head()

Unnamed: 0,id,SMILES,maccs_0,maccs_1,maccs_2,maccs_3,maccs_4,maccs_5,maccs_6,maccs_7,...,num_cycle,num_C,num_c,num_O,num_N,num_F,num_Cl,num_positive_ions,num_negative_ions,SMILES_trimmer_cyclic
0,1109053969,*Oc1ccc(C=NN=Cc2ccc(Oc3ccc(C(c4ccc(*)cc4)(C(F)...,0,0,0,0,0,0,0,0,...,4.0,5,24,2,2,6,0,0,0,Oc1ccc(C=NN=Cc2ccc(Oc3ccc(C(c4ccc(Oc5ccc(C=NN=...
1,1422188626,*Oc1ccc(C(C)(C)c2ccc(Oc3ccc(C(=O)c4cccc(C(=O)c...,0,0,0,0,0,0,0,0,...,5.0,5,30,4,0,0,0,0,0,CC(C)(c1ccc(O)cc1)c1ccc(Oc2ccc(C(=O)c3cccc(C(=...
2,2032016830,*c1cccc(OCCCCCCCCOc2cccc(N3C(=O)c4ccc(-c5cccc6...,0,0,0,0,0,0,0,0,...,6.0,12,24,6,2,0,0,0,0,O=C1NC(=O)c2c1cccc2-c1ccc2c(c1)C(=O)N(c1cccc(O...
