## データセットの準備

In [1]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem

In [2]:
def space_clean(row):
    row = row.replace(". ", "").replace(" .", "").replace("  ", " ")
    return row


def canonicalize(smiles):
    try:
        new_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(smiles), canonical=True)
    except:
        new_smiles = None
    return new_smiles

In [3]:
df = pd.read_csv("./data/inchi_23l_reaction_t5_ready.csv")

In [4]:
# 必須カラムの存在チェックと補完
required_cols = ["REACTANT", "CATALYST", "REAGENT", "SOLVENT", "PRODUCT"]
for col in required_cols:
    if col not in df.columns:
        df[col] = ""

# 必要に応じてYIELDを標準化（0-1に正規化）
if "YIELD" in df.columns and df["YIELD"].max() >= 100:
    df["YIELD"] = df["YIELD"].clip(0, 100) / 100
else:
    df["YIELD"] = None

In [5]:
for col in ["REAGENT", "REACTANT", "PRODUCT"]:
    df[col] = df[col].apply(space_clean)
    df[col] = df[col].apply(lambda x: canonicalize(x) if x != " " else " ")
    df = df[~df[col].isna()].reset_index(drop=True)
    df[col] = df[col].apply(lambda x: ".".join(sorted(x.split("."))))

In [6]:
df["REAGENT"] = df["CATALYST"].fillna(" ") + "." + df["REAGENT"].fillna(" ")

In [7]:
df = df.loc[df[["YIELD"]].drop_duplicates().index].reset_index(drop=True)

## モデルの読み込み

In [8]:
import numpy as np
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, T5ForConditionalGeneration, AutoConfig, PreTrainedModel

import logging
logging.getLogger("transformers").setLevel(logging.ERROR)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
class ReactionT5Yield(PreTrainedModel):
    config_class  = AutoConfig
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.model = T5ForConditionalGeneration.from_pretrained(self.config._name_or_path)
        self.model.resize_token_embeddings(self.config.vocab_size)
        self.fc1 = nn.Linear(self.config.hidden_size, self.config.hidden_size//2)
        self.fc2 = nn.Linear(self.config.hidden_size, self.config.hidden_size//2)
        self.fc3 = nn.Linear(self.config.hidden_size//2*2, self.config.hidden_size)
        self.fc4 = nn.Linear(self.config.hidden_size, self.config.hidden_size)
        self.fc5 = nn.Linear(self.config.hidden_size, 1)

        self._init_weights(self.fc1)
        self._init_weights(self.fc2)
        self._init_weights(self.fc3)
        self._init_weights(self.fc4)
        self._init_weights(self.fc5)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=0.01)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=0.01)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def forward(self, inputs):
        device = inputs['input_ids'].device

        with torch.no_grad():
            encoder_outputs = self.model.encoder(
                input_ids=inputs['input_ids'],
                attention_mask=inputs.get('attention_mask', None),
            )
            encoder_hidden_states = encoder_outputs[0]  # (B, L, H)

            dec_input_ids = torch.full(
                (inputs['input_ids'].size(0), 1),
                self.config.decoder_start_token_id,
                dtype=torch.long,
                device=device,
            )

            outputs = self.model.decoder(
                input_ids=dec_input_ids,
                encoder_hidden_states=encoder_hidden_states,
            )
            last_hidden_states = outputs[0]  # (B, 1, H)

        output1 = self.fc1(last_hidden_states.view(-1, self.config.hidden_size))
        output2 = self.fc2(encoder_hidden_states[:, 0, :].view(-1, self.config.hidden_size))
        output = self.fc3(torch.hstack((output1, output2)))
        output = self.fc4(output)
        output = self.fc5(output)
        return output * 100



In [10]:
# 収率予測（スカラー出力）
yield_tokenizer = AutoTokenizer.from_pretrained("sagawa/ReactionT5v2-yield")
yield_model = ReactionT5Yield.from_pretrained("sagawa/ReactionT5v2-yield")

In [11]:
def predict_yield(input_str: str) -> float:
    inputs = yield_tokenizer([input_str], return_tensors="pt", truncation=True)
    with torch.no_grad():
        output = yield_model(inputs)
    return output.item()

## Optunaによるベイズ最適化

In [12]:
import optuna

In [13]:
target_product = "OB(O)c1ccc2[nH]ccc2c1"

In [14]:
reactant_list = sorted(df["REACTANT"].unique())
reagent_list = sorted(df["REAGENT"].unique())

In [15]:
true_yield_dict = {
    (row["REACTANT"], row["REAGENT"], row["PRODUCT"]): row["YIELD"]
    for _, row in df.iterrows()
}

In [16]:
def objective(trial):

    yield_model.to("cpu")
    try:
        torch.cuda.empty_cache()
    except Exception:
        pass
    
    reactant = trial.suggest_categorical("reactant", reactant_list)
    reagent = trial.suggest_categorical("reagent", reagent_list)

    input_str = f"REACTANT:{reactant}REAGENT:{reagent}PRODUCT:{target_product}"

    try:
        pred_yield = predict_yield(input_str)

        # ground truth を取得
        key = (reactant, reagent, target_product)
        if key not in true_yield_dict:
            print(f"❗ No ground truth for: {reactant} + {reagent} → {target_product}")
            true_yield = 0.0
        else:
            true_yield = true_yield_dict.get(key)

        # 誤差の計算
        if true_yield is not None:
            true_yield_pct = true_yield * 100
            error = pred_yield - true_yield_pct 
            print(f"🔎 {reactant} + {reagent} → {target_product}")
            print(f"   📈 Predicted: {pred_yield:.2f}%")
            print(f"   🧪 Ground truth: {true_yield_pct:.2f}%" if true_yield is not None else "   🧪 Ground truth: None")
            print(f"   ❗ Error: {error:+.2f}%")
        else:
            print(f"❔ No ground truth for: {reactant} + {reagent}")
            error = None

        if pred_yield < 0 or pred_yield > 100:
            return 0.0

        return pred_yield  # 目的関数は「予測収率の最大化」
    except Exception as e:
        print(f"❌ Error during trial: {e}")
        return 0.0

In [17]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

[I 2025-08-11 14:13:10,228] A new study created in memory with name: no-name-5b9a6a07-7d8f-4413-b9a1-b71967e4e8dc
[I 2025-08-11 14:13:10,373] Trial 0 finished with value: 42.103721618652344 and parameters: {'reactant': 'CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1.OB(O)B(O)O', 'reagent': '.COc1cc(C(C)(C)C)cc(C(C)(C)C)c1-c1ccccc1P(C1CCCCC1)C1CCCCC1'}. Best is trial 0 with value: 42.103721618652344.
[I 2025-08-11 14:13:10,479] Trial 1 finished with value: 56.20895767211914 and parameters: {'reactant': 'COc1ncc(Br)c(OC)n1.OB(O)B(O)O', 'reagent': '.CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1'}. Best is trial 1 with value: 56.20895767211914.


❗ No ground truth for: CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1.OB(O)B(O)O + .COc1cc(C(C)(C)C)cc(C(C)(C)C)c1-c1ccccc1P(C1CCCCC1)C1CCCCC1 → OB(O)c1ccc2[nH]ccc2c1
🔎 CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1.OB(O)B(O)O + .COc1cc(C(C)(C)C)cc(C(C)(C)C)c1-c1ccccc1P(C1CCCCC1)C1CCCCC1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 42.10%
   🧪 Ground truth: 0.00%
   ❗ Error: +42.10%
❗ No ground truth for: COc1ncc(Br)c(OC)n1.OB(O)B(O)O + .CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1 → OB(O)c1ccc2[nH]ccc2c1
🔎 COc1ncc(Br)c(OC)n1.OB(O)B(O)O + .CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 56.21%
   🧪 Ground truth: 0.00%
   ❗ Error: +56.21%


[I 2025-08-11 14:13:10,584] Trial 2 finished with value: 69.60411071777344 and parameters: {'reactant': 'Cc1nc2cc(OS(=O)(=O)N(C)C)ccc2s1.OB(O)B(O)O', 'reagent': '.Fc1ccc(P(c2ccc(F)cc2)c2ccc(F)cc2)cc1'}. Best is trial 2 with value: 69.60411071777344.
[I 2025-08-11 14:13:10,679] Trial 3 finished with value: 44.80626678466797 and parameters: {'reactant': 'COc1ccc(Br)cc1F.OB(O)B(O)O', 'reagent': '.CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1'}. Best is trial 2 with value: 69.60411071777344.
[I 2025-08-11 14:13:10,770] Trial 4 finished with value: 70.2636947631836 and parameters: {'reactant': 'Clc1ccc2[nH]ccc2c1.OB(O)B(O)O', 'reagent': '.c1ccc(P(c2ccccc2)c2ccccc2)cc1'}. Best is trial 4 with value: 70.2636947631836.


❗ No ground truth for: Cc1nc2cc(OS(=O)(=O)N(C)C)ccc2s1.OB(O)B(O)O + .Fc1ccc(P(c2ccc(F)cc2)c2ccc(F)cc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 Cc1nc2cc(OS(=O)(=O)N(C)C)ccc2s1.OB(O)B(O)O + .Fc1ccc(P(c2ccc(F)cc2)c2ccc(F)cc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 69.60%
   🧪 Ground truth: 0.00%
   ❗ Error: +69.60%
❗ No ground truth for: COc1ccc(Br)cc1F.OB(O)B(O)O + .CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1 → OB(O)c1ccc2[nH]ccc2c1
🔎 COc1ccc(Br)cc1F.OB(O)B(O)O + .CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 44.81%
   🧪 Ground truth: 0.00%
   ❗ Error: +44.81%
🔎 Clc1ccc2[nH]ccc2c1.OB(O)B(O)O + .c1ccc(P(c2ccccc2)c2ccccc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 70.26%
   🧪 Ground truth: 57.31%
   ❗ Error: +12.95%


[I 2025-08-11 14:13:10,848] Trial 5 finished with value: 35.00084686279297 and parameters: {'reactant': 'COc1ccc(Br)cc1F.OB(O)B(O)O', 'reagent': '.CN(C)c1ccc(P(c2ccccc2)c2ccccc2)cc1'}. Best is trial 4 with value: 70.2636947631836.
[I 2025-08-11 14:13:10,931] Trial 6 finished with value: 55.39708709716797 and parameters: {'reactant': 'N#Cc1ccc(Cl)cc1F.OB(O)B(O)O', 'reagent': '.COc1ccccc1P(c1ccccc1OC)c1ccccc1OC'}. Best is trial 4 with value: 70.2636947631836.
[I 2025-08-11 14:13:11,019] Trial 7 finished with value: 39.1043815612793 and parameters: {'reactant': 'Brc1ccc(-c2ccccc2)cc1.OB(O)B(O)O', 'reagent': '.c1ccc(-n2cccc2P(C2CCCCC2)C2CCCCC2)cc1'}. Best is trial 4 with value: 70.2636947631836.


❗ No ground truth for: COc1ccc(Br)cc1F.OB(O)B(O)O + .CN(C)c1ccc(P(c2ccccc2)c2ccccc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 COc1ccc(Br)cc1F.OB(O)B(O)O + .CN(C)c1ccc(P(c2ccccc2)c2ccccc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 35.00%
   🧪 Ground truth: 0.00%
   ❗ Error: +35.00%
❗ No ground truth for: N#Cc1ccc(Cl)cc1F.OB(O)B(O)O + .COc1ccccc1P(c1ccccc1OC)c1ccccc1OC → OB(O)c1ccc2[nH]ccc2c1
🔎 N#Cc1ccc(Cl)cc1F.OB(O)B(O)O + .COc1ccccc1P(c1ccccc1OC)c1ccccc1OC → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 55.40%
   🧪 Ground truth: 0.00%
   ❗ Error: +55.40%
❗ No ground truth for: Brc1ccc(-c2ccccc2)cc1.OB(O)B(O)O + .c1ccc(-n2cccc2P(C2CCCCC2)C2CCCCC2)cc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 Brc1ccc(-c2ccccc2)cc1.OB(O)B(O)O + .c1ccc(-n2cccc2P(C2CCCCC2)C2CCCCC2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 39.10%
   🧪 Ground truth: 0.00%
   ❗ Error: +39.10%


[I 2025-08-11 14:13:11,103] Trial 8 finished with value: 44.0573616027832 and parameters: {'reactant': 'COc1ccc(Br)cc1F.OB(O)B(O)O', 'reagent': '.c1ccc(-c2cc3ccccc3n2P(C2CCCCC2)C2CCCCC2)cc1'}. Best is trial 4 with value: 70.2636947631836.
[I 2025-08-11 14:13:11,214] Trial 9 finished with value: 65.11547088623047 and parameters: {'reactant': 'Clc1ccc2c(c1)OCO2.OB(O)B(O)O', 'reagent': '.c1ccc(-c2nn(-c3ccccc3)c(-c3ccccc3)c2-n2nccc2P(C2CCCCC2)C2CCCCC2)cc1'}. Best is trial 4 with value: 70.2636947631836.
[I 2025-08-11 14:13:11,295] Trial 10 finished with value: 63.143795013427734 and parameters: {'reactant': 'Clc1cnc2ccccc2c1.OB(O)B(O)O', 'reagent': '.c1ccc(P(c2ccccc2)c2ccccc2)cc1'}. Best is trial 4 with value: 70.2636947631836.


❗ No ground truth for: COc1ccc(Br)cc1F.OB(O)B(O)O + .c1ccc(-c2cc3ccccc3n2P(C2CCCCC2)C2CCCCC2)cc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 COc1ccc(Br)cc1F.OB(O)B(O)O + .c1ccc(-c2cc3ccccc3n2P(C2CCCCC2)C2CCCCC2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 44.06%
   🧪 Ground truth: 0.00%
   ❗ Error: +44.06%
❗ No ground truth for: Clc1ccc2c(c1)OCO2.OB(O)B(O)O + .c1ccc(-c2nn(-c3ccccc3)c(-c3ccccc3)c2-n2nccc2P(C2CCCCC2)C2CCCCC2)cc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 Clc1ccc2c(c1)OCO2.OB(O)B(O)O + .c1ccc(-c2nn(-c3ccccc3)c(-c3ccccc3)c2-n2nccc2P(C2CCCCC2)C2CCCCC2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 65.12%
   🧪 Ground truth: 0.00%
   ❗ Error: +65.12%
❗ No ground truth for: Clc1cnc2ccccc2c1.OB(O)B(O)O + .c1ccc(P(c2ccccc2)c2ccccc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 Clc1cnc2ccccc2c1.OB(O)B(O)O + .c1ccc(P(c2ccccc2)c2ccccc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 63.14%
   🧪 Ground truth: 0.00%
   ❗ Error: +63.14%


[I 2025-08-11 14:13:11,391] Trial 11 finished with value: 69.60411071777344 and parameters: {'reactant': 'Cc1nc2cc(OS(=O)(=O)N(C)C)ccc2s1.OB(O)B(O)O', 'reagent': '.Fc1ccc(P(c2ccc(F)cc2)c2ccc(F)cc2)cc1'}. Best is trial 4 with value: 70.2636947631836.
[I 2025-08-11 14:13:11,482] Trial 12 finished with value: 38.99863815307617 and parameters: {'reactant': 'Cc1cc(F)ccc1Cl.OB(O)B(O)O', 'reagent': '.CN(C)c1ccccc1-c1ccccc1P(c1ccccc1)c1ccccc1'}. Best is trial 4 with value: 70.2636947631836.


❗ No ground truth for: Cc1nc2cc(OS(=O)(=O)N(C)C)ccc2s1.OB(O)B(O)O + .Fc1ccc(P(c2ccc(F)cc2)c2ccc(F)cc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 Cc1nc2cc(OS(=O)(=O)N(C)C)ccc2s1.OB(O)B(O)O + .Fc1ccc(P(c2ccc(F)cc2)c2ccc(F)cc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 69.60%
   🧪 Ground truth: 0.00%
   ❗ Error: +69.60%
❗ No ground truth for: Cc1cc(F)ccc1Cl.OB(O)B(O)O + .CN(C)c1ccccc1-c1ccccc1P(c1ccccc1)c1ccccc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 Cc1cc(F)ccc1Cl.OB(O)B(O)O + .CN(C)c1ccccc1-c1ccccc1P(c1ccccc1)c1ccccc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 39.00%
   🧪 Ground truth: 0.00%
   ❗ Error: +39.00%


[I 2025-08-11 14:13:11,623] Trial 13 finished with value: 66.96490478515625 and parameters: {'reactant': 'Cc1nc2cc(OS(=O)(=O)N(C)C)ccc2s1.OB(O)B(O)O', 'reagent': '.C[PH+](C)C.F[B-](F)(F)F'}. Best is trial 4 with value: 70.2636947631836.
[I 2025-08-11 14:13:11,709] Trial 14 finished with value: 68.98009490966797 and parameters: {'reactant': 'Clc1ccc2[nH]ccc2c1.OB(O)B(O)O', 'reagent': '.Fc1ccc(P(c2ccc(F)cc2)c2ccc(F)cc2)cc1'}. Best is trial 4 with value: 70.2636947631836.
[I 2025-08-11 14:13:11,803] Trial 15 finished with value: 44.18809127807617 and parameters: {'reactant': 'FC(F)(F)c1ccc(Cl)cc1.OB(O)B(O)O', 'reagent': '.c1ccc(-c2ccccc2P(C2CCCCC2)C2CCCCC2)cc1'}. Best is trial 4 with value: 70.2636947631836.


❗ No ground truth for: Cc1nc2cc(OS(=O)(=O)N(C)C)ccc2s1.OB(O)B(O)O + .C[PH+](C)C.F[B-](F)(F)F → OB(O)c1ccc2[nH]ccc2c1
🔎 Cc1nc2cc(OS(=O)(=O)N(C)C)ccc2s1.OB(O)B(O)O + .C[PH+](C)C.F[B-](F)(F)F → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 66.96%
   🧪 Ground truth: 0.00%
   ❗ Error: +66.96%
🔎 Clc1ccc2[nH]ccc2c1.OB(O)B(O)O + .Fc1ccc(P(c2ccc(F)cc2)c2ccc(F)cc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 68.98%
   🧪 Ground truth: 53.95%
   ❗ Error: +15.03%
❗ No ground truth for: FC(F)(F)c1ccc(Cl)cc1.OB(O)B(O)O + .c1ccc(-c2ccccc2P(C2CCCCC2)C2CCCCC2)cc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 FC(F)(F)c1ccc(Cl)cc1.OB(O)B(O)O + .c1ccc(-c2ccccc2P(C2CCCCC2)C2CCCCC2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 44.19%
   🧪 Ground truth: 0.00%
   ❗ Error: +44.19%


[I 2025-08-11 14:13:11,933] Trial 16 finished with value: 45.801631927490234 and parameters: {'reactant': 'FC(F)(F)c1ccc(Br)cc1.OB(O)B(O)O', 'reagent': '.COc1cccc(OC)c1-c1ccccc1P(c1ccccc1)c1ccccc1'}. Best is trial 4 with value: 70.2636947631836.
[I 2025-08-11 14:13:12,038] Trial 17 finished with value: 46.29161071777344 and parameters: {'reactant': 'COC(=O)c1ccc(Br)cc1.OB(O)B(O)O', 'reagent': '.c1ccc(P(c2ccccc2)c2ccccc2)cc1'}. Best is trial 4 with value: 70.2636947631836.


❗ No ground truth for: FC(F)(F)c1ccc(Br)cc1.OB(O)B(O)O + .COc1cccc(OC)c1-c1ccccc1P(c1ccccc1)c1ccccc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 FC(F)(F)c1ccc(Br)cc1.OB(O)B(O)O + .COc1cccc(OC)c1-c1ccccc1P(c1ccccc1)c1ccccc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 45.80%
   🧪 Ground truth: 0.00%
   ❗ Error: +45.80%
❗ No ground truth for: COC(=O)c1ccc(Br)cc1.OB(O)B(O)O + .c1ccc(P(c2ccccc2)c2ccccc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
🔎 COC(=O)c1ccc(Br)cc1.OB(O)B(O)O + .c1ccc(P(c2ccccc2)c2ccccc2)cc1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 46.29%
   🧪 Ground truth: 0.00%
   ❗ Error: +46.29%


[I 2025-08-11 14:13:12,137] Trial 18 finished with value: 48.68529510498047 and parameters: {'reactant': 'Cc1c(N)cccc1Cl.OB(O)B(O)O', 'reagent': '.C1CCC([PH+](C2CCCCC2)C2CCCCC2)CC1.F[B-](F)(F)F'}. Best is trial 4 with value: 70.2636947631836.
[I 2025-08-11 14:13:12,223] Trial 19 finished with value: 41.02374267578125 and parameters: {'reactant': 'Clc1ccc(-c2ccccc2)cc1.OB(O)B(O)O', 'reagent': '.c1ccc(P(C2CCCCC2)C2CCCCC2)c(-n2c3ccccc3c3ccccc32)c1'}. Best is trial 4 with value: 70.2636947631836.


❗ No ground truth for: Cc1c(N)cccc1Cl.OB(O)B(O)O + .C1CCC([PH+](C2CCCCC2)C2CCCCC2)CC1.F[B-](F)(F)F → OB(O)c1ccc2[nH]ccc2c1
🔎 Cc1c(N)cccc1Cl.OB(O)B(O)O + .C1CCC([PH+](C2CCCCC2)C2CCCCC2)CC1.F[B-](F)(F)F → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 48.69%
   🧪 Ground truth: 0.00%
   ❗ Error: +48.69%
❗ No ground truth for: Clc1ccc(-c2ccccc2)cc1.OB(O)B(O)O + .c1ccc(P(C2CCCCC2)C2CCCCC2)c(-n2c3ccccc3c3ccccc32)c1 → OB(O)c1ccc2[nH]ccc2c1
🔎 Clc1ccc(-c2ccccc2)cc1.OB(O)B(O)O + .c1ccc(P(C2CCCCC2)C2CCCCC2)c(-n2c3ccccc3c3ccccc32)c1 → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 41.02%
   🧪 Ground truth: 0.00%
   ❗ Error: +41.02%


## ベイズ最適化・ファインチューニングのループ

In [18]:
import os
import csv
import math
import time
import random
from dataclasses import dataclass

import torch.nn.functional as F
from transformers import Trainer,TrainingArguments,DataCollatorWithPadding
from optuna.samplers import TPESampler

In [19]:
class CollatorForYield:
    def __init__(self, tokenizer):
        self.pad = DataCollatorWithPadding(tokenizer)
    def __call__(self, features):
        has_labels = "labels" in features[0]
        if has_labels:
            labels = torch.tensor([float(f["labels"]) for f in features], dtype=torch.float)
        token_feats = [{k: v for k, v in f.items() if k in ("input_ids", "attention_mask")} for f in features]
        batch = self.pad(token_feats)
        if has_labels:
            batch["labels"] = labels
        return batch

In [20]:
class YieldTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels", None)
        preds = model(inputs).squeeze(-1)
        if labels is None:
            loss = preds.new_zeros(())
        else:
            loss = F.mse_loss(preds, labels)
        return (loss, preds) if return_outputs else loss

In [21]:
class YieldDataset(torch.utils.data.Dataset):
    def __init__(self, texts, y, tokenizer, max_length=512):
        self.enc = tokenizer(texts, truncation=True, padding=False, max_length=max_length)
        self.y = y
    def __len__(self): return len(self.y)
    def __getitem__(self, i):
        return {
            "input_ids": torch.tensor(self.enc["input_ids"][i], dtype=torch.long),
            "attention_mask": torch.tensor(self.enc["attention_mask"][i], dtype=torch.long),
            "labels": torch.tensor(self.y[i], dtype=torch.float),  # [%]
        }


In [22]:
@dataclass
class LoopConfig:
    n_rounds: int = 3
    trials_per_round: int = 50
    study_seed: int = 42
    learning_rate: float = 5e-4
    epochs_per_round: int = 5
    weight_decay: float = 0.01
    max_length: int = 512
    batch_size_train: int = 16
    batch_size_eval: int = 32
    val_ratio: float = 0.2
    output_dir: str = "runs/iter_yield"
    log_csv_name: str = "bo_log.csv"

In [23]:
def iterative_optuna_finetune(
    *,
    predict_yield_fn,
    reactant_list,
    reagent_list,
    target_product,
    true_yield_dict,
    tokenizer,
    model,
    cfg: LoopConfig = LoopConfig(),
):
    os.makedirs(cfg.output_dir, exist_ok=True)
    log_csv_path = os.path.join(cfg.output_dir, cfg.log_csv_name)

    # CSVヘッダ（存在しなければ作成）
    if not os.path.exists(log_csv_path):
        with open(log_csv_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow([
                "ts", "round", "trial_index", "reactant", "reagent", "product",
                "pred_yield_pct", "true_yield_pct", "error_pct",
                "was_used_for_ft", "study_best_pred", "study_best_true"
            ])

    # これまで探索した（reactant,reagent）組の重複防止 optional
    seen_pairs: set = set()

    # 進捗
    cumulative_true_texts = []
    cumulative_true_labels = []

    train_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for r in range(1, cfg.n_rounds + 1):
        print(f"\n==== Round {r}/{cfg.n_rounds} ====")

        model.to("cpu")
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass

        round_records = []

        # ---- Optuna Study ----
        storage_path = os.path.join(cfg.output_dir, f"round_{r}.db")
        study = optuna.create_study(
            direction="maximize",
            sampler=TPESampler(seed=cfg.study_seed + r),
            storage=f"sqlite:///{storage_path}",
            study_name=f"yield_round_{r}",
            load_if_exists=True,
        )

        def objective(trial: optuna.Trial) -> float:
            # カテゴリ探索
            reactant = trial.suggest_categorical("reactant", reactant_list)
            reagent  = trial.suggest_categorical("reagent", reagent_list)

            # 重複チェック
            pair = (reactant, reagent, target_product)
            if pair in seen_pairs:
                trial.set_user_attr("duplicate_pair", True)
            else:
                trial.set_user_attr("duplicate_pair", False)

            input_str = f"REACTANT:{reactant}REAGENT:{reagent}PRODUCT:{target_product}"

            # 予測
            try:
                pred_y = float(predict_yield_fn(input_str))  # [%]
            except Exception as e:
                print(f"❌ prediction error: {e}")
                pred_y = 0.0

            # クリッピング（安全策）
            if not math.isfinite(pred_y):
                pred_y = 0.0
            pred_y = max(0.0, min(100.0, pred_y))

            # 真値（%）— 無い場合は 0% を採用
            true = true_yield_dict.get(pair, None)
            if true is None:
                true_pct = 0.0
                error_pct = pred_y - true_pct
                trial.set_user_attr("imputed_true_zero", True)
            else:
                true_pct = float(true) * 100.0
                error_pct = pred_y - true_pct
                trial.set_user_attr("imputed_true_zero", False)

            # エラー
            error_pct = None if true_pct is None else (pred_y - true_pct)

            # Optuna user attrs にも残す
            trial.set_user_attr("reactant", reactant)
            trial.set_user_attr("reagent", reagent)
            trial.set_user_attr("product", target_product)
            trial.set_user_attr("pred_yield_pct", pred_y)
            trial.set_user_attr("true_yield_pct", true_pct)
            trial.set_user_attr("error_pct", error_pct)

            # 一旦メモリにも保存（後でCSV出力）
            round_records.append({
                "reactant": reactant,
                "reagent": reagent,
                "product": target_product,
                "pred_yield_pct": pred_y,
                "true_yield_pct": true_pct,
                "error_pct": error_pct,
            })

            # 目的関数は「予測収率の最大化」
            return pred_y

        study.optimize(objective, n_trials=cfg.trials_per_round, n_jobs=1)

        model.to(train_device)
        
        # ---- ラウンドの結果をCSVへ出力 ----
        best_pred = float(study.best_value) if study.best_value is not None else None
        # best の真値
        best_trial = study.best_trial if study.best_trial else None
        best_true = None
        if best_trial:
            bt_true = best_trial.user_attrs.get("true_yield_pct", None)
            best_true = None if bt_true is None else float(bt_true)

        with open(log_csv_path, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            for idx, rec in enumerate(round_records, start=1):
                writer.writerow([
                    int(time.time()),
                    r,
                    idx,
                    rec["reactant"],
                    rec["reagent"],
                    rec["product"],
                    f'{rec["pred_yield_pct"]:.6f}',
                    "" if rec["true_yield_pct"] is None else f'{rec["true_yield_pct"]:.6f}',
                    "" if rec["error_pct"] is None else f'{rec["error_pct"]:+.6f}',
                    "",  # was_used_for_ft はFT後に上書き
                    "" if best_pred is None else f"{best_pred:.6f}",
                    "" if best_true is None else f"{best_true:.6f}",
                ])

        # ---- ラウンドの真値付きデータで FT 用データセット作成 ----
        texts_round = []
        labels_round = []  # [%]
        for rec in round_records:
            if rec["true_yield_pct"] is not None:
                inp = f"REACTANT:{rec['reactant']}REAGENT:{rec['reagent']}PRODUCT:{rec['product']}"
                texts_round.append(inp)
                labels_round.append(float(rec["true_yield_pct"]))

        if len(texts_round) == 0:
            print(f"[Round {r}] 真値付きデータが0件のため、微調整はスキップします。")
            continue

        # 累積データに追加
        cumulative_true_texts.extend(texts_round)
        cumulative_true_labels.extend(labels_round)

        # ---- 学習/評価分割（十分な件数のときのみ評価）----
        idxs = list(range(len(cumulative_true_texts)))
        random.Random(cfg.study_seed + r).shuffle(idxs)

        n_total = len(idxs)
        n_val = int(n_total * cfg.val_ratio)
        if n_val >= 5:  # 最低5件確保できたときだけ eval
            val_idx = idxs[:n_val]
            train_idx = idxs[n_val:]
        else:
            val_idx = []
            train_idx = idxs

        def subset(lst, sel): return [lst[i] for i in sel]

        train_ds = YieldDataset(
            subset(cumulative_true_texts, train_idx),
            subset(cumulative_true_labels, train_idx),
            tokenizer,
            max_length=cfg.max_length,
        )
        eval_ds = None
        if len(val_idx) > 0:
            eval_ds = YieldDataset(
                subset(cumulative_true_texts, val_idx),
                subset(cumulative_true_labels, val_idx),
                tokenizer,
                max_length=cfg.max_length,
            )

        # ---- Trainer 準備・学習 ----
        out_dir_round = os.path.join(cfg.output_dir, f"round_{r}")
        args = TrainingArguments(
            output_dir=out_dir_round,
            learning_rate=cfg.learning_rate,
            num_train_epochs=cfg.epochs_per_round,
            per_device_train_batch_size=min(cfg.batch_size_train, max(1, len(train_ds))),
            per_device_eval_batch_size=cfg.batch_size_eval,
            weight_decay=cfg.weight_decay,
            logging_steps=50,
            save_strategy="no",
            report_to="none",
            fp16=torch.cuda.is_available(),
            remove_unused_columns=False
        )

        def compute_metrics(eval_pred):
            import numpy as np
            preds = np.array(eval_pred.predictions).reshape(-1)
            labels = np.array(eval_pred.label_ids).reshape(-1)
            mae = float(np.mean(np.abs(preds - labels)))
            rmse = float(np.sqrt(np.mean((preds - labels) ** 2)))
            return {"mae_pct": mae, "rmse_pct": rmse}

        trainer = YieldTrainer(
            model=model,
            args=args,
            train_dataset=train_ds,
            eval_dataset=eval_ds,
            data_collator=CollatorForYield(tokenizer),
            compute_metrics=compute_metrics if eval_ds else None,
        )

        print(f"[Round {r}] Fine-tuning on {len(train_ds)} samples"
              + (f", eval {len(eval_ds)} samples" if eval_ds else ""))

        trainer.train()
        trainer.save_model(out_dir_round)  # fc層を含む全体を保存

        # ---- このラウンドで FT に使った試行を CSV にマーク ----
        # （簡易的に：直近ラウンドの真値付き行の was_used_for_ft を 1 に上書き）
        # 既存CSVを読み書きする
        with open(log_csv_path, "r", encoding="utf-8") as f:
            rows = list(csv.reader(f))
        header = rows[0]
        # カラム位置
        was_used_idx = header.index("was_used_for_ft")
        round_idx = header.index("round")
        trial_idx = header.index("trial_index")
        react_idx = header.index("reactant")
        reag_idx = header.index("reagent")
        prod_idx = header.index("product")

        ft_pairs = {(rec["reactant"], rec["reagent"], rec["product"]) for rec in round_records if rec["true_yield_pct"] is not None}
        for i in range(1, len(rows)):
            row = rows[i]
            if int(row[round_idx]) == r and (row[react_idx], row[reag_idx], row[prod_idx]) in ft_pairs:
                row[was_used_idx] = "1"
        with open(log_csv_path, "w", newline="", encoding="utf-8") as f:
            csv.writer(f).writerows(rows)

        # seen に今回の組み合わせを加えて、以降の重複を（弱く）抑止
        for rec in round_records:
            seen_pairs.add((rec["reactant"], rec["reagent"]))

    print("\nDone. Logs:")
    print(f"- Trials CSV: {log_csv_path}")
    print(f"- Optuna DBs: {cfg.output_dir}/round_*.db")
    print(f"- Checkpoints per round: {cfg.output_dir}/round_*/")

In [24]:
cfg = LoopConfig(
    n_rounds=10,
    trials_per_round=10,
    study_seed=42,
    learning_rate=5e-4,
    epochs_per_round=5,
    weight_decay=0.01,
    max_length=512,
    batch_size_train=16,
    batch_size_eval=32,
    val_ratio=0.2,
    output_dir="runs/10rounds_10_trials_yield",
)

In [25]:
iterative_optuna_finetune(
    predict_yield_fn=predict_yield,
    reactant_list=reactant_list,
    reagent_list=reagent_list,
    target_product=target_product,
    true_yield_dict=true_yield_dict,
    tokenizer=yield_tokenizer,
    model=yield_model,
    cfg=cfg,
)


==== Round 1/10 ====


[I 2025-08-11 14:13:13,034] A new study created in RDB with name: yield_round_1
[I 2025-08-11 14:13:13,320] Trial 0 finished with value: 42.4525032043457 and parameters: {'reactant': 'Cc1cc(F)ccc1Cl.OB(O)B(O)O', 'reagent': '.c1ccc(-c2cc3ccccc3n2P(C2CCCCC2)C2CCCCC2)cc1'}. Best is trial 0 with value: 42.4525032043457.
[I 2025-08-11 14:13:13,610] Trial 1 finished with value: 67.52852630615234 and parameters: {'reactant': 'Clc1ccc2[nH]ccc2c1.OB(O)B(O)O', 'reagent': '.CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1'}. Best is trial 1 with value: 67.52852630615234.
[I 2025-08-11 14:13:13,893] Trial 2 finished with value: 43.33335876464844 and parameters: {'reactant': 'COC(=O)c1ccc(Cl)cc1.OB(O)B(O)O', 'reagent': '.C[PH+](C)C.F[B-](F)(F)F'}. Best is trial 1 with value: 67.52852630615234.
[I 2025-08-11 14:13:14,177] Trial 3 finished with value: 59.327762603759766 and parameters: {'reactant': 'Brc1cnc2ccccc2c1.OB(O)B(O)O', 'reagent': '.COc1ccc(P(c2ccc(OC)cc2)c2ccc(OC)cc2)cc1'}. Best is

[Round 1] Fine-tuning on 10 samples
{'train_runtime': 0.641, 'train_samples_per_second': 78.0, 'train_steps_per_second': 7.8, 'train_loss': 2435.675, 'epoch': 5.0}

==== Round 2/10 ====


[I 2025-08-11 14:13:19,832] A new study created in RDB with name: yield_round_2
[I 2025-08-11 14:13:20,175] Trial 0 finished with value: 44.28371810913086 and parameters: {'reactant': 'CN(C)S(=O)(=O)Oc1ccc(-c2ccccc2)cc1.OB(O)B(O)O', 'reagent': '.CC(C)(C)P(c1ccccc1)C(C)(C)C'}. Best is trial 0 with value: 44.28371810913086.
[I 2025-08-11 14:13:20,505] Trial 1 finished with value: 50.8960075378418 and parameters: {'reactant': 'Cc1cccc(C)c1Cl.OB(O)B(O)O', 'reagent': '.CCCCC1([PH+](C2CCCCC2)C2CCCCC2)c2ccccc2-c2ccccc21.F[B-](F)(F)F'}. Best is trial 1 with value: 50.8960075378418.
[I 2025-08-11 14:13:20,778] Trial 2 finished with value: 41.82666778564453 and parameters: {'reactant': 'COc1ccc(Cl)cc1F.OB(O)B(O)O', 'reagent': '.CC(=C(c1ccccc1)c1ccccc1)P(C1CCCCC1)C1CCCCC1'}. Best is trial 1 with value: 50.8960075378418.
[I 2025-08-11 14:13:21,051] Trial 3 finished with value: 42.4200439453125 and parameters: {'reactant': 'CCOC(=O)c1ccc(Cl)c(F)c1.OB(O)B(O)O', 'reagent': '.c1ccc(P(c2ccccc2)C2CCCCC2

[Round 2] Fine-tuning on 20 samples
{'train_runtime': 0.6237, 'train_samples_per_second': 160.333, 'train_steps_per_second': 16.033, 'train_loss': 2645.7228515625, 'epoch': 5.0}

==== Round 3/10 ====


[I 2025-08-11 14:13:25,055] A new study created in RDB with name: yield_round_3
[I 2025-08-11 14:13:25,357] Trial 0 finished with value: 69.13774871826172 and parameters: {'reactant': 'CN(C)S(=O)(=O)Oc1ccc2ncccc2c1.OB(O)B(O)O', 'reagent': '.c1ccc(-n2cccc2P(C2CCCCC2)C2CCCCC2)cc1'}. Best is trial 0 with value: 69.13774871826172.
[I 2025-08-11 14:13:25,642] Trial 1 finished with value: 59.49842834472656 and parameters: {'reactant': 'COc1cc(Cl)ccc1F.OB(O)B(O)O', 'reagent': '.c1ccc(-c2ccccc2P(C2CCCCC2)C2CCCCC2)cc1'}. Best is trial 0 with value: 69.13774871826172.
[I 2025-08-11 14:13:25,943] Trial 2 finished with value: 24.916404724121094 and parameters: {'reactant': 'COc1ccc(Cl)cc1F.OB(O)B(O)O', 'reagent': '.c1ccc(P(C2CCCCC2)C2CCCCC2)c(-n2c3ccccc3c3ccccc32)c1'}. Best is trial 0 with value: 69.13774871826172.
[I 2025-08-11 14:13:26,249] Trial 3 finished with value: 47.6738395690918 and parameters: {'reactant': 'FC(F)(F)c1ccc(Br)cc1.OB(O)B(O)O', 'reagent': '.CC(C)(C)P(c1ccccc1)C(C)(C)C'}. Bes

[Round 3] Fine-tuning on 24 samples, eval 6 samples
{'train_runtime': 0.5893, 'train_samples_per_second': 203.641, 'train_steps_per_second': 16.97, 'train_loss': 2801.68984375, 'epoch': 5.0}

==== Round 4/10 ====


[I 2025-08-11 14:13:30,187] A new study created in RDB with name: yield_round_4
[I 2025-08-11 14:13:30,499] Trial 0 finished with value: 39.66865921020508 and parameters: {'reactant': 'Clc1ccc(-c2ccccc2)cc1.OB(O)B(O)O', 'reagent': '.CN(C)c1ccccc1-c1ccccc1P(c1ccccc1)c1ccccc1'}. Best is trial 0 with value: 39.66865921020508.
[I 2025-08-11 14:13:30,796] Trial 1 finished with value: 56.590797424316406 and parameters: {'reactant': 'CN(C)S(=O)(=O)Oc1ccc(-c2ccccc2)cc1.OB(O)B(O)O', 'reagent': '.c1ccc(P(c2ccccc2)c2ccccc2)cc1'}. Best is trial 1 with value: 56.590797424316406.
[I 2025-08-11 14:13:31,119] Trial 2 finished with value: 57.93023681640625 and parameters: {'reactant': 'FC(F)(F)c1ccc(Cl)cc1.OB(O)B(O)O', 'reagent': '.Cc1cc(C)cc(P(c2cc(C)cc(C)c2)c2cc(C)cc(C)c2)c1'}. Best is trial 2 with value: 57.93023681640625.
[I 2025-08-11 14:13:31,421] Trial 3 finished with value: 46.10227584838867 and parameters: {'reactant': 'CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1.OB(O)B(O)O', 'reagent': '.CC(C)(C)P(c1ccccc1)

[Round 4] Fine-tuning on 32 samples, eval 8 samples
{'train_runtime': 0.6586, 'train_samples_per_second': 242.954, 'train_steps_per_second': 15.185, 'train_loss': 2853.3443359375, 'epoch': 5.0}

==== Round 5/10 ====


[I 2025-08-11 14:13:35,345] A new study created in RDB with name: yield_round_5
[I 2025-08-11 14:13:35,566] Trial 0 finished with value: 48.309173583984375 and parameters: {'reactant': 'Brc1ccc2c(c1)OCO2.OB(O)B(O)O', 'reagent': '.Cc1ccccc1-c1ccccc1P(C1CCCCC1)C1CCCCC1'}. Best is trial 0 with value: 48.309173583984375.
[I 2025-08-11 14:13:35,790] Trial 1 finished with value: 35.72278594970703 and parameters: {'reactant': 'Cc1ncccc1Br.OB(O)B(O)O', 'reagent': '.c1ccc(P(C2CCCCC2)C2CCCCC2)c(-n2c3ccccc3c3ccccc32)c1'}. Best is trial 0 with value: 48.309173583984375.
[I 2025-08-11 14:13:35,996] Trial 2 finished with value: 43.872798919677734 and parameters: {'reactant': 'Brc1ccsc1.OB(O)B(O)O', 'reagent': '.CC(C)(C)P(c1ccccc1)C(C)(C)C'}. Best is trial 0 with value: 48.309173583984375.
[I 2025-08-11 14:13:36,214] Trial 3 finished with value: 61.03029251098633 and parameters: {'reactant': 'CN(C)S(=O)(=O)Oc1ccc2ncccc2c1.OB(O)B(O)O', 'reagent': '.Fc1ccc(P(c2ccc(F)cc2)c2ccc(F)cc2)cc1'}. Best is trial

[Round 5] Fine-tuning on 40 samples, eval 10 samples
{'train_runtime': 0.8766, 'train_samples_per_second': 228.155, 'train_steps_per_second': 17.112, 'train_loss': 2785.9125, 'epoch': 5.0}

==== Round 6/10 ====


[I 2025-08-11 14:13:39,751] A new study created in RDB with name: yield_round_6
[I 2025-08-11 14:13:39,994] Trial 0 finished with value: 53.049461364746094 and parameters: {'reactant': 'Brc1ccc2c(c1)OCO2.OB(O)B(O)O', 'reagent': '.c1ccc(P(c2ccccc2)C2CCCCC2)cc1'}. Best is trial 0 with value: 53.049461364746094.
[I 2025-08-11 14:13:40,214] Trial 1 finished with value: 55.20258712768555 and parameters: {'reactant': 'Brc1ccsc1.OB(O)B(O)O', 'reagent': '.COc1ccc(P(c2ccc(OC)cc2)c2ccc(OC)cc2)cc1'}. Best is trial 1 with value: 55.20258712768555.
[I 2025-08-11 14:13:40,425] Trial 2 finished with value: 57.3232307434082 and parameters: {'reactant': 'CN(C)C(=O)c1ccc(Cl)cc1.OB(O)B(O)O', 'reagent': '.c1ccc(P(c2ccccc2)C2CCCCC2)cc1'}. Best is trial 2 with value: 57.3232307434082.
[I 2025-08-11 14:13:40,655] Trial 3 finished with value: 20.34602165222168 and parameters: {'reactant': 'Brc1ccccc1-c1ccccc1.OB(O)B(O)O', 'reagent': '.CCCCC1([PH+](C2CCCCC2)C2CCCCC2)c2ccccc2-c2ccccc21.F[B-](F)(F)F'}. Best is t

[Round 6] Fine-tuning on 48 samples, eval 12 samples
{'train_runtime': 0.9515, 'train_samples_per_second': 252.239, 'train_steps_per_second': 15.765, 'train_loss': 2567.4348958333335, 'epoch': 5.0}

==== Round 7/10 ====


[I 2025-08-11 14:13:44,300] A new study created in RDB with name: yield_round_7
[I 2025-08-11 14:13:44,584] Trial 0 finished with value: 28.70975685119629 and parameters: {'reactant': 'COc1ccc(Br)cc1.OB(O)B(O)O', 'reagent': '.CC(=C(c1ccccc1)c1ccccc1)P(C1CCCCC1)C1CCCCC1'}. Best is trial 0 with value: 28.70975685119629.
[I 2025-08-11 14:13:44,857] Trial 1 finished with value: 52.141292572021484 and parameters: {'reactant': 'CCOC(=O)c1cc(Br)cn1CC.OB(O)B(O)O', 'reagent': '.CC(C)(C)P(c1ccccc1)C(C)(C)C'}. Best is trial 1 with value: 52.141292572021484.
[I 2025-08-11 14:13:45,154] Trial 2 finished with value: 68.51814270019531 and parameters: {'reactant': 'Clc1cnc2ccccc2c1.OB(O)B(O)O', 'reagent': '.COc1ccc(P(c2ccc(OC)cc2)c2ccc(OC)cc2)cc1'}. Best is trial 2 with value: 68.51814270019531.
[I 2025-08-11 14:13:45,484] Trial 3 finished with value: 63.468955993652344 and parameters: {'reactant': 'CCOC(=O)c1cc(Br)cn1CC.OB(O)B(O)O', 'reagent': '.COc1cc(C(C)(C)C)cc(C(C)(C)C)c1-c1ccccc1P(C1CCCCC1)C1CCC

[Round 7] Fine-tuning on 56 samples, eval 14 samples
{'train_runtime': 1.2343, 'train_samples_per_second': 226.849, 'train_steps_per_second': 16.203, 'train_loss': 2646.1853515625, 'epoch': 5.0}

==== Round 8/10 ====


[I 2025-08-11 14:13:49,858] A new study created in RDB with name: yield_round_8
[I 2025-08-11 14:13:50,162] Trial 0 finished with value: 51.210235595703125 and parameters: {'reactant': 'Brc1cnc2ccccc2c1.OB(O)B(O)O', 'reagent': '.CC(=C(c1ccccc1)c1ccccc1)P(C1CCCCC1)C1CCCCC1'}. Best is trial 0 with value: 51.210235595703125.
[I 2025-08-11 14:13:50,458] Trial 1 finished with value: 51.990692138671875 and parameters: {'reactant': 'COc1cc(Cl)ccc1F.OB(O)B(O)O', 'reagent': '.c1ccc(-c2ccccc2P(C2CCCCC2)C2CCCCC2)cc1'}. Best is trial 1 with value: 51.990692138671875.
[I 2025-08-11 14:13:50,758] Trial 2 finished with value: 50.29109573364258 and parameters: {'reactant': 'COc1ccc(Br)cc1.OB(O)B(O)O', 'reagent': '.CCCCC1([PH+](C2CCCCC2)C2CCCCC2)c2ccccc2-c2ccccc21.F[B-](F)(F)F'}. Best is trial 1 with value: 51.990692138671875.
[I 2025-08-11 14:13:51,042] Trial 3 finished with value: 38.96900939941406 and parameters: {'reactant': 'N#Cc1ccc(Br)cc1F.OB(O)B(O)O', 'reagent': '.c1ccc(P(C2CCCCC2)C2CCCCC2)c(-n

[Round 8] Fine-tuning on 64 samples, eval 16 samples
{'train_runtime': 1.2388, 'train_samples_per_second': 258.309, 'train_steps_per_second': 16.144, 'train_loss': 2656.46953125, 'epoch': 5.0}

==== Round 9/10 ====


[I 2025-08-11 14:13:55,678] A new study created in RDB with name: yield_round_9
[I 2025-08-11 14:13:55,968] Trial 0 finished with value: 57.391090393066406 and parameters: {'reactant': 'Brc1cnc2ccccc2c1.OB(O)B(O)O', 'reagent': '.C[PH+](C)C.F[B-](F)(F)F'}. Best is trial 0 with value: 57.391090393066406.
[I 2025-08-11 14:13:56,256] Trial 1 finished with value: 47.387542724609375 and parameters: {'reactant': 'COC(=O)c1ccc(Cl)cc1.OB(O)B(O)O', 'reagent': '.Fc1ccc(P(c2ccc(F)cc2)c2ccc(F)cc2)cc1'}. Best is trial 0 with value: 57.391090393066406.
[I 2025-08-11 14:13:56,530] Trial 2 finished with value: 47.99216842651367 and parameters: {'reactant': 'Brc1ccsc1.OB(O)B(O)O', 'reagent': '.Cc1ccccc1-c1ccccc1P(C1CCCCC1)C1CCCCC1'}. Best is trial 0 with value: 57.391090393066406.
[I 2025-08-11 14:13:56,828] Trial 3 finished with value: 53.527748107910156 and parameters: {'reactant': 'Clc1ccc(-c2ccccc2)cc1.OB(O)B(O)O', 'reagent': '.Cc1ccccc1-c1ccccc1P(C1CCCCC1)C1CCCCC1'}. Best is trial 0 with value: 57.

[Round 9] Fine-tuning on 72 samples, eval 18 samples
{'train_runtime': 1.6374, 'train_samples_per_second': 219.863, 'train_steps_per_second': 15.268, 'train_loss': 2295.14140625, 'epoch': 5.0}

==== Round 10/10 ====


[I 2025-08-11 14:14:01,619] A new study created in RDB with name: yield_round_10
[I 2025-08-11 14:14:01,931] Trial 0 finished with value: 34.8645133972168 and parameters: {'reactant': 'CCOC(=O)c1ccc(Cl)c(F)c1.OB(O)B(O)O', 'reagent': '.C[PH+](C)C.F[B-](F)(F)F'}. Best is trial 0 with value: 34.8645133972168.
[I 2025-08-11 14:14:02,244] Trial 1 finished with value: 60.60206985473633 and parameters: {'reactant': 'Clc1ccc2[nH]ccc2c1.OB(O)B(O)O', 'reagent': '.CCCCC1([PH+](C2CCCCC2)C2CCCCC2)c2ccccc2-c2ccccc21.F[B-](F)(F)F'}. Best is trial 1 with value: 60.60206985473633.
[I 2025-08-11 14:14:02,525] Trial 2 finished with value: 57.71529769897461 and parameters: {'reactant': 'Clc1ccc2[nH]ccc2c1.OB(O)B(O)O', 'reagent': '.c1ccc(-n2cccc2P(C2CCCCC2)C2CCCCC2)cc1'}. Best is trial 1 with value: 60.60206985473633.
[I 2025-08-11 14:14:02,822] Trial 3 finished with value: 45.23029708862305 and parameters: {'reactant': 'CCOC(=O)c1cc(Br)cn1CC.OB(O)B(O)O', 'reagent': '.c1ccc(-c2cc3ccccc3n2P(C2CCCCC2)C2CCCCC

[Round 10] Fine-tuning on 80 samples, eval 20 samples
{'train_runtime': 1.6551, 'train_samples_per_second': 241.673, 'train_steps_per_second': 15.105, 'train_loss': 1207.054140625, 'epoch': 5.0}

Done. Logs:
- Trials CSV: runs/10rounds_10_trials_yield/bo_log.csv
- Optuna DBs: runs/10rounds_10_trials_yield/round_*.db
- Checkpoints per round: runs/10rounds_10_trials_yield/round_*/


In [26]:
import pandas as pd
import matplotlib.pyplot as plt

In [27]:
def visualize_bo_logs(csv_path: str, out_dir: str | None = None, show: bool = False, dpi: int = 180):
    # 出力ディレクトリ（集約）
    root = out_dir or (os.path.dirname(csv_path) or ".")
    save_dir = os.path.join(root, "bo_viz")
    os.makedirs(save_dir, exist_ok=True)

    # ロード & 型整形
    df = pd.read_csv(csv_path)
    to_num_cols = [
        "round", "trial_index", "pred_yield_pct", "true_yield_pct",
        "error_pct", "study_best_pred", "study_best_true"
    ]
    for c in to_num_cols:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")

    # 真値ありデータ
    df_obs = df.dropna(subset=["true_yield_pct"]) if "true_yield_pct" in df.columns else pd.DataFrame()
    # error_pct が無い/NaN の場合は再計算
    if not df_obs.empty:
        df_obs["error_pct"] = df_obs["pred_yield_pct"] - df_obs["true_yield_pct"]

    # ===== 全体指標 =====
    overall = {}
    if not df_obs.empty:
        y = df_obs["true_yield_pct"].to_numpy()
        yhat = df_obs["pred_yield_pct"].to_numpy()
        err = yhat - y
        overall = {
            "n": int(len(df_obs)),
            "mae_pct": float(np.mean(np.abs(err))),
            "rmse_pct": float(np.sqrt(np.mean(err**2))),
            "bias_pct": float(np.mean(err)),
            "r2": float(1 - np.sum(err**2) / np.sum((y - y.mean())**2)) if len(df_obs) > 1 else np.nan,
        }
    pd.DataFrame([overall]).to_csv(os.path.join(save_dir, "metrics_overall.csv"), index=False)

    # ===== ラウンド別指標（MAE/RMSE/Bias/R2）=====
    metrics_by_round = []
    if not df_obs.empty and "round" in df_obs.columns:
        for r, d in df_obs.groupby("round", dropna=True):
            y = d["true_yield_pct"].to_numpy()
            yhat = d["pred_yield_pct"].to_numpy()
            err = yhat - y
            metrics_by_round.append({
                "round": int(r),
                "n": int(len(d)),
                "mae_pct": float(np.mean(np.abs(err))),
                "rmse_pct": float(np.sqrt(np.mean(err**2))),
                "bias_pct": float(np.mean(err)),
                "r2": float(1 - np.sum(err**2) / np.sum((y - y.mean())**2)) if len(d) > 1 else np.nan,
            })
    mdf = (pd.DataFrame(metrics_by_round)
             .sort_values("round")
             if metrics_by_round else pd.DataFrame(columns=["round","n","mae_pct","rmse_pct","bias_pct","r2"]))
    mdf.to_csv(os.path.join(save_dir, "metrics_by_round.csv"), index=False)

    # ===== 図: パリティ =====
    if not df_obs.empty:
        fig = plt.figure(figsize=(5, 5))
        plt.scatter(df_obs["true_yield_pct"], df_obs["pred_yield_pct"], s=18, alpha=0.65)
        lims = [0, 100]
        plt.plot(lims, lims, linestyle="--")
        plt.xlim(lims); plt.ylim(lims)
        plt.xlabel("True Yield [%]"); plt.ylabel("Predicted Yield [%]")
        plt.title("Parity: Prediction vs Truth")
        fig.savefig(os.path.join(save_dir, "parity.png"), dpi=dpi, bbox_inches="tight")
        if show: plt.show()
        plt.close(fig)

    # ===== 図: 誤差ヒスト =====
    if not df_obs.empty and not df_obs["error_pct"].dropna().empty:
        fig = plt.figure(figsize=(6, 4))
        plt.hist(df_obs["error_pct"].dropna().to_numpy(), bins=30)
        plt.xlabel("Prediction Error [%]  (pred - true)")
        plt.ylabel("Count")
        plt.title("Error Histogram")
        fig.savefig(os.path.join(save_dir, "error_hist.png"), dpi=dpi, bbox_inches="tight")
        if show: plt.show()
        plt.close(fig)

    # ===== 図: ラウンド別ベスト真値 =====
    if not df_obs.empty and "round" in df_obs.columns:
        best_by_round = df_obs.groupby("round")["true_yield_pct"].max()
        fig = plt.figure(figsize=(6, 4))
        plt.plot(best_by_round.index, best_by_round.values, marker="o")
        plt.xlabel("Round"); plt.ylabel("Best Observed True Yield [%]")
        plt.title("Best True Yield per Round"); plt.grid(True, alpha=0.3)
        fig.savefig(os.path.join(save_dir, "best_true_per_round.png"), dpi=dpi, bbox_inches="tight")
        if show: plt.show()
        plt.close(fig)

    # ===== 図: キャリブレーション =====
    if not df_obs.empty:
        bins = np.linspace(0, 100, 11)  # 10ビン
        cut = pd.cut(df_obs["pred_yield_pct"], bins, include_lowest=True)
        calib = df_obs.groupby(cut).agg(
            pred_mean=("pred_yield_pct", "mean"),
            true_mean=("true_yield_pct", "mean"),
            n=("true_yield_pct", "size")
        ).dropna()
        if not calib.empty:
            fig = plt.figure(figsize=(6, 4))
            lims = [0, 100]
            plt.plot(calib["pred_mean"], calib["true_mean"], marker="o")
            plt.plot(lims, lims, linestyle="--")
            plt.xlim(lims); plt.ylim(lims)
            plt.xlabel("Predicted Mean (per bin) [%]")
            plt.ylabel("Observed Mean (per bin) [%]")
            plt.title("Calibration Curve")
            fig.savefig(os.path.join(save_dir, "calibration.png"), dpi=dpi, bbox_inches="tight")
            if show: plt.show()
            plt.close(fig)
            calib.to_csv(os.path.join(save_dir, "calibration_table.csv"))

    # ===== 図: ラウンド別（真値/予測）の箱ひげ =====
    if not df_obs.empty and "round" in df_obs.columns:
        rounds = sorted(df_obs["round"].dropna().unique())
        if len(rounds) > 0:
            fig = plt.figure(figsize=(7, 4))
            pos = np.array(rounds, dtype=float)
            data_true = [df_obs[df_obs["round"] == r]["true_yield_pct"].to_numpy() for r in rounds]
            data_pred = [df_obs[df_obs["round"] == r]["pred_yield_pct"].to_numpy() for r in rounds]
            plt.boxplot(data_true, positions=pos - 0.15, widths=0.25, patch_artist=True)
            plt.boxplot(data_pred, positions=pos + 0.15, widths=0.25, patch_artist=True)
            plt.xticks(rounds)
            plt.xlabel("Round"); plt.ylabel("Yield [%]")
            plt.title("Distributions per Round (True vs Pred)")
            fig.savefig(os.path.join(save_dir, "round_box.png"), dpi=dpi, bbox_inches="tight")
            if show: plt.show()
            plt.close(fig)

    # ===== NEW: ラウンド別の誤差分布（箱ひげ）=====
    if not df_obs.empty and "round" in df_obs.columns:
        rounds = sorted(df_obs["round"].dropna().unique())
        if len(rounds) > 0:
            fig = plt.figure(figsize=(7, 4))
            data_err = [df_obs[df_obs["round"] == r]["error_pct"].dropna().to_numpy() for r in rounds]
            plt.boxplot(data_err, positions=np.array(rounds, dtype=float), widths=0.5, patch_artist=True)
            plt.axhline(0.0, linestyle="--")
            plt.xticks(rounds)
            plt.xlabel("Round"); plt.ylabel("Prediction Error (pred - true) [%]")
            plt.title("Prediction Error by Round (Boxplot)")
            fig.savefig(os.path.join(save_dir, "error_box_by_round.png"), dpi=dpi, bbox_inches="tight")
            if show: plt.show()
            plt.close(fig)

    # ===== NEW: ラウンド別の MAE/RMSE/Bias 推移 =====
    if not mdf.empty:
        fig = plt.figure(figsize=(7, 4))
        plt.plot(mdf["round"], mdf["mae_pct"], marker="o", label="MAE [%]")
        plt.plot(mdf["round"], mdf["rmse_pct"], marker="o", label="RMSE [%]")
        plt.plot(mdf["round"], mdf["bias_pct"], marker="o", label="Bias (pred-true) [%]")
        plt.xlabel("Round"); plt.ylabel("Error [%]")
        plt.title("Prediction Error by Round")
        plt.grid(True, alpha=0.3); plt.legend()
        fig.savefig(os.path.join(save_dir, "error_by_round.png"), dpi=dpi, bbox_inches="tight")
        if show: plt.show()
        plt.close(fig)

    # ===== Optuna study best =====
    if "study_best_pred" in df.columns:
        best_df = df.dropna(subset=["round", "study_best_pred"]).groupby("round").agg(
            best_pred=("study_best_pred", "max"),
            best_true=("study_best_true", "max")
        ).reset_index()
        if not best_df.empty:
            fig = plt.figure(figsize=(6, 4))
            plt.plot(best_df["round"], best_df["best_pred"], marker="o", label="Study Best Pred [%]")
            if "study_best_true" in best_df.columns and best_df["best_true"].notna().any():
                plt.plot(best_df["round"], best_df["best_true"], marker="o", label="Study Best True [%]")
            plt.xlabel("Round"); plt.ylabel("Yield [%]")
            plt.title("Optuna Study Best per Round"); plt.grid(True, alpha=0.3); plt.legend()
            fig.savefig(os.path.join(save_dir, "study_best.png"), dpi=dpi, bbox_inches="tight")
            if show: plt.show()
            plt.close(fig)

    return {"overall": overall, "by_round": mdf, "save_dir": save_dir}


In [28]:
results = visualize_bo_logs("runs/10rounds_10_trials_yield/bo_log.csv", show=False)
print(results["overall"])

  "r2": float(1 - np.sum(err**2) / np.sum((y - y.mean())**2)) if len(d) > 1 else np.nan,
  calib = df_obs.groupby(cut).agg(


{'n': 100, 'mae_pct': 47.77434896999999, 'rmse_pct': 49.42323617180193, 'bias_pct': 46.29600007, 'r2': -7.879602744060826}
