## データセットの準備

In [40]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem

In [41]:
def space_clean(row):
    row = row.replace(". ", "").replace(" .", "").replace("  ", " ")
    return row


def canonicalize(smiles):
    try:
        new_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(smiles), canonical=True)
    except:
        new_smiles = None
    return new_smiles

In [42]:
df = pd.read_csv("./data/inchi_23l_reaction_t5_ready.csv")

In [43]:
# 必須カラムの存在チェックと補完
required_cols = ["REACTANT", "CATALYST", "REAGENT", "SOLVENT", "PRODUCT"]
for col in required_cols:
    if col not in df.columns:
        df[col] = ""

# 必要に応じてYIELDを標準化（0-1に正規化）
if "YIELD" in df.columns and df["YIELD"].max() >= 100:
    df["YIELD"] = df["YIELD"].clip(0, 100) / 100
else:
    df["YIELD"] = None

In [44]:
for col in ["REAGENT", "REACTANT", "PRODUCT"]:
    df[col] = df[col].apply(space_clean)
    df[col] = df[col].apply(lambda x: canonicalize(x) if x != " " else " ")
    df = df[~df[col].isna()].reset_index(drop=True)
    df[col] = df[col].apply(lambda x: ".".join(sorted(x.split("."))))

In [45]:
df["REAGENT"] = df["CATALYST"].fillna(" ") + "." + df["REAGENT"].fillna(" ")

In [46]:
df = df.loc[df[["YIELD"]].drop_duplicates().index].reset_index(drop=True)

## モデルの読み込み

In [47]:
import numpy as np
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, T5ForConditionalGeneration, AutoConfig, PreTrainedModel

import logging
logging.getLogger("transformers").setLevel(logging.ERROR)

In [48]:
class ReactionT5Yield(PreTrainedModel):
    config_class  = AutoConfig
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.model = T5ForConditionalGeneration.from_pretrained(self.config._name_or_path)
        self.model.resize_token_embeddings(self.config.vocab_size)
        self.fc1 = nn.Linear(self.config.hidden_size, self.config.hidden_size//2)
        self.fc2 = nn.Linear(self.config.hidden_size, self.config.hidden_size//2)
        self.fc3 = nn.Linear(self.config.hidden_size//2*2, self.config.hidden_size)
        self.fc4 = nn.Linear(self.config.hidden_size, self.config.hidden_size)
        self.fc5 = nn.Linear(self.config.hidden_size, 1)

        self._init_weights(self.fc1)
        self._init_weights(self.fc2)
        self._init_weights(self.fc3)
        self._init_weights(self.fc4)
        self._init_weights(self.fc5)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=0.01)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=0.01)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def forward(self, inputs):
        device = inputs['input_ids'].device

        with torch.no_grad():
            encoder_outputs = self.model.encoder(
                input_ids=inputs['input_ids'],
                attention_mask=inputs.get('attention_mask', None),
            )
            encoder_hidden_states = encoder_outputs[0]  # (B, L, H)

            dec_input_ids = torch.full(
                (inputs['input_ids'].size(0), 1),
                self.config.decoder_start_token_id,
                dtype=torch.long,
                device=device,
            )

            outputs = self.model.decoder(
                input_ids=dec_input_ids,
                encoder_hidden_states=encoder_hidden_states,
            )
            last_hidden_states = outputs[0]  # (B, 1, H)

        output1 = self.fc1(last_hidden_states.view(-1, self.config.hidden_size))
        output2 = self.fc2(encoder_hidden_states[:, 0, :].view(-1, self.config.hidden_size))
        output = self.fc3(torch.hstack((output1, output2)))
        output = self.fc4(output)
        output = self.fc5(output)
        return output * 100



In [49]:
# 収率予測（スカラー出力）
yield_tokenizer = AutoTokenizer.from_pretrained("sagawa/ReactionT5v2-yield")
yield_model = ReactionT5Yield.from_pretrained("sagawa/ReactionT5v2-yield")

In [50]:
def predict_yield(input_str: str) -> float:
    inputs = yield_tokenizer([input_str], return_tensors="pt", truncation=True)
    with torch.no_grad():
        output = yield_model(inputs)
    return output.item()

## Optunaによるベイズ最適化

In [51]:
import optuna

In [52]:
target_product = "OB(O)c1ccc2[nH]ccc2c1"

In [53]:
reactant_list = sorted(df["REACTANT"].unique())
reagent_list = sorted(df["REAGENT"].unique())
product_list = sorted(df["PRODUCT"].unique())

In [54]:
len(reactant_list), len(reagent_list), len(product_list)

(33, 23, 24)

In [55]:
valid_combinations = list(df[['REACTANT', 'REAGENT', 'PRODUCT']].itertuples(index=False, name=None))

In [56]:
true_yield_dict = {
    (row["REACTANT"], row["REAGENT"], row["PRODUCT"]): row["YIELD"]
    for _, row in df.iterrows()
}

In [57]:
def objective(trial):

    yield_model.to("cpu")
    try:
        torch.cuda.empty_cache()
    except Exception:
        pass
    
    combo = trial.suggest_categorical("combo", valid_combinations)
    reactant, reagent, product = combo

    input_str = f"REACTANT:{reactant}REAGENT:{reagent}PRODUCT:{product}"

    try:
        pred_yield = predict_yield(input_str)

        # ground truth を取得
        key = (reactant, reagent, product)
        if key not in true_yield_dict:
            print(f"❗ No ground truth for: {reactant} + {reagent} → {product}")
            true_yield = 0.0
        else:
            true_yield = true_yield_dict.get(key)

        # 誤差の計算
        if true_yield is not None:
            true_yield_pct = true_yield * 100
            error = pred_yield - true_yield_pct 
            print(f"🔎 {reactant} + {reagent} → {product}")
            print(f"   📈 Predicted: {pred_yield:.2f}%")
            print(f"   🧪 Ground truth: {true_yield_pct:.2f}%" if true_yield is not None else "   🧪 Ground truth: None")
            print(f"   ❗ Error: {error:+.2f}%")
        else:
            print(f"❔ No ground truth for: {reactant} + {reagent}")
            error = None

        if pred_yield < 0 or pred_yield > 100:
            return 0.0

        return pred_yield  # 目的関数は「予測収率の最大化」
    except Exception as e:
        print(f"❌ Error during trial: {e}")
        return 0.0

In [58]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

[I 2025-08-12 05:50:15,440] A new study created in memory with name: no-name-e4c58f8d-91de-4742-b52b-1f5c49d3bf6b
[I 2025-08-12 05:50:15,618] Trial 0 finished with value: 69.58032989501953 and parameters: {'combo': ('CN(C)S(=O)(=O)Oc1ccc(-c2ccccc2)cc1', 'CCCCC1([PH+](C2CCCCC2)C2CCCCC2)c2ccccc2-c2ccccc21.F[B-](F)(F)F.OB(O)B(O)O', 'OB(O)c1ccc(-c2ccccc2)cc1')}. Best is trial 0 with value: 69.58032989501953.
[I 2025-08-12 05:50:15,725] Trial 1 finished with value: 57.89282989501953 and parameters: {'combo': ('Brc1ccc2occc2c1', 'C1CCC([PH+](C2CCCCC2)C2CCCCC2)CC1.F[B-](F)(F)F.OB(O)B(O)O', 'OB(O)c1ccc2occc2c1')}. Best is trial 0 with value: 69.58032989501953.
[I 2025-08-12 05:50:15,809] Trial 2 finished with value: 74.84309387207031 and parameters: {'combo': ('COc1cc(Cl)ccc1F', 'c1ccc(P(c2ccccc2)C2CCCCC2)cc1.OB(O)B(O)O', 'COc1cc(B(O)O)ccc1F')}. Best is trial 2 with value: 74.84309387207031.


🔎 CN(C)S(=O)(=O)Oc1ccc(-c2ccccc2)cc1 + CCCCC1([PH+](C2CCCCC2)C2CCCCC2)c2ccccc2-c2ccccc21.F[B-](F)(F)F.OB(O)B(O)O → OB(O)c1ccc(-c2ccccc2)cc1
   📈 Predicted: 69.58%
   🧪 Ground truth: 35.75%
   ❗ Error: +33.83%
🔎 Brc1ccc2occc2c1 + C1CCC([PH+](C2CCCCC2)C2CCCCC2)CC1.F[B-](F)(F)F.OB(O)B(O)O → OB(O)c1ccc2occc2c1
   📈 Predicted: 57.89%
   🧪 Ground truth: 8.12%
   ❗ Error: +49.77%
🔎 COc1cc(Cl)ccc1F + c1ccc(P(c2ccccc2)C2CCCCC2)cc1.OB(O)B(O)O → COc1cc(B(O)O)ccc1F
   📈 Predicted: 74.84%
   🧪 Ground truth: 49.17%
   ❗ Error: +25.67%


[I 2025-08-12 05:50:15,901] Trial 3 finished with value: 67.0797348022461 and parameters: {'combo': ('N#Cc1ccc(Cl)cc1F', 'C[PH+](C)C.F[B-](F)(F)F.OB(O)B(O)O', 'N#Cc1ccc(B(O)O)cc1F')}. Best is trial 2 with value: 74.84309387207031.
[I 2025-08-12 05:50:16,051] Trial 4 finished with value: 74.1753158569336 and parameters: {'combo': ('Brc1ccccc1-c1ccccc1', 'COc1ccc(P(c2ccc(OC)cc2)c2ccc(OC)cc2)cc1.OB(O)B(O)O', 'OB(O)c1ccccc1-c1ccccc1')}. Best is trial 2 with value: 74.84309387207031.


🔎 N#Cc1ccc(Cl)cc1F + C[PH+](C)C.F[B-](F)(F)F.OB(O)B(O)O → N#Cc1ccc(B(O)O)cc1F
   📈 Predicted: 67.08%
   🧪 Ground truth: 23.36%
   ❗ Error: +43.72%
🔎 Brc1ccccc1-c1ccccc1 + COc1ccc(P(c2ccc(OC)cc2)c2ccc(OC)cc2)cc1.OB(O)B(O)O → OB(O)c1ccccc1-c1ccccc1
   📈 Predicted: 74.18%
   🧪 Ground truth: 53.74%
   ❗ Error: +20.44%


[I 2025-08-12 05:50:16,146] Trial 5 finished with value: 66.2846450805664 and parameters: {'combo': ('Clc1cnc2ccccc2c1', 'CC(C)(C)P(c1ccccc1)C(C)(C)C.OB(O)B(O)O', 'OB(O)c1cnc2ccccc2c1')}. Best is trial 2 with value: 74.84309387207031.
[I 2025-08-12 05:50:16,250] Trial 6 finished with value: 55.57122039794922 and parameters: {'combo': ('Cc1cccc(C)c1Cl', 'C1CCC([PH+](C2CCCCC2)C2CCCCC2)CC1.F[B-](F)(F)F.OB(O)B(O)O', 'Cc1cccc(C)c1B(O)O')}. Best is trial 2 with value: 74.84309387207031.
[I 2025-08-12 05:50:16,346] Trial 7 finished with value: 73.41165924072266 and parameters: {'combo': ('COc1ccc(Br)cc1F', 'COc1cccc(OC)c1-c1ccccc1P(c1ccccc1)c1ccccc1.OB(O)B(O)O', 'COc1ccc(B(O)O)cc1F')}. Best is trial 2 with value: 74.84309387207031.


🔎 Clc1cnc2ccccc2c1 + CC(C)(C)P(c1ccccc1)C(C)(C)C.OB(O)B(O)O → OB(O)c1cnc2ccccc2c1
   📈 Predicted: 66.28%
   🧪 Ground truth: 12.22%
   ❗ Error: +54.06%
🔎 Cc1cccc(C)c1Cl + C1CCC([PH+](C2CCCCC2)C2CCCCC2)CC1.F[B-](F)(F)F.OB(O)B(O)O → Cc1cccc(C)c1B(O)O
   📈 Predicted: 55.57%
   🧪 Ground truth: 0.67%
   ❗ Error: +54.90%
🔎 COc1ccc(Br)cc1F + COc1cccc(OC)c1-c1ccccc1P(c1ccccc1)c1ccccc1.OB(O)B(O)O → COc1ccc(B(O)O)cc1F
   📈 Predicted: 73.41%
   🧪 Ground truth: 64.83%
   ❗ Error: +8.58%


[I 2025-08-12 05:50:16,437] Trial 8 finished with value: 63.16690444946289 and parameters: {'combo': ('Cc1ncccc1Br', 'COc1cccc(OC)c1-c1ccccc1P(c1ccccc1)c1ccccc1.OB(O)B(O)O', 'Cc1ncccc1B(O)O')}. Best is trial 2 with value: 74.84309387207031.
[I 2025-08-12 05:50:16,525] Trial 9 finished with value: 68.51827239990234 and parameters: {'combo': ('COc1ccc(Cl)cc1F', 'Cc1ccccc1-c1ccccc1P(C1CCCCC1)C1CCCCC1.OB(O)B(O)O', 'COc1ccc(B(O)O)cc1F')}. Best is trial 2 with value: 74.84309387207031.
[I 2025-08-12 05:50:16,621] Trial 10 finished with value: 71.50020599365234 and parameters: {'combo': ('Clc1ccc2[nH]ccc2c1', 'c1ccc(-c2cc3ccccc3n2P(C2CCCCC2)C2CCCCC2)cc1.OB(O)B(O)O', 'OB(O)c1ccc2[nH]ccc2c1')}. Best is trial 2 with value: 74.84309387207031.


🔎 Cc1ncccc1Br + COc1cccc(OC)c1-c1ccccc1P(c1ccccc1)c1ccccc1.OB(O)B(O)O → Cc1ncccc1B(O)O
   📈 Predicted: 63.17%
   🧪 Ground truth: 83.74%
   ❗ Error: -20.58%
🔎 COc1ccc(Cl)cc1F + Cc1ccccc1-c1ccccc1P(C1CCCCC1)C1CCCCC1.OB(O)B(O)O → COc1ccc(B(O)O)cc1F
   📈 Predicted: 68.52%
   🧪 Ground truth: 59.07%
   ❗ Error: +9.45%
🔎 Clc1ccc2[nH]ccc2c1 + c1ccc(-c2cc3ccccc3n2P(C2CCCCC2)C2CCCCC2)cc1.OB(O)B(O)O → OB(O)c1ccc2[nH]ccc2c1
   📈 Predicted: 71.50%
   🧪 Ground truth: 89.17%
   ❗ Error: -17.67%


[I 2025-08-12 05:50:16,753] Trial 11 finished with value: 60.282962799072266 and parameters: {'combo': ('COc1ccc(Br)cc1', 'CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1.OB(O)B(O)O', 'COc1ccc(B(O)O)cc1')}. Best is trial 2 with value: 74.84309387207031.
[I 2025-08-12 05:50:16,875] Trial 12 finished with value: 73.8648452758789 and parameters: {'combo': ('CN(C)S(=O)(=O)Oc1ccc(-c2ccccc2)cc1', 'CC(C)(C)P(c1ccccc1)C(C)(C)C.OB(O)B(O)O', 'OB(O)c1ccc(-c2ccccc2)cc1')}. Best is trial 2 with value: 74.84309387207031.


🔎 COc1ccc(Br)cc1 + CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1.OB(O)B(O)O → COc1ccc(B(O)O)cc1
   📈 Predicted: 60.28%
   🧪 Ground truth: 46.71%
   ❗ Error: +13.57%
🔎 CN(C)S(=O)(=O)Oc1ccc(-c2ccccc2)cc1 + CC(C)(C)P(c1ccccc1)C(C)(C)C.OB(O)B(O)O → OB(O)c1ccc(-c2ccccc2)cc1
   📈 Predicted: 73.86%
   🧪 Ground truth: 1.79%
   ❗ Error: +72.07%


[I 2025-08-12 05:50:16,997] Trial 13 finished with value: 74.29632568359375 and parameters: {'combo': ('CN(C)C(=O)c1ccc(Cl)cc1', 'c1ccc(-c2nn(-c3ccccc3)c(-c3ccccc3)c2-n2nccc2P(C2CCCCC2)C2CCCCC2)cc1.OB(O)B(O)O', 'CN(C)C(=O)c1ccc(B(O)O)cc1')}. Best is trial 2 with value: 74.84309387207031.
[I 2025-08-12 05:50:17,126] Trial 14 finished with value: 63.866268157958984 and parameters: {'combo': ('CN(C)S(=O)(=O)Oc1ccc2ncccc2c1', 'CC(=C(c1ccccc1)c1ccccc1)P(C1CCCCC1)C1CCCCC1.OB(O)B(O)O', 'OB(O)c1ccc2ncccc2c1')}. Best is trial 2 with value: 74.84309387207031.


🔎 CN(C)C(=O)c1ccc(Cl)cc1 + c1ccc(-c2nn(-c3ccccc3)c(-c3ccccc3)c2-n2nccc2P(C2CCCCC2)C2CCCCC2)cc1.OB(O)B(O)O → CN(C)C(=O)c1ccc(B(O)O)cc1
   📈 Predicted: 74.30%
   🧪 Ground truth: 71.94%
   ❗ Error: +2.36%
🔎 CN(C)S(=O)(=O)Oc1ccc2ncccc2c1 + CC(=C(c1ccccc1)c1ccccc1)P(C1CCCCC1)C1CCCCC1.OB(O)B(O)O → OB(O)c1ccc2ncccc2c1
   📈 Predicted: 63.87%
   🧪 Ground truth: 49.34%
   ❗ Error: +14.53%


[I 2025-08-12 05:50:17,232] Trial 15 finished with value: 44.13725280761719 and parameters: {'combo': ('Cc1ncccc1Br', 'CC(=C(c1ccccc1)c1ccccc1)P(C1CCCCC1)C1CCCCC1.OB(O)B(O)O', 'Cc1ncccc1B(O)O')}. Best is trial 2 with value: 74.84309387207031.
[I 2025-08-12 05:50:17,378] Trial 16 finished with value: 66.46917724609375 and parameters: {'combo': ('Brc1cnc2ccccc2c1', 'CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1.OB(O)B(O)O', 'OB(O)c1cnc2ccccc2c1')}. Best is trial 2 with value: 74.84309387207031.


🔎 Cc1ncccc1Br + CC(=C(c1ccccc1)c1ccccc1)P(C1CCCCC1)C1CCCCC1.OB(O)B(O)O → Cc1ncccc1B(O)O
   📈 Predicted: 44.14%
   🧪 Ground truth: 57.17%
   ❗ Error: -13.03%
🔎 Brc1cnc2ccccc2c1 + CC(C)c1cc(C(C)C)c(-c2ccccc2P(c2ccccc2)c2ccccc2)c(C(C)C)c1.OB(O)B(O)O → OB(O)c1cnc2ccccc2c1
   📈 Predicted: 66.47%
   🧪 Ground truth: 23.23%
   ❗ Error: +43.24%


[I 2025-08-12 05:50:17,498] Trial 17 finished with value: 74.84309387207031 and parameters: {'combo': ('COc1cc(Cl)ccc1F', 'c1ccc(P(c2ccccc2)C2CCCCC2)cc1.OB(O)B(O)O', 'COc1cc(B(O)O)ccc1F')}. Best is trial 2 with value: 74.84309387207031.
[I 2025-08-12 05:50:17,609] Trial 18 finished with value: 64.73448181152344 and parameters: {'combo': ('Brc1ccccc1-c1ccccc1', 'COc1ccccc1C1=C(P(C2CCCCC2)C2CCCCC2)C2c3ccccc3C1c1ccccc12.OB(O)B(O)O', 'OB(O)c1ccccc1-c1ccccc1')}. Best is trial 2 with value: 74.84309387207031.
[I 2025-08-12 05:50:17,696] Trial 19 finished with value: 54.16734313964844 and parameters: {'combo': ('Brc1ccsc1', 'Cc1ccccc1-c1ccccc1P(C1CCCCC1)C1CCCCC1.OB(O)B(O)O', 'OB(O)c1ccsc1')}. Best is trial 2 with value: 74.84309387207031.


🔎 COc1cc(Cl)ccc1F + c1ccc(P(c2ccccc2)C2CCCCC2)cc1.OB(O)B(O)O → COc1cc(B(O)O)ccc1F
   📈 Predicted: 74.84%
   🧪 Ground truth: 49.17%
   ❗ Error: +25.67%
🔎 Brc1ccccc1-c1ccccc1 + COc1ccccc1C1=C(P(C2CCCCC2)C2CCCCC2)C2c3ccccc3C1c1ccccc12.OB(O)B(O)O → OB(O)c1ccccc1-c1ccccc1
   📈 Predicted: 64.73%
   🧪 Ground truth: 46.78%
   ❗ Error: +17.95%
🔎 Brc1ccsc1 + Cc1ccccc1-c1ccccc1P(C1CCCCC1)C1CCCCC1.OB(O)B(O)O → OB(O)c1ccsc1
   📈 Predicted: 54.17%
   🧪 Ground truth: 48.23%
   ❗ Error: +5.93%


## ベイズ最適化・ファインチューニングのループ

In [59]:
import os
import csv
import math
import time
import random
from dataclasses import dataclass

import torch.nn.functional as F
from transformers import Trainer,TrainingArguments,DataCollatorWithPadding
from optuna.samplers import TPESampler

In [60]:
class CollatorForYield:
    def __init__(self, tokenizer):
        self.pad = DataCollatorWithPadding(tokenizer)
    def __call__(self, features):
        has_labels = "labels" in features[0]
        if has_labels:
            labels = torch.tensor([float(f["labels"]) for f in features], dtype=torch.float)
        token_feats = [{k: v for k, v in f.items() if k in ("input_ids", "attention_mask")} for f in features]
        batch = self.pad(token_feats)
        if has_labels:
            batch["labels"] = labels
        return batch

In [61]:
class YieldTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels", None)
        preds = model(inputs).squeeze(-1)
        if labels is None:
            loss = preds.new_zeros(())
        else:
            loss = F.mse_loss(preds, labels)
        return (loss, preds) if return_outputs else loss

In [62]:
class YieldDataset(torch.utils.data.Dataset):
    def __init__(self, texts, y, tokenizer, max_length=512):
        self.enc = tokenizer(texts, truncation=True, padding=False, max_length=max_length)
        self.y = y
    def __len__(self): return len(self.y)
    def __getitem__(self, i):
        return {
            "input_ids": torch.tensor(self.enc["input_ids"][i], dtype=torch.long),
            "attention_mask": torch.tensor(self.enc["attention_mask"][i], dtype=torch.long),
            "labels": torch.tensor(self.y[i], dtype=torch.float),  # [%]
        }


In [63]:
@dataclass
class LoopConfig:
    n_rounds: int = 3
    trials_per_round: int = 50
    study_seed: int = 42
    learning_rate: float = 5e-4
    epochs_per_round: int = 5
    weight_decay: float = 0.01
    max_length: int = 512
    batch_size_train: int = 16
    batch_size_eval: int = 32
    val_ratio: float = 0.2
    output_dir: str = "runs/iter_yield"
    log_csv_name: str = "bo_log.csv"

In [64]:
def iterative_optuna_finetune(
    *,
    predict_yield_fn,
    valid_combinations,
    true_yield_dict,
    tokenizer,
    model,
    cfg: LoopConfig = LoopConfig(),
):
    os.makedirs(cfg.output_dir, exist_ok=True)
    log_csv_path = os.path.join(cfg.output_dir, cfg.log_csv_name)

    # CSVヘッダ（存在しなければ作成）
    if not os.path.exists(log_csv_path):
        with open(log_csv_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow([
                "ts", "round", "trial_index", "reactant", "reagent", "product",
                "pred_yield_pct", "true_yield_pct", "error_pct",
                "was_used_for_ft", "study_best_pred", "study_best_true"
            ])

    # これまで探索した（reactant,reagent）組の重複防止 optional
    seen_pairs: set = set()

    # 進捗
    cumulative_true_texts = []
    cumulative_true_labels = []

    train_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for r in range(1, cfg.n_rounds + 1):
        print(f"\n==== Round {r}/{cfg.n_rounds} ====")

        model.to("cpu")
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass

        round_records = []

        # ---- Optuna Study ----
        storage_path = os.path.join(cfg.output_dir, f"round_{r}.db")
        study = optuna.create_study(
            direction="maximize",
            sampler=TPESampler(seed=cfg.study_seed + r),
            storage=f"sqlite:///{storage_path}",
            study_name=f"yield_round_{r}",
            load_if_exists=True,
        )

        def objective(trial: optuna.Trial) -> float:
            # カテゴリ探索
            n = len(valid_combinations)
            combo_idx = trial.suggest_int("combo_idx", 0, n-1)
            reactant, reagent, product = valid_combinations[combo_idx]

            # 重複チェック
            pair = (reactant, reagent, product)
            if pair in seen_pairs:
                trial.set_user_attr("duplicate_pair", True)
            else:
                trial.set_user_attr("duplicate_pair", False)

            input_str = f"REACTANT:{reactant}REAGENT:{reagent}PRODUCT:{product}"

            # 予測
            try:
                pred_y = float(predict_yield_fn(input_str))  # [%]
            except Exception as e:
                print(f"❌ prediction error: {e}")
                pred_y = 0.0

            # クリッピング（安全策）
            if not math.isfinite(pred_y):
                pred_y = 0.0
            pred_y = max(0.0, min(100.0, pred_y))

            # 真値（%）— 無い場合は 0% を採用
            true = true_yield_dict.get(pair, None)
            if true is None:
                true_pct = 0.0
                error_pct = pred_y - true_pct
                trial.set_user_attr("imputed_true_zero", True)
            else:
                true_pct = float(true) * 100.0
                error_pct = pred_y - true_pct
                trial.set_user_attr("imputed_true_zero", False)

            # エラー
            error_pct = None if true_pct is None else (pred_y - true_pct)

            # Optuna user attrs にも残す
            trial.set_user_attr("reactant", reactant)
            trial.set_user_attr("reagent", reagent)
            trial.set_user_attr("product", product)
            trial.set_user_attr("pred_yield_pct", pred_y)
            trial.set_user_attr("true_yield_pct", true_pct)
            trial.set_user_attr("error_pct", error_pct)

            # 一旦メモリにも保存（後でCSV出力）
            round_records.append({
                "reactant": reactant,
                "reagent": reagent,
                "product": product,
                "pred_yield_pct": pred_y,
                "true_yield_pct": true_pct,
                "error_pct": error_pct,
            })

            # 目的関数は「予測収率の最大化」
            return pred_y

        study.optimize(objective, n_trials=cfg.trials_per_round, n_jobs=1)

        model.to(train_device)
        
        # ---- ラウンドの結果をCSVへ出力 ----
        best_pred = float(study.best_value) if study.best_value is not None else None
        # best の真値
        best_trial = study.best_trial if study.best_trial else None
        best_true = None
        if best_trial:
            bt_true = best_trial.user_attrs.get("true_yield_pct", None)
            best_true = None if bt_true is None else float(bt_true)

        with open(log_csv_path, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            for idx, rec in enumerate(round_records, start=1):
                writer.writerow([
                    int(time.time()),
                    r,
                    idx,
                    rec["reactant"],
                    rec["reagent"],
                    rec["product"],
                    f'{rec["pred_yield_pct"]:.6f}',
                    "" if rec["true_yield_pct"] is None else f'{rec["true_yield_pct"]:.6f}',
                    "" if rec["error_pct"] is None else f'{rec["error_pct"]:+.6f}',
                    "",  # was_used_for_ft はFT後に上書き
                    "" if best_pred is None else f"{best_pred:.6f}",
                    "" if best_true is None else f"{best_true:.6f}",
                ])

        # ---- ラウンドの真値付きデータで FT 用データセット作成 ----
        texts_round = []
        labels_round = []  # [%]
        for rec in round_records:
            if rec["true_yield_pct"] is not None:
                inp = f"REACTANT:{rec['reactant']}REAGENT:{rec['reagent']}PRODUCT:{rec['product']}"
                texts_round.append(inp)
                labels_round.append(float(rec["true_yield_pct"]))

        if len(texts_round) == 0:
            print(f"[Round {r}] 真値付きデータが0件のため、微調整はスキップします。")
            continue

        # 累積データに追加
        cumulative_true_texts.extend(texts_round)
        cumulative_true_labels.extend(labels_round)

        # ---- 学習/評価分割（十分な件数のときのみ評価）----
        idxs = list(range(len(cumulative_true_texts)))
        random.Random(cfg.study_seed + r).shuffle(idxs)

        n_total = len(idxs)
        n_val = int(n_total * cfg.val_ratio)
        if n_val >= 5:  # 最低5件確保できたときだけ eval
            val_idx = idxs[:n_val]
            train_idx = idxs[n_val:]
        else:
            val_idx = []
            train_idx = idxs

        def subset(lst, sel): return [lst[i] for i in sel]

        train_ds = YieldDataset(
            subset(cumulative_true_texts, train_idx),
            subset(cumulative_true_labels, train_idx),
            tokenizer,
            max_length=cfg.max_length,
        )
        eval_ds = None
        if len(val_idx) > 0:
            eval_ds = YieldDataset(
                subset(cumulative_true_texts, val_idx),
                subset(cumulative_true_labels, val_idx),
                tokenizer,
                max_length=cfg.max_length,
            )

        # ---- Trainer 準備・学習 ----
        out_dir_round = os.path.join(cfg.output_dir, f"round_{r}")
        args = TrainingArguments(
            output_dir=out_dir_round,
            learning_rate=cfg.learning_rate,
            num_train_epochs=cfg.epochs_per_round,
            per_device_train_batch_size=min(cfg.batch_size_train, max(1, len(train_ds))),
            per_device_eval_batch_size=cfg.batch_size_eval,
            weight_decay=cfg.weight_decay,
            logging_steps=50,
            save_strategy="no",
            report_to="none",
            fp16=torch.cuda.is_available(),
            remove_unused_columns=False
        )

        def compute_metrics(eval_pred):
            import numpy as np
            preds = np.array(eval_pred.predictions).reshape(-1)
            labels = np.array(eval_pred.label_ids).reshape(-1)
            mae = float(np.mean(np.abs(preds - labels)))
            rmse = float(np.sqrt(np.mean((preds - labels) ** 2)))
            return {"mae_pct": mae, "rmse_pct": rmse}

        trainer = YieldTrainer(
            model=model,
            args=args,
            train_dataset=train_ds,
            eval_dataset=eval_ds,
            data_collator=CollatorForYield(tokenizer),
            compute_metrics=compute_metrics if eval_ds else None,
        )

        print(f"[Round {r}] Fine-tuning on {len(train_ds)} samples"
              + (f", eval {len(eval_ds)} samples" if eval_ds else ""))

        trainer.train()
        trainer.save_model(out_dir_round)  # fc層を含む全体を保存

        # ---- このラウンドで FT に使った試行を CSV にマーク ----
        # （簡易的に：直近ラウンドの真値付き行の was_used_for_ft を 1 に上書き）
        # 既存CSVを読み書きする
        with open(log_csv_path, "r", encoding="utf-8") as f:
            rows = list(csv.reader(f))
        header = rows[0]
        # カラム位置
        was_used_idx = header.index("was_used_for_ft")
        round_idx = header.index("round")
        trial_idx = header.index("trial_index")
        react_idx = header.index("reactant")
        reag_idx = header.index("reagent")
        prod_idx = header.index("product")

        ft_pairs = {(rec["reactant"], rec["reagent"], rec["product"]) for rec in round_records if rec["true_yield_pct"] is not None}
        for i in range(1, len(rows)):
            row = rows[i]
            if int(row[round_idx]) == r and (row[react_idx], row[reag_idx], row[prod_idx]) in ft_pairs:
                row[was_used_idx] = "1"
        with open(log_csv_path, "w", newline="", encoding="utf-8") as f:
            csv.writer(f).writerows(rows)

        # seen に今回の組み合わせを加えて、以降の重複を（弱く）抑止
        for rec in round_records:
            seen_pairs.add((rec["reactant"], rec["reagent"]))

    print("\nDone. Logs:")
    print(f"- Trials CSV: {log_csv_path}")
    print(f"- Optuna DBs: {cfg.output_dir}/round_*.db")
    print(f"- Checkpoints per round: {cfg.output_dir}/round_*/")

In [65]:
cfg = LoopConfig(
    n_rounds=10,
    trials_per_round=10,
    study_seed=42,
    learning_rate=5e-4,
    epochs_per_round=5,
    weight_decay=0.01,
    max_length=512,
    batch_size_train=16,
    batch_size_eval=32,
    val_ratio=0.2,
    output_dir="runs/10rounds_10_trials_yield_valid_combinations",
)

In [66]:
iterative_optuna_finetune(
    predict_yield_fn=predict_yield,
    valid_combinations=valid_combinations,
    true_yield_dict=true_yield_dict,
    tokenizer=yield_tokenizer,
    model=yield_model,
    cfg=cfg,
)


==== Round 1/10 ====


[I 2025-08-12 05:50:18,129] A new study created in RDB with name: yield_round_1
[I 2025-08-12 05:50:18,415] Trial 0 finished with value: 57.70353698730469 and parameters: {'combo_idx': 154}. Best is trial 0 with value: 57.70353698730469.
[I 2025-08-12 05:50:18,681] Trial 1 finished with value: 74.9722671508789 and parameters: {'combo_idx': 820}. Best is trial 1 with value: 74.9722671508789.
[I 2025-08-12 05:50:18,931] Trial 2 finished with value: 79.29293823242188 and parameters: {'combo_idx': 179}. Best is trial 2 with value: 79.29293823242188.
[I 2025-08-12 05:50:19,172] Trial 3 finished with value: 76.81636810302734 and parameters: {'combo_idx': 324}. Best is trial 2 with value: 79.29293823242188.
[I 2025-08-12 05:50:19,445] Trial 4 finished with value: 67.95651245117188 and parameters: {'combo_idx': 440}. Best is trial 2 with value: 79.29293823242188.
[I 2025-08-12 05:50:19,685] Trial 5 finished with value: 73.18510437011719 and parameters: {'combo_idx': 1157}. Best is trial 2 with

[Round 1] Fine-tuning on 10 samples
{'train_runtime': 0.2717, 'train_samples_per_second': 184.045, 'train_steps_per_second': 18.405, 'train_loss': 1188.13037109375, 'epoch': 5.0}

==== Round 2/10 ====


[I 2025-08-12 05:50:22,566] A new study created in RDB with name: yield_round_2
[I 2025-08-12 05:50:22,865] Trial 0 finished with value: 61.088443756103516 and parameters: {'combo_idx': 1124}. Best is trial 0 with value: 61.088443756103516.
[I 2025-08-12 05:50:23,136] Trial 1 finished with value: 57.628746032714844 and parameters: {'combo_idx': 141}. Best is trial 0 with value: 61.088443756103516.
[I 2025-08-12 05:50:23,445] Trial 2 finished with value: 70.20208740234375 and parameters: {'combo_idx': 1003}. Best is trial 2 with value: 70.20208740234375.
[I 2025-08-12 05:50:23,746] Trial 3 finished with value: 53.4261589050293 and parameters: {'combo_idx': 485}. Best is trial 2 with value: 70.20208740234375.
[I 2025-08-12 05:50:24,063] Trial 4 finished with value: 69.27572631835938 and parameters: {'combo_idx': 483}. Best is trial 2 with value: 70.20208740234375.
[I 2025-08-12 05:50:24,344] Trial 5 finished with value: 70.74156188964844 and parameters: {'combo_idx': 820}. Best is trial 

[Round 2] Fine-tuning on 20 samples
{'train_runtime': 0.566, 'train_samples_per_second': 176.681, 'train_steps_per_second': 17.668, 'train_loss': 1171.2853515625, 'epoch': 5.0}

==== Round 3/10 ====


[I 2025-08-12 05:50:27,439] A new study created in RDB with name: yield_round_3
[I 2025-08-12 05:50:27,757] Trial 0 finished with value: 67.98828887939453 and parameters: {'combo_idx': 1332}. Best is trial 0 with value: 67.98828887939453.
[I 2025-08-12 05:50:28,020] Trial 1 finished with value: 71.32312774658203 and parameters: {'combo_idx': 740}. Best is trial 1 with value: 71.32312774658203.
[I 2025-08-12 05:50:28,279] Trial 2 finished with value: 61.71318435668945 and parameters: {'combo_idx': 379}. Best is trial 1 with value: 71.32312774658203.
[I 2025-08-12 05:50:28,537] Trial 3 finished with value: 73.459228515625 and parameters: {'combo_idx': 104}. Best is trial 3 with value: 73.459228515625.
[I 2025-08-12 05:50:28,810] Trial 4 finished with value: 71.69900512695312 and parameters: {'combo_idx': 598}. Best is trial 3 with value: 73.459228515625.
[I 2025-08-12 05:50:29,089] Trial 5 finished with value: 66.02289581298828 and parameters: {'combo_idx': 636}. Best is trial 3 with val

[Round 3] Fine-tuning on 24 samples, eval 6 samples
{'train_runtime': 0.6307, 'train_samples_per_second': 190.266, 'train_steps_per_second': 15.856, 'train_loss': 1230.43466796875, 'epoch': 5.0}

==== Round 4/10 ====


[I 2025-08-12 05:50:32,725] A new study created in RDB with name: yield_round_4
[I 2025-08-12 05:50:33,023] Trial 0 finished with value: 65.22242736816406 and parameters: {'combo_idx': 1055}. Best is trial 0 with value: 65.22242736816406.
[I 2025-08-12 05:50:33,295] Trial 1 finished with value: 66.01749420166016 and parameters: {'combo_idx': 855}. Best is trial 1 with value: 66.01749420166016.
[I 2025-08-12 05:50:33,585] Trial 2 finished with value: 69.39109802246094 and parameters: {'combo_idx': 335}. Best is trial 2 with value: 69.39109802246094.
[I 2025-08-12 05:50:33,911] Trial 3 finished with value: 73.54987335205078 and parameters: {'combo_idx': 1021}. Best is trial 3 with value: 73.54987335205078.
[I 2025-08-12 05:50:34,250] Trial 4 finished with value: 73.49612426757812 and parameters: {'combo_idx': 421}. Best is trial 3 with value: 73.54987335205078.
[I 2025-08-12 05:50:34,549] Trial 5 finished with value: 71.7630844116211 and parameters: {'combo_idx': 1262}. Best is trial 3 w

[Round 4] Fine-tuning on 32 samples, eval 8 samples
{'train_runtime': 0.7259, 'train_samples_per_second': 220.418, 'train_steps_per_second': 13.776, 'train_loss': 1285.97685546875, 'epoch': 5.0}

==== Round 5/10 ====


[I 2025-08-12 05:50:38,270] A new study created in RDB with name: yield_round_5
[I 2025-08-12 05:50:38,798] Trial 0 finished with value: 63.43986892700195 and parameters: {'combo_idx': 152}. Best is trial 0 with value: 63.43986892700195.
[I 2025-08-12 05:50:39,297] Trial 1 finished with value: 67.7176742553711 and parameters: {'combo_idx': 1312}. Best is trial 1 with value: 67.7176742553711.
[I 2025-08-12 05:50:39,774] Trial 2 finished with value: 73.34500885009766 and parameters: {'combo_idx': 981}. Best is trial 2 with value: 73.34500885009766.
[I 2025-08-12 05:50:40,070] Trial 3 finished with value: 75.17556762695312 and parameters: {'combo_idx': 473}. Best is trial 3 with value: 75.17556762695312.
[I 2025-08-12 05:50:40,331] Trial 4 finished with value: 75.8772964477539 and parameters: {'combo_idx': 953}. Best is trial 4 with value: 75.8772964477539.
[I 2025-08-12 05:50:40,595] Trial 5 finished with value: 68.59016418457031 and parameters: {'combo_idx': 1077}. Best is trial 4 with 

[Round 5] Fine-tuning on 40 samples, eval 10 samples
{'train_runtime': 0.8643, 'train_samples_per_second': 231.4, 'train_steps_per_second': 17.355, 'train_loss': 1379.8893229166667, 'epoch': 5.0}

==== Round 6/10 ====


[I 2025-08-12 05:50:44,143] A new study created in RDB with name: yield_round_6
[I 2025-08-12 05:50:44,482] Trial 0 finished with value: 57.891700744628906 and parameters: {'combo_idx': 23}. Best is trial 0 with value: 57.891700744628906.
[I 2025-08-12 05:50:44,751] Trial 1 finished with value: 51.606346130371094 and parameters: {'combo_idx': 1200}. Best is trial 0 with value: 57.891700744628906.
[I 2025-08-12 05:50:45,049] Trial 2 finished with value: 71.19015502929688 and parameters: {'combo_idx': 383}. Best is trial 2 with value: 71.19015502929688.
[I 2025-08-12 05:50:45,321] Trial 3 finished with value: 65.22799682617188 and parameters: {'combo_idx': 402}. Best is trial 2 with value: 71.19015502929688.
[I 2025-08-12 05:50:45,601] Trial 4 finished with value: 63.787109375 and parameters: {'combo_idx': 1066}. Best is trial 2 with value: 71.19015502929688.
[I 2025-08-12 05:50:45,907] Trial 5 finished with value: 75.4609603881836 and parameters: {'combo_idx': 437}. Best is trial 5 with

[Round 6] Fine-tuning on 48 samples, eval 12 samples
{'train_runtime': 1.0212, 'train_samples_per_second': 235.018, 'train_steps_per_second': 14.689, 'train_loss': 1198.596875, 'epoch': 5.0}

==== Round 7/10 ====


[I 2025-08-12 05:50:49,495] A new study created in RDB with name: yield_round_7
[I 2025-08-12 05:50:49,737] Trial 0 finished with value: 74.01555633544922 and parameters: {'combo_idx': 405}. Best is trial 0 with value: 74.01555633544922.
[I 2025-08-12 05:50:49,967] Trial 1 finished with value: 63.77295684814453 and parameters: {'combo_idx': 332}. Best is trial 0 with value: 74.01555633544922.
[I 2025-08-12 05:50:50,217] Trial 2 finished with value: 69.82061767578125 and parameters: {'combo_idx': 1247}. Best is trial 0 with value: 74.01555633544922.
[I 2025-08-12 05:50:50,445] Trial 3 finished with value: 63.585723876953125 and parameters: {'combo_idx': 1200}. Best is trial 0 with value: 74.01555633544922.
[I 2025-08-12 05:50:50,676] Trial 4 finished with value: 59.160118103027344 and parameters: {'combo_idx': 920}. Best is trial 0 with value: 74.01555633544922.
[I 2025-08-12 05:50:50,917] Trial 5 finished with value: 71.14501953125 and parameters: {'combo_idx': 763}. Best is trial 0 wi

[Round 7] Fine-tuning on 56 samples, eval 14 samples
{'train_runtime': 1.2391, 'train_samples_per_second': 225.967, 'train_steps_per_second': 16.141, 'train_loss': 1134.07021484375, 'epoch': 5.0}

==== Round 8/10 ====


[I 2025-08-12 05:50:54,391] A new study created in RDB with name: yield_round_8
[I 2025-08-12 05:50:54,647] Trial 0 finished with value: 49.71087646484375 and parameters: {'combo_idx': 666}. Best is trial 0 with value: 49.71087646484375.
[I 2025-08-12 05:50:54,858] Trial 1 finished with value: 57.37260437011719 and parameters: {'combo_idx': 307}. Best is trial 1 with value: 57.37260437011719.
[I 2025-08-12 05:50:55,073] Trial 2 finished with value: 72.2946548461914 and parameters: {'combo_idx': 344}. Best is trial 2 with value: 72.2946548461914.
[I 2025-08-12 05:50:55,271] Trial 3 finished with value: 64.16877746582031 and parameters: {'combo_idx': 533}. Best is trial 2 with value: 72.2946548461914.
[I 2025-08-12 05:50:55,485] Trial 4 finished with value: 65.7093276977539 and parameters: {'combo_idx': 508}. Best is trial 2 with value: 72.2946548461914.
[I 2025-08-12 05:50:55,708] Trial 5 finished with value: 66.57337188720703 and parameters: {'combo_idx': 1342}. Best is trial 2 with va

[Round 8] Fine-tuning on 64 samples, eval 16 samples
{'train_runtime': 1.2543, 'train_samples_per_second': 255.119, 'train_steps_per_second': 15.945, 'train_loss': 1048.6716796875, 'epoch': 5.0}

==== Round 9/10 ====


[I 2025-08-12 05:50:59,091] A new study created in RDB with name: yield_round_9
[I 2025-08-12 05:50:59,315] Trial 0 finished with value: 58.06071853637695 and parameters: {'combo_idx': 910}. Best is trial 0 with value: 58.06071853637695.
[I 2025-08-12 05:50:59,524] Trial 1 finished with value: 61.97481155395508 and parameters: {'combo_idx': 60}. Best is trial 1 with value: 61.97481155395508.
[I 2025-08-12 05:50:59,743] Trial 2 finished with value: 48.546417236328125 and parameters: {'combo_idx': 462}. Best is trial 1 with value: 61.97481155395508.
[I 2025-08-12 05:50:59,947] Trial 3 finished with value: 68.15154266357422 and parameters: {'combo_idx': 867}. Best is trial 3 with value: 68.15154266357422.
[I 2025-08-12 05:51:00,168] Trial 4 finished with value: 66.68270111083984 and parameters: {'combo_idx': 382}. Best is trial 3 with value: 68.15154266357422.
[I 2025-08-12 05:51:00,395] Trial 5 finished with value: 63.539241790771484 and parameters: {'combo_idx': 1278}. Best is trial 3 w

[Round 9] Fine-tuning on 72 samples, eval 18 samples
{'train_runtime': 1.5495, 'train_samples_per_second': 232.326, 'train_steps_per_second': 16.134, 'train_loss': 764.6971875, 'epoch': 5.0}

==== Round 10/10 ====


[I 2025-08-12 05:51:04,096] A new study created in RDB with name: yield_round_10
[I 2025-08-12 05:51:04,370] Trial 0 finished with value: 38.51378631591797 and parameters: {'combo_idx': 1108}. Best is trial 0 with value: 38.51378631591797.
[I 2025-08-12 05:51:04,671] Trial 1 finished with value: 46.82652282714844 and parameters: {'combo_idx': 35}. Best is trial 1 with value: 46.82652282714844.
[I 2025-08-12 05:51:04,917] Trial 2 finished with value: 34.08576202392578 and parameters: {'combo_idx': 283}. Best is trial 1 with value: 46.82652282714844.
[I 2025-08-12 05:51:05,231] Trial 3 finished with value: 45.09576416015625 and parameters: {'combo_idx': 833}. Best is trial 1 with value: 46.82652282714844.
[I 2025-08-12 05:51:05,495] Trial 4 finished with value: 43.59731674194336 and parameters: {'combo_idx': 132}. Best is trial 1 with value: 46.82652282714844.
[I 2025-08-12 05:51:05,811] Trial 5 finished with value: 48.79877471923828 and parameters: {'combo_idx': 835}. Best is trial 5 wi

[Round 10] Fine-tuning on 80 samples, eval 20 samples
{'train_runtime': 1.6705, 'train_samples_per_second': 239.452, 'train_steps_per_second': 14.966, 'train_loss': 718.9609375, 'epoch': 5.0}

Done. Logs:
- Trials CSV: runs/10rounds_10_trials_yield_valid_combinations/bo_log.csv
- Optuna DBs: runs/10rounds_10_trials_yield_valid_combinations/round_*.db
- Checkpoints per round: runs/10rounds_10_trials_yield_valid_combinations/round_*/


In [67]:
import pandas as pd
import matplotlib.pyplot as plt

In [68]:
def visualize_bo_logs(csv_path: str, out_dir: str | None = None, show: bool = False, dpi: int = 180):
    # 出力ディレクトリ（集約）
    root = out_dir or (os.path.dirname(csv_path) or ".")
    save_dir = os.path.join(root, "bo_viz")
    os.makedirs(save_dir, exist_ok=True)

    # ロード & 型整形
    df = pd.read_csv(csv_path)
    to_num_cols = [
        "round", "trial_index", "pred_yield_pct", "true_yield_pct",
        "error_pct", "study_best_pred", "study_best_true"
    ]
    for c in to_num_cols:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")

    # 真値ありデータ
    df_obs = df.dropna(subset=["true_yield_pct"]) if "true_yield_pct" in df.columns else pd.DataFrame()
    # error_pct が無い/NaN の場合は再計算
    if not df_obs.empty:
        df_obs["error_pct"] = df_obs["pred_yield_pct"] - df_obs["true_yield_pct"]

    # ===== 全体指標 =====
    overall = {}
    if not df_obs.empty:
        y = df_obs["true_yield_pct"].to_numpy()
        yhat = df_obs["pred_yield_pct"].to_numpy()
        err = yhat - y
        overall = {
            "n": int(len(df_obs)),
            "mae_pct": float(np.mean(np.abs(err))),
            "rmse_pct": float(np.sqrt(np.mean(err**2))),
            "bias_pct": float(np.mean(err)),
            "r2": float(1 - np.sum(err**2) / np.sum((y - y.mean())**2)) if len(df_obs) > 1 else np.nan,
        }
    pd.DataFrame([overall]).to_csv(os.path.join(save_dir, "metrics_overall.csv"), index=False)

    # ===== ラウンド別指標（MAE/RMSE/Bias/R2）=====
    metrics_by_round = []
    if not df_obs.empty and "round" in df_obs.columns:
        for r, d in df_obs.groupby("round", dropna=True):
            y = d["true_yield_pct"].to_numpy()
            yhat = d["pred_yield_pct"].to_numpy()
            err = yhat - y
            metrics_by_round.append({
                "round": int(r),
                "n": int(len(d)),
                "mae_pct": float(np.mean(np.abs(err))),
                "rmse_pct": float(np.sqrt(np.mean(err**2))),
                "bias_pct": float(np.mean(err)),
                "r2": float(1 - np.sum(err**2) / np.sum((y - y.mean())**2)) if len(d) > 1 else np.nan,
            })
    mdf = (pd.DataFrame(metrics_by_round)
             .sort_values("round")
             if metrics_by_round else pd.DataFrame(columns=["round","n","mae_pct","rmse_pct","bias_pct","r2"]))
    mdf.to_csv(os.path.join(save_dir, "metrics_by_round.csv"), index=False)

    # ===== 図: パリティ =====
    if not df_obs.empty:
        fig = plt.figure(figsize=(5, 5))
        plt.scatter(df_obs["true_yield_pct"], df_obs["pred_yield_pct"], s=18, alpha=0.65)
        lims = [0, 100]
        plt.plot(lims, lims, linestyle="--")
        plt.xlim(lims); plt.ylim(lims)
        plt.xlabel("True Yield [%]"); plt.ylabel("Predicted Yield [%]")
        plt.title("Parity: Prediction vs Truth")
        fig.savefig(os.path.join(save_dir, "parity.png"), dpi=dpi, bbox_inches="tight")
        if show: plt.show()
        plt.close(fig)

    # ===== 図: 誤差ヒスト =====
    if not df_obs.empty and not df_obs["error_pct"].dropna().empty:
        fig = plt.figure(figsize=(6, 4))
        plt.hist(df_obs["error_pct"].dropna().to_numpy(), bins=30)
        plt.xlabel("Prediction Error [%]  (pred - true)")
        plt.ylabel("Count")
        plt.title("Error Histogram")
        fig.savefig(os.path.join(save_dir, "error_hist.png"), dpi=dpi, bbox_inches="tight")
        if show: plt.show()
        plt.close(fig)

    # ===== 図: ラウンド別ベスト真値 =====
    if not df_obs.empty and "round" in df_obs.columns:
        best_by_round = df_obs.groupby("round")["true_yield_pct"].max()
        fig = plt.figure(figsize=(6, 4))
        plt.plot(best_by_round.index, best_by_round.values, marker="o")
        plt.xlabel("Round"); plt.ylabel("Best Observed True Yield [%]")
        plt.title("Best True Yield per Round"); plt.grid(True, alpha=0.3)
        fig.savefig(os.path.join(save_dir, "best_true_per_round.png"), dpi=dpi, bbox_inches="tight")
        if show: plt.show()
        plt.close(fig)

    # ===== 図: キャリブレーション =====
    if not df_obs.empty:
        bins = np.linspace(0, 100, 11)  # 10ビン
        cut = pd.cut(df_obs["pred_yield_pct"], bins, include_lowest=True)
        calib = df_obs.groupby(cut).agg(
            pred_mean=("pred_yield_pct", "mean"),
            true_mean=("true_yield_pct", "mean"),
            n=("true_yield_pct", "size")
        ).dropna()
        if not calib.empty:
            fig = plt.figure(figsize=(6, 4))
            lims = [0, 100]
            plt.plot(calib["pred_mean"], calib["true_mean"], marker="o")
            plt.plot(lims, lims, linestyle="--")
            plt.xlim(lims); plt.ylim(lims)
            plt.xlabel("Predicted Mean (per bin) [%]")
            plt.ylabel("Observed Mean (per bin) [%]")
            plt.title("Calibration Curve")
            fig.savefig(os.path.join(save_dir, "calibration.png"), dpi=dpi, bbox_inches="tight")
            if show: plt.show()
            plt.close(fig)
            calib.to_csv(os.path.join(save_dir, "calibration_table.csv"))

    # ===== 図: ラウンド別（真値/予測）の箱ひげ =====
    if not df_obs.empty and "round" in df_obs.columns:
        rounds = sorted(df_obs["round"].dropna().unique())
        if len(rounds) > 0:
            fig = plt.figure(figsize=(7, 4))
            pos = np.array(rounds, dtype=float)
            data_true = [df_obs[df_obs["round"] == r]["true_yield_pct"].to_numpy() for r in rounds]
            data_pred = [df_obs[df_obs["round"] == r]["pred_yield_pct"].to_numpy() for r in rounds]
            plt.boxplot(data_true, positions=pos - 0.15, widths=0.25, patch_artist=True)
            plt.boxplot(data_pred, positions=pos + 0.15, widths=0.25, patch_artist=True)
            plt.xticks(rounds)
            plt.xlabel("Round"); plt.ylabel("Yield [%]")
            plt.title("Distributions per Round (True vs Pred)")
            fig.savefig(os.path.join(save_dir, "round_box.png"), dpi=dpi, bbox_inches="tight")
            if show: plt.show()
            plt.close(fig)

    # ===== NEW: ラウンド別の誤差分布（箱ひげ）=====
    if not df_obs.empty and "round" in df_obs.columns:
        rounds = sorted(df_obs["round"].dropna().unique())
        if len(rounds) > 0:
            fig = plt.figure(figsize=(7, 4))
            data_err = [df_obs[df_obs["round"] == r]["error_pct"].dropna().to_numpy() for r in rounds]
            plt.boxplot(data_err, positions=np.array(rounds, dtype=float), widths=0.5, patch_artist=True)
            plt.axhline(0.0, linestyle="--")
            plt.xticks(rounds)
            plt.xlabel("Round"); plt.ylabel("Prediction Error (pred - true) [%]")
            plt.title("Prediction Error by Round (Boxplot)")
            fig.savefig(os.path.join(save_dir, "error_box_by_round.png"), dpi=dpi, bbox_inches="tight")
            if show: plt.show()
            plt.close(fig)

    # ===== NEW: ラウンド別の MAE/RMSE/Bias 推移 =====
    if not mdf.empty:
        fig = plt.figure(figsize=(7, 4))
        plt.plot(mdf["round"], mdf["mae_pct"], marker="o", label="MAE [%]")
        plt.plot(mdf["round"], mdf["rmse_pct"], marker="o", label="RMSE [%]")
        plt.plot(mdf["round"], mdf["bias_pct"], marker="o", label="Bias (pred-true) [%]")
        plt.xlabel("Round"); plt.ylabel("Error [%]")
        plt.title("Prediction Error by Round")
        plt.grid(True, alpha=0.3); plt.legend()
        fig.savefig(os.path.join(save_dir, "error_by_round.png"), dpi=dpi, bbox_inches="tight")
        if show: plt.show()
        plt.close(fig)

    # ===== Optuna study best =====
    if "study_best_pred" in df.columns:
        best_df = df.dropna(subset=["round", "study_best_pred"]).groupby("round").agg(
            best_pred=("study_best_pred", "max"),
            best_true=("study_best_true", "max")
        ).reset_index()
        if not best_df.empty:
            fig = plt.figure(figsize=(6, 4))
            plt.plot(best_df["round"], best_df["best_pred"], marker="o", label="Study Best Pred [%]")
            if "study_best_true" in best_df.columns and best_df["best_true"].notna().any():
                plt.plot(best_df["round"], best_df["best_true"], marker="o", label="Study Best True [%]")
            plt.xlabel("Round"); plt.ylabel("Yield [%]")
            plt.title("Optuna Study Best per Round"); plt.grid(True, alpha=0.3); plt.legend()
            fig.savefig(os.path.join(save_dir, "study_best.png"), dpi=dpi, bbox_inches="tight")
            if show: plt.show()
            plt.close(fig)

    return {"overall": overall, "by_round": mdf, "save_dir": save_dir}


In [69]:
results = visualize_bo_logs("runs/10rounds_10_trials_yield_valid_combinations/bo_log.csv", show=False)
print(results["overall"])

  calib = df_obs.groupby(cut).agg(


{'n': 100, 'mae_pct': 27.757277220000002, 'rmse_pct': 33.61776703853803, 'bias_pct': 19.484875579999997, 'r2': -0.6523801547890267}


## ランダムな実験

In [70]:
n_rounds=10
trials_per_round=10

In [71]:
best_yields_per_round = []

for _ in range(n_rounds):
    best_yield = 0
    for _ in range(trials_per_round):
        combo = random.choice(valid_combinations)
        yield_value = true_yield_dict[combo]
        if yield_value > best_yield:
            best_yield = yield_value
    best_yields_per_round.append(best_yield)

In [72]:
best_yields_per_round

[0.7656999999999999,
 0.8825775420842658,
 0.5731,
 0.9525,
 0.8428999999999999,
 0.8630382805936712,
 0.6930607532903948,
 0.9216241949033883,
 0.9278128928,
 0.7373904989000001]