运行 Cell 1（只需一次）。
把初始 10+ 条实验数据存为 data/round_0.csv，然后运行 Cell 2。
每次做完实验后：
打开 Cell 3 运行 → 得到 5 组新配方
把 真实测得的峰位 填入 data/round_{i}_to_test.csv 的最后一列（列名仍叫 峰位）
把该文件重命名为 data/round_{i}.csv
再次运行 Cell 3，循环即可。

In [2]:
# ------------------------------------------------------------
# Cell 1：环境 & 通用函数（全部改用 .xlsx）
# ------------------------------------------------------------
import numpy as np
import pandas as pd
import optuna
from pathlib import Path
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel as C
from sklearn.model_selection import KFold, cross_val_predict
import matplotlib.pyplot as plt
import seaborn as sns

# 全局路径
DATA_DIR = Path('/home/ubuntu/50T/LYT/matagent/response-matter/data')
LOG_FILE = Path('log.xlsx')          # ← 改为 .xlsx
FEATURES = ['CsBr', 'CsCl', 'PbBr2', 'PbCl2', 'Oam', 'OA']
TARGET   = '峰位'

DATA_DIR.mkdir(exist_ok=True)

# ------------------------------------------------------------
# 通用函数（全部改用 .xlsx）
# ------------------------------------------------------------
def load_all_data():
    """把 data/ 目录下所有 round_*.xlsx 拼起来"""
    xlsx_files = sorted(DATA_DIR.glob('round_*.xlsx'))
    return pd.concat([pd.read_excel(f) for f in xlsx_files], ignore_index=True)


def train_gp(df):
    """给定 DataFrame，返回训练好的 gp 和 scaler"""
    X = df[FEATURES].values
    y = df[TARGET].values

    sx = StandardScaler()
    sy = StandardScaler()
    Xn = sx.fit_transform(X)
    yn = sy.fit_transform(y.reshape(-1, 1)).ravel()

    kernel = (
        C(1.0, (1e-2, 1e3))
        * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
        + WhiteKernel(noise_level=1.0, noise_level_bounds=(1e-5, 1e1))
    )
    gp = GaussianProcessRegressor(kernel=kernel, alpha=0.0, normalize_y=False)
    gp.fit(Xn, yn)

    cv = KFold(n_splits=min(10, len(df)), shuffle=True, random_state=42)
    yn_cv = cross_val_predict(gp, Xn, yn, cv=cv)
    y_cv = sy.inverse_transform(yn_cv.reshape(-1, 1)).ravel()
    r2  = r2_score(y, y_cv)
    mae = mean_absolute_error(y, y_cv)
    return gp, sx, sy, r2, mae


def train_rf(df):
    """给定 DataFrame，返回训练好的随机森林模型和 scaler"""
    X = df[FEATURES].values
    y = df[TARGET].values
 
    # 与 train_gp 完全一致的缩放
    sx = StandardScaler()
    sy = StandardScaler()
    Xn = sx.fit_transform(X)
    yn = sy.fit_transform(y.reshape(-1, 1)).ravel()
 
    # 随机森林模型
    rf = RandomForestRegressor(
        n_estimators=500,        # 树的数量
        max_depth=None,          # 不限制深度
        min_samples_split=2,     # 最小分裂样本
        min_samples_leaf=1,      # 最小叶节点样本
        random_state=42
    )
    rf.fit(Xn, yn)
 
    # 与 train_gp 完全一致的交叉验证
    cv = KFold(n_splits=min(10, len(df)), shuffle=True, random_state=42)
    yn_cv = cross_val_predict(rf, Xn, yn, cv=cv)
    y_cv = sy.inverse_transform(yn_cv.reshape(-1, 1)).ravel()
    r2  = r2_score(y, y_cv)
    mae = mean_absolute_error(y, y_cv)
 
    return rf, sx, sy, r2, mae


# def suggest_next_experiments_gp(gp, sx, sy, n_candidates=5, target=460, beta=2.0):
#     """使用UCB策略平衡探索与利用"""
#     def objective(trial):
#         # 参数采样,添加step参数限制精度
#         csbr = trial.suggest_float('CsBr', 0, 0.02, step=0.001)      # 精确到0.001
#         pbbr2 = trial.suggest_float('PbBr2', 0, 0.02, step=0.001)    # 精确到0.001
#         oam = trial.suggest_float('Oam', 0.001, 0.02, step=0.001)    # 精确到0.001
#         oa = trial.suggest_float('OA', 0.01, 0.2, step=0.001)        # 精确到0.001
        
#         # 约束检查
#         pbcl2 = round(0.02 - pbbr2, 3)  # 显式四舍五入
#         cscl = round(0.02 - csbr, 3)
#         if not (0 <= pbcl2 <= 0.02 and 0 <= cscl <= 0.02):
#             return float('inf')
 
#         # 预测
#         x = np.array([
#             [round(csbr,3), round(cscl,3), 
#              round(pbbr2,3), round(pbcl2,3),
#              round(oam,3), round(oa,3)]
#         ])
#         xn = sx.transform(x)
#         mu, sigma = gp.predict(xn, return_std=True)
        
#         # 逆标准化
#         mu_orig = sy.inverse_transform(mu.reshape(-1, 1)).item()
#         sigma_orig = sigma.item() * sy.scale_[0]
        
#         # UCB获取函数
#         target_gap = abs(mu_orig - target)
#         return target_gap - beta * sigma_orig  # minimize时自动平衡
    
#     study = optuna.create_study(direction='minimize',
#                                 sampler=optuna.samplers.TPESampler(seed=42))
#     study.optimize(objective, n_trials=500, show_progress_bar=False)
 
#     # 只保留可行 trial，并按 UCB 值升序取前 k 个
#     feasible_trials = [t for t in study.trials if t.value != float('inf')]
#     best_trials = sorted(feasible_trials, key=lambda t: t.value)[:n_candidates]

#     if not best_trials:
#         return pd.DataFrame(columns=FEATURES)
 
#     rows = []
#     for trial in best_trials:
#         p = trial.params
#         p.update({'PbCl2': 0.02 - p['PbBr2'],
#                   'CsCl':  0.02 - p['CsBr']})
 
#         # 记录
#         x  = np.array([[p[f] for f in FEATURES]])
#         xn = sx.transform(x)
#         mu, sigma = gp.predict(xn, return_std=True)
#         pred = sy.inverse_transform(mu.reshape(-1, 1)).item()
#         std  = sigma.item() * sy.scale_[0]
 
#         rows.append({**{f: p[f] for f in FEATURES},
#                      'predicted_peak': pred,
#                      'predicted_std':  std,
#                      'ucb_value':      trial.value})
 
#     return pd.DataFrame(rows)

def suggest_next_experiments_gp(
        gp, sx, sy, df_train,
        n_candidates=5,
        target=460,
        beta=2.0,
        tolerance=20,        # ±20 nm 以内算“邻域”
        search_radius=0.005, # 每个特征向外扩 0.005
        precision=3):        # 小数点后 3 位
    """
    高斯过程 + 邻域搜索 + 精度限制
    df_train : 原始训练 DataFrame，必须含 '峰位' 列
    """
    # ---------- 1. 内部预测 ----------
    def _gp_predict(xn):
        mu, sigma = gp.predict(xn, return_std=True)
        mu_orig = sy.inverse_transform(mu.reshape(-1, 1)).ravel()
        sigma_orig = sigma * sy.scale_[0]
        return mu_orig[0], sigma_orig[0]

    # ---------- 2. 找到邻域样本 ----------
    target_col = '峰位'
    close_samples = df_train[
        abs(df_train[target_col] - target) <= tolerance
    ].copy()

    if close_samples.empty:
        # 放宽容忍度
        tolerance *= 1.5
        close_samples = df_train[
            abs(df_train[target_col] - target) <= tolerance
        ].copy()

    if close_samples.empty:
        # 仍无样本，回退到全空间
        print("⚠️  无邻域样本，回退到全空间")
        bounds = {
            'CsBr': (0, 0.02),
            'PbBr2': (0, 0.02),
            'Oam': (0.001, 0.02),
            'OA': (0.01, 0.2)
        }
    else:
        # 以邻域样本的 min/max 为基准，向外扩 search_radius
        bounds = {}
        for feat in ['CsBr', 'PbBr2', 'Oam', 'OA']:
            lo = max(0, close_samples[feat].min() - search_radius)
            hi = min(0.02 if feat != 'OA' else 0.2,
                     close_samples[feat].max() + search_radius)
            bounds[feat] = (round(lo, precision), round(hi, precision))

    print("邻域搜索边界：")
    for k, (lo, hi) in bounds.items():
        print(f"  {k}: [{lo:.3f}, {hi:.3f}]")

    # ---------- 3. 目标函数 ----------
    def objective(trial):
        csbr = trial.suggest_float('CsBr', *bounds['CsBr'], step=10**-precision)
        pbbr2 = trial.suggest_float('PbBr2', *bounds['PbBr2'], step=10**-precision)
        oam = trial.suggest_float('Oam', *bounds['Oam'], step=10**-precision)
        oa = trial.suggest_float('OA', *bounds['OA'], step=10**-precision)

        pbcl2 = round(0.02 - pbbr2, precision)
        cscl = round(0.02 - csbr, precision)
        if not (0 <= pbcl2 <= 0.02 and 0 <= cscl <= 0.02):
            return float('inf')

        x = np.array([[round(csbr, precision),
                       round(cscl, precision),
                       round(pbbr2, precision),
                       round(pbcl2, precision),
                       round(oam, precision),
                       round(oa, precision)]])
        xn = sx.transform(x)
        mu_orig, sigma_orig = _gp_predict(xn)

        return abs(mu_orig - target) - beta * sigma_orig

    # ---------- 4. 贝叶斯优化 ----------
    study = optuna.create_study(direction='minimize',
                                sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=500, show_progress_bar=False)

    feasible_trials = [t for t in study.trials if t.value != float('inf')]

    # 去重（四舍五入后可能重复）
    unique = {}
    for tr in feasible_trials:
        key = tuple(round(tr.params[k], precision) for k in ['CsBr', 'PbBr2', 'Oam', 'OA'])
        if key not in unique or tr.value < unique[key].value:
            unique[key] = tr

    best_trials = sorted(unique.values(), key=lambda t: t.value)[:n_candidates]

    if not best_trials:
        return pd.DataFrame(columns=['CsBr', 'CsCl', 'PbBr2', 'PbCl2', 'Oam', 'OA',
                                     'predicted_peak', 'predicted_std', 'ucb_value'])

    # ---------- 5. 组装结果 ----------
    rows = []
    for trial in best_trials:
        p = {k: round(v, precision) for k, v in trial.params.items()}
        p.update({'PbCl2': round(0.02 - p['PbBr2'], precision),
                  'CsCl':  round(0.02 - p['CsBr'], precision)})

        x = np.array([[p[f] for f in ['CsBr', 'CsCl', 'PbBr2', 'PbCl2', 'Oam', 'OA']]])
        xn = sx.transform(x)
        pred, std = _gp_predict(xn)

        rows.append({**{f: p[f] for f in ['CsBr', 'CsCl', 'PbBr2', 'PbCl2', 'Oam', 'OA']},
                     'predicted_peak': pred,
                     'predicted_std': std,
                     'ucb_value': trial.value})

    return pd.DataFrame(rows)


# def suggest_next_experiments_rf(rf, sx, sy,
#                                 n_candidates=5,
#                                 target=460,
#                                 beta=2.0):
#     """
#     用随机森林 + UCB 策略推荐下一轮实验配方
#     rf   : 已训练好的 RandomForestRegressor
#     sx   : 特征 StandardScaler
#     sy   : 目标 StandardScaler
#     """
#     # ---------- 1. 内部函数：计算均值 + 标准差 ----------
#     def _rf_predict(xn):
#         """
#         输入标准化特征 xn，返回反标准化后的均值和标准差
#         """
#         # 每棵树的预测（标准化空间）
#         tree_preds = np.array([tree.predict(xn) for tree in rf.estimators_])
#         mu_n = tree_preds.mean(axis=0)
#         sigma_n = tree_preds.std(axis=0)
 
#         # 反标准化
#         mu_orig = sy.inverse_transform(mu_n.reshape(-1, 1)).ravel()
#         sigma_orig = sigma_n * sy.scale_[0]   # 线性变换
#         return mu_orig[0], sigma_orig[0]
 
#     # ---------- 2. Optuna 目标函数 ----------
#     def objective(trial):
#         # 参数采样,添加step参数限制精度
#         csbr = trial.suggest_float('CsBr', 0, 0.02, step=0.001)      # 精确到0.001
#         pbbr2 = trial.suggest_float('PbBr2', 0, 0.02, step=0.001)    # 精确到0.001
#         oam = trial.suggest_float('Oam', 0.001, 0.02, step=0.001)    # 精确到0.001
#         oa = trial.suggest_float('OA', 0.01, 0.2, step=0.001)        # 精确到0.001
        
#         # 约束检查
#         pbcl2 = round(0.02 - pbbr2, 3)  # 显式四舍五入
#         cscl = round(0.02 - csbr, 3)
#         if not (0 <= pbcl2 <= 0.02 and 0 <= cscl <= 0.02):
#             return float('inf')
 
#         # 预测
#         x = np.array([
#             [round(csbr,3), round(cscl,3), 
#              round(pbbr2,3), round(pbcl2,3),
#              round(oam,3), round(oa,3)]
#         ])
#         xn = sx.transform(x)
 
#         mu_orig, sigma_orig = _rf_predict(xn)
 
#         target_gap = abs(mu_orig - target)
#         return target_gap - beta * sigma_orig   # UCB 获取函数
 
#     # ---------- 3. Optuna 优化 ----------
#     study = optuna.create_study(direction='minimize',
#                                 sampler=optuna.samplers.TPESampler(seed=42))
#     study.optimize(objective, n_trials=100, show_progress_bar=False)
 
#     feasible_trials = [t for t in study.trials if t.value != float('inf')]
#     best_trials = sorted(feasible_trials, key=lambda t: t.value)[:n_candidates]
 
#     if not best_trials:
#         return pd.DataFrame(columns=FEATURES)
 
#     # ---------- 4. 组装结果 ----------
#     rows = []
#     for trial in best_trials:
#         p = trial.params
#         # 再次四舍五入，防止浮点误差
#         p = {k: round(v, 3) for k, v in p.items()}
#         p.update({'PbCl2': round(0.02 - p['PbBr2'], 3),
#                   'CsCl':  round(0.02 - p['CsBr'], 3)})
 
#         x  = np.array([[p[f] for f in FEATURES]])
#         xn = sx.transform(x)
#         pred, std = _rf_predict(xn)
 
#         rows.append({**{f: p[f] for f in FEATURES},
#                      'predicted_peak': pred,
#                      'predicted_std':  std,
#                      'ucb_value':      trial.value})
 
#     return pd.DataFrame(rows)


def suggest_next_experiments_rf(rf, sx, sy, df_train,
                                n_candidates=5,
                                target=460,
                                beta=2.0,
                                search_radius=0.005):
    """
    改进版贝叶斯优化策略
    df_train: 训练数据（用于确定搜索范围）
    search_radius: 在现有配方基础上的搜索半径
    """
    # ---------- 1. 确定目标邻域配方 ----------
    # 筛选接近目标的训练样本
    target_range = (455, 465)
    neighbor_samples = df_train[
        (df_train[TARGET] >= target_range[0]) &
        (df_train[TARGET] <= target_range[1])
    ]
    
    # 如果没有邻域样本，使用全局范围
    if len(neighbor_samples) == 0:
        neighbor_samples = df_train
    
    # 计算各参数的动态范围
    param_ranges = {
        'CsBr': (neighbor_samples['CsBr'].min() - search_radius,
                 neighbor_samples['CsBr'].max() + search_radius),
        'PbBr2': (neighbor_samples['PbBr2'].min() - search_radius,
                  neighbor_samples['PbBr2'].max() + search_radius),
        'Oam': (neighbor_samples['Oam'].min() - 0.0025,
                neighbor_samples['Oam'].max() + 0.0025),
        'OA': (neighbor_samples['OA'].min() - 0.025,
               neighbor_samples['OA'].max() + 0.025)
    }
    
    # 确保范围不越界
    param_ranges['CsBr'] = (max(0, param_ranges['CsBr'][0]), 
                           min(0.02, param_ranges['CsBr'][1]))
    param_ranges['PbBr2'] = (max(0, param_ranges['PbBr2'][0]), 
                            min(0.02, param_ranges['PbBr2'][1]))
    param_ranges['Oam'] = (max(0.001, param_ranges['Oam'][0]), 
                          min(0.02, param_ranges['Oam'][1]))
    param_ranges['OA'] = (max(0.01, param_ranges['OA'][0]), 
                         min(0.2, param_ranges['OA'][1]))
    
    # ---------- 2. 预测函数 ----------
    def _rf_predict(xn):
        tree_preds = np.array([tree.predict(xn) for tree in rf.estimators_])
        mu_n = tree_preds.mean(axis=0)
        sigma_n = tree_preds.std(axis=0)
        mu_orig = sy.inverse_transform(mu_n.reshape(-1, 1)).ravel()
        sigma_orig = sigma_n * sy.scale_[0]
        return mu_orig[0], sigma_orig[0]
    
    # ---------- 3. 优化目标函数 ----------
    def objective(trial):
        # 动态范围采样
        csbr = trial.suggest_float(
            'CsBr', 
            param_ranges['CsBr'][0], 
            param_ranges['CsBr'][1],
            step=0.001
        )
        pbbr2 = trial.suggest_float(
            'PbBr2',
            param_ranges['PbBr2'][0],
            param_ranges['PbBr2'][1],
            step=0.001
        )
        oam = trial.suggest_float(
            'Oam',
            param_ranges['Oam'][0],
            param_ranges['Oam'][1],
            step=0.0025
        )
        oa = trial.suggest_float(
            'OA',
            param_ranges['OA'][0],
            param_ranges['OA'][1],
            step=0.025
        )
        
        # 约束处理
        pbcl2 = round(0.02 - pbbr2, 3)
        cscl = round(0.02 - csbr, 3)
        if not (0 <= pbcl2 <= 0.02 and 0 <= cscl <= 0.02):
            return float('inf')
        
        # 排除训练数据中已存在的配方
        existing = df_train[
            (df_train['CsBr'].round(3) == round(csbr,3)) &
            (df_train['PbBr2'].round(3) == round(pbbr2,3)) &
            (df_train['Oam'].round(3) == round(oam,3)) &
            (df_train['OA'].round(3) == round(oa,3))
        ]
        if not existing.empty:
            return float('inf')
        
        # 预测
        x = np.array([[csbr, cscl, pbbr2, pbcl2, oam, oa]])
        xn = sx.transform(x)
        mu_orig, sigma_orig = _rf_predict(xn)
        
        # 改进的获取函数
        target_gap = abs(mu_orig - target)
        proximity_bonus = 1/(1 + target_gap)  # 距离越近奖励越高
        return target_gap #- beta*sigma_orig - proximity_bonus
    
    # ---------- 4. 优化执行 ----------
    study = optuna.create_study(direction='minimize',
                               sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=500, show_progress_bar=False)
    
    # ---------- 5. 结果后处理 ----------
    feasible_trials = [t for t in study.trials if t.value != float('inf')]
    
    # 去重处理
    seen = set()
    unique_trials = []
    for t in feasible_trials:
        key = (
            round(t.params['CsBr'],3),
            round(t.params['PbBr2'],3),
            round(t.params['Oam'],3),
            round(t.params['OA'],3)
        )
        if key not in seen:
            seen.add(key)
            unique_trials.append(t)
    
    best_trials = sorted(unique_trials, key=lambda t: t.value)[:n_candidates]
    
    # 组装结果
    rows = []
    for trial in best_trials:
        p = {k: round(v,3) for k,v in trial.params.items()}
        p.update({
            'PbCl2': round(0.02 - p['PbBr2'],3),
            'CsCl': round(0.02 - p['CsBr'],3)
        })
        
        x = np.array([[p[f] for f in FEATURES]])
        xn = sx.transform(x)
        pred, std = _rf_predict(xn)
        
        rows.append({
            **{f: p[f] for f in FEATURES},
            'predicted_peak': round(pred, 2),
            'predicted_std': round(std, 2),
            'ucb_value': round(trial.value, 4),
            'is_novel': not df_train[
                (df_train['CsBr'].round(3) == p['CsBr']) &
                (df_train['PbBr2'].round(3) == p['PbBr2']) &
                (df_train['Oam'].round(3) == p['Oam']) &
                (df_train['OA'].round(3) == p['OA'])
            ].any().any()
        })
    
    return pd.DataFrame(rows).sort_values('predicted_peak')

In [None]:
import time

# 根据迭代轮次衰减探索强度
def beta_schedule(round_number):
    return 2.0 * (0.9 ** round_number)  # 指数衰减

try:
    # 1) 加载数据
    df_all = load_all_data()
    print(f"当前总样本数：{len(df_all)}")
    time.sleep(3)
    
    # 2) 训练模型
    # gp, sx, sy, r2, mae = train_gp(df_all)
    rf, sx, sy, r2, mae = train_rf(df_all)
    print(f"模型性能：R²={r2:.3f}, MAE={mae:.2f} nm")
    
    # 3) 推荐新配方
    current_round = len(list(DATA_DIR.glob('round_*.xlsx')))
    next_df = suggest_next_experiments_rf(
        rf, sx, sy, df_all,
        n_candidates=3,
        target=460,
        beta=1.5,
        search_radius=0.005
    )
    # next_df = suggest_next_experiments_gp(
    #     gp, sx, sy, df_all,
    #     n_candidates=3,
    #     target=460,
    #     beta=1.5,
    #     search_radius=0.005
    # )
    if next_df.empty:
        print("未找到可行解，请调整参数范围")
    else:
        print("\n建议的 3 组新配方：")
        display(next_df)
        
        # 4) 保存结果
        round_id = len(list(DATA_DIR.glob('round_*.xlsx')))
        rec_file = DATA_DIR / f"round_{round_id}_to_test.xlsx"
        next_df.to_excel(rec_file, index=False)
        print(f"已保存待测配方 → {rec_file}")

except Exception as e:
    print(f"程序运行失败：{str(e)}")


当前总样本数：35


[I 2025-07-28 10:08:45,807] A new study created in memory with name: no-name-95f78e59-abd6-4de0-82f6-299a88856e44
[I 2025-07-28 10:08:45,850] Trial 0 finished with value: 45.71719999999999 and parameters: {'CsBr': 0.009999999999999998, 'PbBr2': 0.018, 'Oam': 0.006, 'OA': 0.035}. Best is trial 0 with value: 45.71719999999999.
[I 2025-07-28 10:08:45,889] Trial 1 finished with value: 84.4762 and parameters: {'CsBr': 0.005999999999999999, 'PbBr2': 0.002, 'Oam': 0.001, 'OA': 0.06}. Best is trial 0 with value: 45.71719999999999.
[I 2025-07-28 10:08:45,928] Trial 2 finished with value: 6.5756000000000085 and parameters: {'CsBr': 0.013999999999999999, 'PbBr2': 0.013000000000000001, 'Oam': 0.001, 'OA': 0.06}. Best is trial 2 with value: 6.5756000000000085.
[I 2025-07-28 10:08:45,967] Trial 3 finished with value: 11.795599999999979 and parameters: {'CsBr': 0.018, 'PbBr2': 0.004, 'Oam': 0.001, 'OA': 0.01}. Best is trial 2 with value: 6.5756000000000085.


模型性能：R²=0.738, MAE=10.55 nm


[I 2025-07-28 10:08:46,006] Trial 4 finished with value: 3.2584400000000073 and parameters: {'CsBr': 0.009, 'PbBr2': 0.009000000000000001, 'Oam': 0.0035, 'OA': 0.01}. Best is trial 4 with value: 3.2584400000000073.
[I 2025-07-28 10:08:46,046] Trial 5 finished with value: 74.04320000000001 and parameters: {'CsBr': 0.013999999999999999, 'PbBr2': 0.002, 'Oam': 0.001, 'OA': 0.035}. Best is trial 4 with value: 3.2584400000000073.
[I 2025-07-28 10:08:46,085] Trial 6 finished with value: 18.170933333333323 and parameters: {'CsBr': 0.011, 'PbBr2': 0.014, 'Oam': 0.001, 'OA': 0.035}. Best is trial 4 with value: 3.2584400000000073.
[I 2025-07-28 10:08:46,124] Trial 7 finished with value: 74.803 and parameters: {'CsBr': 0.013999999999999999, 'PbBr2': 0.0, 'Oam': 0.0035, 'OA': 0.01}. Best is trial 4 with value: 3.2584400000000073.
[I 2025-07-28 10:08:46,163] Trial 8 finished with value: 44.47180000000003 and parameters: {'CsBr': 0.004999999999999999, 'PbBr2': 0.018, 'Oam': 0.006, 'OA': 0.06}. Best 


建议的 5 组新配方：


Unnamed: 0,CsBr,CsCl,PbBr2,PbCl2,Oam,OA,predicted_peak,predicted_std,ucb_value,is_novel
4,0.018,0.002,0.012,0.008,0.006,0.035,459.65,12.5,0.3472,True
3,0.019,0.001,0.013,0.007,0.006,0.035,459.81,12.8,0.1864,True
0,0.019,0.001,0.012,0.008,0.006,0.035,459.84,12.83,0.1628,True
1,0.015,0.005,0.013,0.007,0.006,0.035,460.17,12.43,0.1676,True
2,0.015,0.005,0.012,0.008,0.006,0.035,460.18,12.44,0.1766,True


已保存待测配方 → /home/ubuntu/50T/LYT/matagent/response-matter/data/round_6_to_test.xlsx


In [17]:
# ------------------------------------------------------------
# Cell 4：实验完成后的结果分析和日志更新
# ------------------------------------------------------------

def analyze_latest_round():
    """分析最新一轮的实验结果并更新日志"""
    
    # 获取最新的round文件
    round_files = sorted(DATA_DIR.glob('round_*.xlsx'))
    if len(round_files) < 2:  # 需要至少有初始数据和一轮新实验
        print("还没有足够的实验数据进行分析")
        return
    
    latest_round_id = len(round_files) - 1
    latest_file = DATA_DIR / f'round_{latest_round_id}.xlsx'
    
    if not latest_file.exists():
        print(f"等待实验结果文件: {latest_file}")
        return
    
    # 读取数据
    df_all = load_all_data()
    latest_df = pd.read_excel(latest_file)
    
    print(f"=== 轮次 {latest_round_id} 实验结果分析 ===")
    print("\n最新实验结果:")
    display(latest_df[['CsBr', 'CsCl', 'PbBr2', 'PbCl2', 'Oam', 'OA', TARGET]])
    
    # 重新训练模型（包含最新数据）
    gp, sx, sy, r2, mae = train_gp(df_all)
    
    # 找距离460nm最近的
    all_peaks = df_all[TARGET].values
    latest_peaks = latest_df[TARGET].values
    
    # 历史最佳：距离460nm最近的
    all_distances = np.abs(all_peaks - 460)
    best_idx = np.argmin(all_distances)
    current_best_peak = all_peaks[best_idx]
    current_best_distance = all_distances[best_idx]
    
    # 本轮最佳：本轮距离460nm最近的
    round_distances = np.abs(latest_peaks - 460)
    round_best_idx = np.argmin(round_distances)
    round_best_peak = latest_peaks[round_best_idx]
    round_best_distance = round_distances[round_best_idx]
    
    # 检查是否有改进 - 比较距离460nm的远近
    previous_data = df_all[df_all.index < len(df_all) - len(latest_df)]
    if len(previous_data) > 0:
        previous_distances = np.abs(previous_data[TARGET].values - 460)
        previous_best_distance = np.min(previous_distances)
        improvement = previous_best_distance - current_best_distance  # 距离减小为正改进
    else:
        improvement = 0
    
    print(f"\n=== 性能分析 ===")
    print(f"本轮最接近460nm的峰位: {round_best_peak:.1f} nm (距离: {round_best_distance:.1f} nm)")
    print(f"历史最接近460nm的峰位: {current_best_peak:.1f} nm (距离: {current_best_distance:.1f} nm)")
    
    if improvement > 0:
        print(f"✅ 距离460nm更近了 {improvement:.1f} nm")
    elif improvement < 0:
        print(f"❌ 距离460nm更远了 {abs(improvement):.1f} nm")
    else:
        print("⚪ 与上轮最佳距离相同")
    
    print(f"\n本轮峰位范围: {latest_peaks.min():.1f} - {latest_peaks.max():.1f} nm")
    print(f"模型性能: R²={r2:.3f}, MAE={mae:.2f} nm")
    
    # 更新日志
    log = pd.DataFrame([{
        'round': latest_round_id,
        'n_samples': len(df_all),
        'R2': r2,
        'MAE': mae,
        'round_best_peak': round_best_peak,
        'round_distance_to_460': round_best_distance,
        'historical_best_peak': current_best_peak,
        'historical_distance_to_460': current_best_distance,
        'improvement_from_previous': improvement,  # 正值表示距离减小（改进）
        'round_peaks_mean': latest_peaks.mean(),
        'round_peaks_std': latest_peaks.std(),
        'round_peaks_min': latest_peaks.min(),
        'round_peaks_max': latest_peaks.max(),
        'achieved_target': 'Yes' if current_best_distance <= 1 else 'No'  # 1nm容差
    }])
    
    if LOG_FILE.exists():
        old_log = pd.read_excel(LOG_FILE)
        new_log = pd.concat([old_log, log], ignore_index=True)
    else:
        new_log = log
    
    new_log.to_excel(LOG_FILE, index=False)
    print(f"\n日志已更新到 {LOG_FILE}")
    
    return latest_round_id

# 运行分析
analyze_latest_round()


=== 轮次 7 实验结果分析 ===

最新实验结果:


Unnamed: 0,CsBr,CsCl,PbBr2,PbCl2,Oam,OA,峰位
0,0.013061,0.006939,0.01488,0.00512,0.006045,0.195161,443
1,0.004679,0.015321,0.011924,0.008076,0.003088,0.034701,447



=== 性能分析 ===
本轮最接近460nm的峰位: 447.0 nm (距离: 13.0 nm)
历史最接近460nm的峰位: 456.7 nm (距离: 3.3 nm)
⚪ 与上轮最佳距离相同

本轮峰位范围: 443.0 - 447.0 nm
模型性能: R²=0.877, MAE=9.88 nm

日志已更新到 log.xlsx


7