# 統合MD解析ノートブック

## 概要
- 複数条件のMDシミュレーション結果を一括解析
- 解析項目: MSD, 配位数, 反応カウント, RDF, 密度プロファイル, 分子識別
- 出力: 横2列のグラフ（フォントサイズ20以上）

## 解析スキップ機能
- 各条件フォルダの `analysis_results/` ディレクトリにサマリーファイルを保存
- サマリーファイル:
  - `alltrends_summary.csv`: MSD、配位数、反応数などの最終値
  - `alldensity_summary.csv`: 密度プロファイルの集計値
  - `allrdf_summary.csv`: RDFピーク位置・高さ
- 再実行時は `alltrends_summary.csv` を参照し、解析済み（かつ個別の `{ラベル}_trends.csv` が存在する）条件はスキップ
- `force_reanalyze=True` で強制再解析可能

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from ase.io import read
from ase.io.trajectory import Trajectory
from ase.geometry import get_distances
import warnings

# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# =============================================================================
# Global matplotlib defaults: large fonts for all figure text
# (note: the legend is set to 18, everything else to 20 or more)
# =============================================================================
plt.rcParams['font.size'] = 20
plt.rcParams['axes.labelsize'] = 22
plt.rcParams['axes.titlesize'] = 24
plt.rcParams['xtick.labelsize'] = 20
plt.rcParams['ytick.labelsize'] = 20
plt.rcParams['legend.fontsize'] = 18
plt.rcParams['figure.titlesize'] = 26
plt.rcParams['lines.linewidth'] = 2.5

print("ライブラリ読み込み完了")

## 1. 解析条件の設定

In [None]:
# =============================================================================
# Analysis configuration
# =============================================================================

# Conditions to compare, keyed by display name, each mapped to the directory
# that holds its trajectory files (edit as needed).
CONDITIONS = {
    "Na-F-OH": Path("/home/jovyan/Kaori/MD/LiB_2/structure/Al_Na_F_OH"),
    "Li-F-OH": Path("/home/jovyan/Kaori/MD/LiB_2/structure/Al_Li_F_OH"),
    "LiOH": Path("/home/jovyan/Kaori/MD/LiB_2/structure/Al_LiOH_v8"),
}

# Targets and temperatures to analyze for every condition.
TARGETS = ["Al_Metal", "Al_Oxide"]
TEMPERATURES = [300, 400, 500, 600, 750, 900]

# Root output directory; created here if it does not exist yet.
OUTPUT_DIR = Path("/home/jovyan/Kaori/MD/LiB_2/structure/Integrated_Analysis")
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# Total simulated time (ps).
TOTAL_SIM_TIME_PS = 40.0

# Bond-detection cutoff distances (Angstrom), keyed by element pair.
CUTOFFS = dict(Al_O=2.5, Al_F=2.0, O_H=1.2, H_H=0.85, Li_F=2.0)

# Surface-Al threshold along Z (Angstrom).
SURFACE_THRESHOLD = 4.0

# =============================================================================
# Summary file names, keyed by summary type
# =============================================================================
SUMMARY_FILES = dict(
    trends='alltrends_summary.csv',
    density='alldensity_summary.csv',
    rdf='allrdf_summary.csv',
)

# Echo the active configuration.
print(f"解析条件: {list(CONDITIONS.keys())}")
print(f"ターゲット: {TARGETS}")
print(f"温度: {TEMPERATURES}")
print(f"出力先: {OUTPUT_DIR}")

## 2. 共通解析関数の定義

In [None]:
# =============================================================================
# RDF計算関数
# =============================================================================
def compute_rdf(atoms, idx1, idx2, rmax=6.0, nbins=100):
    """Histogram the pair distances between two index groups of ``atoms``.

    Args:
        atoms: Structure providing ``positions``, ``cell`` and ``pbc``.
        idx1: Indices of the first group.
        idx2: Indices of the second group.
        rmax: Upper edge of the histogram range (the lower edge is 0).
        nbins: Number of histogram bins.

    Returns:
        ``(hist, bin_edges)``: the raw, un-normalized pair counts per bin and
        the ``nbins + 1`` bin edges. ``hist`` is all zeros when either group
        is empty.
    """
    if not len(idx1) or not len(idx2):
        return np.zeros(nbins), np.linspace(0, rmax, nbins + 1)

    _, pair_dist = get_distances(
        atoms.positions[idx1],
        atoms.positions[idx2],
        cell=atoms.cell,
        pbc=atoms.pbc,
    )

    # When a group is paired with itself, drop the (near-)zero self distances.
    same_group = np.array_equal(idx1, idx2)
    samples = pair_dist[pair_dist > 0.01] if same_group else pair_dist.flatten()

    return np.histogram(samples, range=(0, rmax), bins=nbins)


def normalize_rdf(hist, bin_edges, n_center, n_neighbor, volume):
    """Convert raw pair counts into g(r).

    Each bin is divided by the count expected for a uniform distribution:
    ``n_center * (4 * pi * r**2 * dr) * (n_neighbor / volume)``, plus a small
    constant (1e-10) that prevents division by zero.

    Args:
        hist: Pair counts per bin.
        bin_edges: Array of bin edges; the width of the first bin is used as
            ``dr`` for all bins.
        n_center: Number of center atoms.
        n_neighbor: Number of neighbor atoms.
        volume: Volume used for the neighbor number density.

    Returns:
        ``(r, gr)``: bin centers and the normalized values.
    """
    lower, upper = bin_edges[:-1], bin_edges[1:]
    r = 0.5 * (upper + lower)
    dr = bin_edges[1] - bin_edges[0]

    rho = n_neighbor / volume
    shell_vol = 4 * np.pi * r**2 * dr
    expected = n_center * shell_vol * rho

    return r, hist / (expected + 1e-10)


# =============================================================================
# 結合カウント関数
# =============================================================================
def count_bonds(atoms, idx_center, idx_neighbor, cutoff):
    """Count neighbors within ``cutoff`` around each center atom.

    Args:
        atoms: Structure providing ``positions``, ``cell`` and ``pbc``.
        idx_center: Indices of the center atoms.
        idx_neighbor: Indices of the candidate neighbor atoms.
        cutoff: Distance below which a pair counts as bonded.

    Returns:
        ``(avg_cn, reacted_count)``: the mean number of neighbors per center
        atom, and the number of center atoms with at least one neighbor.
        ``(0, 0)`` when either group is empty.
    """
    n_center, n_neighbor = len(idx_center), len(idx_neighbor)
    if n_center == 0 or n_neighbor == 0:
        return 0, 0

    _, lengths = get_distances(
        atoms.positions[idx_center],
        atoms.positions[idx_neighbor],
        cell=atoms.cell,
        pbc=atoms.pbc,
    )

    # One row per center atom, one column per neighbor atom.
    bonded = lengths.reshape(n_center, n_neighbor) < cutoff
    neighbors_per_center = np.sum(bonded, axis=1)

    return np.mean(neighbors_per_center), np.sum(neighbors_per_center >= 1)


# =============================================================================
# OH基判定（Hと結合している酸素を抽出）
# =============================================================================
def get_oh_oxygen_indices(atoms, idx_o, idx_h, cutoff=1.2):
    """Return the oxygen indices that have at least one hydrogen within ``cutoff``.

    Args:
        atoms: Structure providing ``positions``, ``cell`` and ``pbc``.
        idx_o: Indices of the oxygen atoms (array or list).
        idx_h: Indices of the hydrogen atoms.
        cutoff: Maximum O-H distance for an oxygen to be selected.

    Returns:
        The subset of ``idx_o`` whose atoms have a hydrogen closer than
        ``cutoff``. When either group is empty, an empty array with an
        integer dtype is returned so the result is always usable as an index.
    """
    idx_o = np.asarray(idx_o)
    if len(idx_o) == 0 or len(idx_h) == 0:
        # A bare np.array([]) would be float64, which numpy rejects as an index.
        return np.array([], dtype=int)

    p_o = atoms.positions[idx_o]
    p_h = atoms.positions[idx_h]
    _, oh_dists = get_distances(p_o, p_h, cell=atoms.cell, pbc=atoms.pbc)
    # One row per oxygen, one column per hydrogen.
    oh_matrix = oh_dists.reshape(len(idx_o), len(idx_h))

    is_o_with_h = np.any(oh_matrix < cutoff, axis=1)
    return idx_o[is_o_with_h]


# =============================================================================
# H2分子検出
# =============================================================================
def count_h2_molecules(atoms, idx_h, cutoff=0.85):
    """Count H-H pairs closer than ``cutoff``.

    Args:
        atoms: Structure providing ``positions``, ``cell`` and ``pbc``.
        idx_h: Indices of the hydrogen atoms.
        cutoff: Maximum H-H distance for a pair to be counted.

    Returns:
        Number of unordered H-H pairs with a distance in ``(0.01, cutoff)``;
        0 when fewer than two hydrogens are given.
    """
    if len(idx_h) < 2:
        return 0

    h_pos = atoms.positions[idx_h]
    _, hh = get_distances(h_pos, h_pos, cell=atoms.cell, pbc=atoms.pbc)

    # The full matrix holds every pair twice (i-j and j-i); the lower bound
    # removes the zero self distances on the diagonal.
    close_pairs = np.logical_and(hh > 0.01, hh < cutoff)
    return np.sum(close_pairs) // 2


# =============================================================================
# 表面/内部Al分類
# =============================================================================
def classify_surface_bulk_al(atoms, idx_al, threshold=4.0):
    """Split Al indices into surface and bulk groups by Z coordinate.

    An atom counts as surface when its Z coordinate lies within ``threshold``
    of the highest Z among the given atoms (``z > z_max - threshold``).

    Args:
        atoms: Structure providing ``positions``.
        idx_al: Indices of the Al atoms (array or list).
        threshold: Depth below the topmost Al that still counts as surface.

    Returns:
        ``(surface_idx, bulk_idx)``: two index arrays partitioning ``idx_al``.
        Both are empty integer arrays when ``idx_al`` is empty, so they stay
        valid as index arrays.
    """
    idx_al = np.asarray(idx_al)
    if len(idx_al) == 0:
        # A bare np.array([]) would be float64, which numpy rejects as an index.
        return np.array([], dtype=int), np.array([], dtype=int)

    al_z = atoms.positions[idx_al, 2]
    z_max = np.max(al_z)

    surface_mask = al_z > (z_max - threshold)
    surface_idx = idx_al[surface_mask]
    bulk_idx = idx_al[~surface_mask]

    return surface_idx, bulk_idx


# =============================================================================
# MSD計算
# =============================================================================
def calculate_msd(current_pos, initial_pos):
    """Mean squared displacement between two position arrays.

    Args:
        current_pos: Current positions, one row per atom.
        initial_pos: Reference positions with the same layout, or None.

    Returns:
        The mean over atoms of the squared displacement, or 0.0 when
        ``initial_pos`` is None or empty.
    """
    has_reference = initial_pos is not None and len(initial_pos) != 0
    if not has_reference:
        return 0.0

    displacement = current_pos - initial_pos
    squared_norms = np.sum(displacement**2, axis=1)
    return np.mean(squared_norms)


# Progress marker printed once the shared analysis helpers above are defined.
print("共通関数定義完了")

In [None]:
# =============================================================================
# 既存解析結果の管理関数
# =============================================================================

def get_analysis_results_dir(cond_output_dir):
    """Return ``cond_output_dir / "analysis_results"``, creating it if needed.

    Args:
        cond_output_dir: Output directory of one condition (a ``Path``).

    Returns:
        Path of the ``analysis_results`` sub-directory.
    """
    analysis_dir = cond_output_dir.joinpath("analysis_results")
    analysis_dir.mkdir(exist_ok=True, parents=True)
    return analysis_dir


def load_existing_summary(results_dir, summary_type):
    """Read one of the per-condition summary CSV files, if present.

    Args:
        results_dir: The ``analysis_results`` directory.
        summary_type: Key into ``SUMMARY_FILES`` ('trends', 'density' or 'rdf').

    Returns:
        The summary as a DataFrame, or None when the file does not exist or
        cannot be read (a warning is printed in the latter case).
    """
    filepath = results_dir / SUMMARY_FILES[summary_type]
    if not filepath.exists():
        return None

    try:
        df = pd.read_csv(filepath)
        print(f"    既存サマリー読み込み: {SUMMARY_FILES[summary_type]} ({len(df)}件)")
    except Exception as e:
        # Best effort: an unreadable summary is treated like a missing one.
        print(f"    警告: サマリー読み込みエラー: {e}")
        df = None
    return df


def get_analyzed_keys(df_summary):
    """Collect the ``"<Target>_<Temp>K"`` keys already present in a summary table.

    The target and temperature columns are matched case-insensitively
    ('target'; 'temp' or 'temperature'). When either column is missing, the
    values of a 'Label' column are used instead, if there is one.

    Rows whose target is missing or whose temperature is missing, non-numeric
    or non-finite are skipped instead of raising, so a partially written
    summary cannot abort the whole run.

    Args:
        df_summary: Summary DataFrame, or None.

    Returns:
        set: Keys of the analyzed entries, e.g.
        ``{'Al_Metal_300K', 'Al_Oxide_600K'}``. Empty when there is nothing
        usable.
    """
    if df_summary is None or df_summary.empty:
        return set()

    # Locate the columns regardless of capitalization.
    target_col = None
    temp_col = None
    for col in df_summary.columns:
        name = str(col).lower()
        if name == 'target':
            target_col = col
        elif name in ('temp', 'temperature'):
            temp_col = col

    if target_col is None or temp_col is None:
        # Fall back to ready-made labels when available.
        if 'Label' in df_summary.columns:
            return set(df_summary['Label'].dropna().unique())
        return set()

    targets = df_summary[target_col]
    temps = pd.to_numeric(df_summary[temp_col], errors='coerce').astype(float)
    usable = targets.notna() & np.isfinite(temps)

    # The key format must match the one built by check_if_analyzed().
    return {
        f"{target}_{int(temp)}K"
        for target, temp in zip(targets[usable], temps[usable])
    }


def check_if_analyzed(existing_keys, target, temp):
    """Tell whether a target/temperature combination is already analyzed.

    Args:
        existing_keys: Collection of analyzed keys such as 'Al_Metal_300K'.
        target: Target name (e.g. 'Al_Metal').
        temp: Temperature (e.g. 300).

    Returns:
        bool: True when ``"<target>_<temp>K"`` is contained in ``existing_keys``.
    """
    return f"{target}_{temp}K" in existing_keys


def save_summary_file(results_dir, summary_type, df_new, append=True):
    """Write a summary CSV, merging with the existing file when requested.

    When merging, rows are de-duplicated on ('Target', 'Temp') if both
    columns exist, otherwise on 'Label' if it exists; the most recently added
    row wins. If the merge fails, a warning is printed and the file is
    overwritten with ``df_new`` alone.

    Args:
        results_dir: The ``analysis_results`` directory.
        summary_type: Key into ``SUMMARY_FILES`` ('trends', 'density' or 'rdf').
        df_new: DataFrame holding the new rows.
        append: True merges with an existing file, False overwrites it.
    """
    filepath = results_dir / SUMMARY_FILES[summary_type]
    merge_with_existing = append and filepath.exists()

    if not merge_with_existing:
        df_new.to_csv(filepath, index=False)
    else:
        try:
            df_combined = pd.concat(
                [pd.read_csv(filepath), df_new], ignore_index=True
            )

            # Pick the identifying columns available in the merged table.
            columns = df_combined.columns
            if 'Target' in columns and 'Temp' in columns:
                dedup_on = ['Target', 'Temp']
            elif 'Label' in columns:
                dedup_on = ['Label']
            else:
                dedup_on = None

            if dedup_on is not None:
                df_combined = df_combined.drop_duplicates(subset=dedup_on, keep='last')
            df_combined.to_csv(filepath, index=False)
        except Exception as e:
            print(f"    警告: 追記処理エラー: {e}")
            df_new.to_csv(filepath, index=False)

    print(f"    保存: {SUMMARY_FILES[summary_type]}")


def load_existing_result_from_csv(results_dir, label):
    """Rebuild an analysis result dict from previously saved per-label CSV files.

    Reads ``<label>_trends.csv`` (required), ``<label>_density.csv`` and
    ``<label>_rdf.csv`` (both optional) from ``results_dir``.

    Args:
        results_dir: The ``analysis_results`` directory.
        label: Label of the run (e.g. 'Al_Metal_300K').

    Returns:
        dict with the same keys as a freshly computed result ('df', 'rdf',
        'density', 'label', 'n_al', 'n_surface', 'n_bulk', 'target', 'temp',
        'from_cache'), or None when the trends file is missing, contains no
        rows, or any file cannot be read. 'density' is None when the density
        file is absent. The atom counts are placeholder zeros because they
        are not stored in these files. 'target' and 'temp' are parsed from a
        label of the form ``<target>_<digits>K`` and are None otherwise.
    """
    trends_file = results_dir / f"{label}_trends.csv"
    density_file = results_dir / f"{label}_density.csv"
    rdf_file = results_dir / f"{label}_rdf.csv"

    if not trends_file.exists():
        return None

    try:
        df_trends = pd.read_csv(trends_file)
        if df_trends.empty:
            # A header-only file has no final frame to report; treat it as a
            # cache miss so callers that take df.iloc[-1] never see it.
            return None

        df_density = pd.read_csv(density_file) if density_file.exists() else None

        # RDF curves are stored as one 'r' column plus one 'gr_<pair>' column
        # per pair.
        rdf_data = {}
        if rdf_file.exists():
            df_rdf = pd.read_csv(rdf_file)
            if 'r' in df_rdf.columns:
                for pair_name in ('Al_O', 'Al_F'):
                    column = f'gr_{pair_name}'
                    if column in df_rdf.columns:
                        rdf_data[pair_name] = (df_rdf['r'].values, df_rdf[column].values)
    except Exception as e:
        print(f"    警告: CSVからの読み込みエラー ({label}): {e}")
        return None

    # Recover target/temperature from the label so cached and fresh results
    # expose the same keys.
    head, sep, tail = label.rpartition('_')
    if sep and tail.endswith('K') and tail[:-1].isdigit():
        target, temp = head, int(tail[:-1])
    else:
        target, temp = None, None

    return {
        'df': df_trends,
        'rdf': rdf_data,
        'density': df_density,
        'label': label,
        'n_al': 0,  # placeholders: not recoverable from the CSV files
        'n_surface': 0,
        'n_bulk': 0,
        'target': target,
        'temp': temp,
        'from_cache': True
    }


# Progress marker printed once the cached-result helpers above are defined.
print("既存解析結果管理関数定義完了")

## 3. トラジェクトリ解析関数

In [None]:
def analyze_trajectory(traj_path, label, output_dir, target=None, temp=None):
    """Run the full analysis of one trajectory file and write its CSV files.

    The trajectory is opened here and closed again on every exit path, so
    analyzing many files in a row does not accumulate open file handles.

    Args:
        traj_path: Path of the trajectory file.
        label: Prefix of the output files (e.g. 'Al_Metal_300K').
        output_dir: Directory receiving ``<label>_trends.csv``,
            ``<label>_density.csv`` and, when an RDF was accumulated,
            ``<label>_rdf.csv``.
        target: Target name, passed through into the result dict.
        temp: Temperature, passed through into the result dict.

    Returns:
        dict with keys 'df' (per-frame time series), 'rdf', 'density',
        'label', 'n_al', 'n_surface', 'n_bulk', 'target', 'temp' and
        'from_cache' (False); None when the file cannot be opened or holds
        no frames.
    """
    print(f"  解析中: {traj_path.name}")

    try:
        traj = Trajectory(str(traj_path))
    except Exception as e:
        print(f"    エラー: {e}")
        return None

    try:
        return _analyze_open_trajectory(traj, label, output_dir, target, temp)
    finally:
        traj.close()


def _analyze_open_trajectory(traj, label, output_dir, target, temp):
    """Analyze an already opened trajectory; see ``analyze_trajectory``."""
    if len(traj) == 0:
        return None

    # Basic information. Element membership is taken from the first frame and
    # reused for every frame.
    initial_atoms = traj[0]
    final_atoms = traj[-1]
    symbols = np.array(initial_atoms.get_chemical_symbols())
    cell_vol = final_atoms.get_volume()

    indices = {s: np.where(symbols == s)[0] for s in ['Al', 'O', 'Li', 'H', 'F', 'Na']}

    # Per-element index arrays are frame-independent, so build them once.
    idx_al = indices['Al'].flatten()
    idx_o = indices['O'].flatten()
    idx_h = indices['H'].flatten()
    idx_f = indices['F'].flatten()
    idx_li = indices['Li'].flatten()

    # Surface/bulk split of Al, decided on the first frame.
    surface_al, bulk_al = classify_surface_bulk_al(initial_atoms, idx_al, SURFACE_THRESHOLD)

    # Reference positions for the MSD.
    init_pos_total = initial_atoms.positions[idx_al] if len(idx_al) > 0 else None
    init_pos_surface = initial_atoms.positions[surface_al] if len(surface_al) > 0 else None
    init_pos_bulk = initial_atoms.positions[bulk_al] if len(bulk_al) > 0 else None

    # Frames are assumed to be evenly spaced over the total simulated time.
    dt_ps = TOTAL_SIM_TIME_PS / max(len(traj) - 1, 1)

    # Per-frame accumulators.
    data = {
        'Time_ps': [], 'MSD_Total': [], 'MSD_Surface': [], 'MSD_Bulk': [],
        'CN_Al_O': [], 'CN_Al_F': [], 'CN_Al_OH': [],
        'Reacted_Al_O': [], 'Reacted_Al_F': [], 'Reacted_Al_OH': [],
        'H2_Count': [], 'LiF_Count': []
    }

    # RDF accumulation over the second half of the trajectory.
    rdf_nbins = 100
    rdf_rmax = 6.0
    rdf_al_o_sum = np.zeros(rdf_nbins)
    rdf_al_f_sum = np.zeros(rdf_nbins)
    rdf_count = 0
    start_rdf_frame = int(len(traj) * 0.5)
    bin_edges = None

    for i, atoms in enumerate(traj):
        data['Time_ps'].append(i * dt_ps)

        # MSD of all / surface / bulk Al (0 when the group is empty).
        if len(idx_al) > 0:
            data['MSD_Total'].append(calculate_msd(atoms.positions[idx_al], init_pos_total))
        else:
            data['MSD_Total'].append(0)

        if len(surface_al) > 0:
            data['MSD_Surface'].append(calculate_msd(atoms.positions[surface_al], init_pos_surface))
        else:
            data['MSD_Surface'].append(0)

        if len(bulk_al) > 0:
            data['MSD_Bulk'].append(calculate_msd(atoms.positions[bulk_al], init_pos_bulk))
        else:
            data['MSD_Bulk'].append(0)

        # Al-O bonds
        cn_o, react_o = count_bonds(atoms, idx_al, idx_o, CUTOFFS['Al_O'])
        data['CN_Al_O'].append(cn_o)
        data['Reacted_Al_O'].append(react_o)

        # Al-F bonds
        cn_f, react_f = count_bonds(atoms, idx_al, idx_f, CUTOFFS['Al_F'])
        data['CN_Al_F'].append(cn_f)
        data['Reacted_Al_F'].append(react_f)

        # Al-OH bonds: only oxygens that currently carry a hydrogen
        idx_oh = get_oh_oxygen_indices(atoms, idx_o, idx_h, CUTOFFS['O_H'])
        cn_oh, react_oh = count_bonds(atoms, idx_al, idx_oh, CUTOFFS['Al_O'])
        data['CN_Al_OH'].append(cn_oh)
        data['Reacted_Al_OH'].append(react_oh)

        # H2 molecules
        h2 = count_h2_molecules(atoms, idx_h, CUTOFFS['H_H'])
        data['H2_Count'].append(h2)

        # Li-F bonds: number of Li with at least one F within the cutoff
        _, lif = count_bonds(atoms, idx_li, idx_f, CUTOFFS['Li_F'])
        data['LiF_Count'].append(lif)

        # RDF accumulation (second half of the frames only)
        if i >= start_rdf_frame:
            hist_o, edges = compute_rdf(atoms, idx_al, idx_o, rdf_rmax, rdf_nbins)
            rdf_al_o_sum += hist_o
            hist_f, _ = compute_rdf(atoms, idx_al, idx_f, rdf_rmax, rdf_nbins)
            rdf_al_f_sum += hist_f
            bin_edges = edges
            rdf_count += 1

    df = pd.DataFrame(data)

    # Normalize the frame-averaged RDF histograms.
    rdf_data = {}
    df_rdf = None
    if rdf_count > 0 and bin_edges is not None:
        r_o, gr_o = normalize_rdf(rdf_al_o_sum / rdf_count, bin_edges,
                                   len(idx_al), len(indices['O']), cell_vol)
        rdf_data['Al_O'] = (r_o, gr_o)

        rdf_df_data = {'r': r_o, 'gr_Al_O': gr_o}

        # Al-F is only reported when the system contains fluorine.
        if len(indices['F']) > 0:
            r_f, gr_f = normalize_rdf(rdf_al_f_sum / rdf_count, bin_edges,
                                       len(idx_al), len(indices['F']), cell_vol)
            rdf_data['Al_F'] = (r_f, gr_f)
            rdf_df_data['gr_Al_F'] = gr_f

        df_rdf = pd.DataFrame(rdf_df_data)

    # Per-element atom counts along Z for the final frame (100 bins over the
    # cell's Z extent).
    cell_z = final_atoms.get_cell().diagonal()[2]
    bins_z = np.linspace(0, cell_z, 101)
    bin_centers = 0.5 * (bins_z[1:] + bins_z[:-1])
    density_data = {'Z': bin_centers}
    for elem, idx in indices.items():
        if len(idx) > 0:
            hist, _ = np.histogram(final_atoms.positions[idx, 2], bins=bins_z)
            density_data[elem] = hist
    df_density = pd.DataFrame(density_data)

    # Per-label CSV files.
    df.to_csv(output_dir / f"{label}_trends.csv", index=False)
    df_density.to_csv(output_dir / f"{label}_density.csv", index=False)
    if df_rdf is not None:
        df_rdf.to_csv(output_dir / f"{label}_rdf.csv", index=False)

    return {
        'df': df,
        'rdf': rdf_data,
        'density': df_density,
        'label': label,
        'n_al': len(idx_al),
        'n_surface': len(surface_al),
        'n_bulk': len(bulk_al),
        'target': target,
        'temp': temp,
        'from_cache': False
    }


# Progress marker printed once the trajectory analysis function above is defined.
print("トラジェクトリ解析関数定義完了")

## 4. プロット関数（横2列、フォントサイズ20以上）

In [None]:
def plot_single_condition_summary(results_by_temp, condition_name, target, output_dir):
    """Plot six time series per temperature for one condition and target.

    Draws a 3x2 grid (one panel per metric, one line per temperature) and
    saves it as ``<condition_name>_<target>_temp_comparison.png`` in
    ``output_dir``. Does nothing when ``results_by_temp`` is empty.

    Args:
        results_by_temp: Mapping of temperature to result dict (or None).
        condition_name: Condition name used in the title and file name.
        target: Target name used in the title and file name.
        output_dir: Directory receiving the PNG.
    """
    if not results_by_temp:
        return

    # Six metrics laid out on a 3-row, 2-column grid.
    metrics = [
        ('MSD_Surface', 'Surface Al MSD (Å²)'),
        ('MSD_Bulk', 'Bulk Al MSD (Å²)'),
        ('Reacted_Al_O', 'Al-O Bonded Count'),
        ('Reacted_Al_OH', 'Al-OH Bonded Count'),
        ('Reacted_Al_F', 'Al-F Bonded Count'),
        ('H2_Count', 'H₂ Molecule Count'),
    ]

    fig, axes = plt.subplots(3, 2, figsize=(20, 24))

    # One color per temperature, assigned in ascending temperature order.
    palette = plt.cm.plasma(np.linspace(0.1, 0.9, len(results_by_temp)))
    ordered = sorted(results_by_temp.items())

    for ax, (col, ylabel) in zip(axes.flatten(), metrics):
        for color, (temp, result) in zip(palette, ordered):
            if result is None or col not in result['df'].columns:
                continue
            frame = result['df']
            ax.plot(frame['Time_ps'], frame[col],
                    label=f"{temp}K", color=color, linewidth=2.5)

        ax.set_xlabel("Time (ps)")
        ax.set_ylabel(ylabel)
        ax.legend(ncol=2, loc='best')
        ax.grid(True, alpha=0.3)

    fig.suptitle(f"{condition_name} - {target}: Temperature Comparison", fontsize=28, y=0.98)
    plt.tight_layout(rect=[0, 0.02, 1, 0.96])
    plt.savefig(output_dir / f"{condition_name}_{target}_temp_comparison.png", dpi=150, bbox_inches='tight')
    plt.close()
    print(f"    保存: {condition_name}_{target}_temp_comparison.png")


def plot_rdf_comparison(results_by_temp, condition_name, target, output_dir):
    """Plot the Al-O and Al-F RDFs of all temperatures side by side.

    Saves ``<condition_name>_<target>_rdf.png`` in ``output_dir``; each panel
    also marks the corresponding bond cutoff with a dashed vertical line.

    Args:
        results_by_temp: Mapping of temperature to result dict (or None).
        condition_name: Condition name used in the title and file name.
        target: Target name used in the title and file name.
        output_dir: Directory receiving the PNG.
    """
    fig, axes = plt.subplots(1, 2, figsize=(20, 8))
    colors = plt.cm.viridis(np.linspace(0.1, 0.9, len(results_by_temp)))

    # (pair key in result['rdf'], panel, panel title)
    panels = (
        ('Al_O', axes[0], "RDF: Al-O"),
        ('Al_F', axes[1], "RDF: Al-F"),
    )

    for j, (temp, result) in enumerate(sorted(results_by_temp.items())):
        if result is None:
            continue
        for pair, ax, _ in panels:
            if pair not in result['rdf']:
                continue
            r, gr = result['rdf'][pair]
            ax.plot(r, gr, label=f"{temp}K", color=colors[j], linewidth=2.5)

    for pair, ax, title in panels:
        cutoff = CUTOFFS[pair]
        ax.set_xlabel("r (Å)")
        ax.set_ylabel("g(r)")
        ax.set_title(title)
        ax.axvline(x=cutoff, color='red', linestyle='--', alpha=0.7, label=f"Cutoff={cutoff}Å")
        ax.legend()
        ax.grid(True, alpha=0.3)

    fig.suptitle(f"{condition_name} - {target}: RDF Comparison", fontsize=28)
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.savefig(output_dir / f"{condition_name}_{target}_rdf.png", dpi=150, bbox_inches='tight')
    plt.close()
    print(f"    保存: {condition_name}_{target}_rdf.png")


def plot_density_profiles(results_by_temp, condition_name, target, output_dir,
                          temps_to_plot=(300, 900)):
    """Plot per-element Z density profiles for a few representative temperatures.

    One panel is drawn per entry of ``temps_to_plot``; a panel stays empty
    when that temperature has no result or no density table. The figure is
    saved as ``<condition_name>_<target>_density.png`` in ``output_dir``.

    Args:
        results_by_temp: Mapping of temperature to result dict (or None).
        condition_name: Condition name used in the title and file name.
        target: Target name used in the title and file name.
        output_dir: Directory receiving the PNG.
        temps_to_plot: Temperatures to show, one panel each. Defaults to
            300 K and 900 K.
    """
    temps_to_plot = list(temps_to_plot)
    n_panels = max(len(temps_to_plot), 1)

    # squeeze=False keeps a 2-D axes array for any panel count.
    fig, axes = plt.subplots(1, n_panels, figsize=(10 * n_panels, 8), squeeze=False)

    # Na is included so that Na-containing conditions show their cation too;
    # elements missing from a density table are simply not drawn.
    elements = ['Al', 'O', 'F', 'Li', 'H', 'Na']
    colors_elem = {'Al': 'blue', 'O': 'red', 'F': 'green', 'Li': 'purple',
                   'H': 'orange', 'Na': 'brown'}

    for ax, temp in zip(axes[0], temps_to_plot):
        if temp not in results_by_temp or results_by_temp[temp] is None:
            continue

        df_dens = results_by_temp[temp]['density']
        if df_dens is None:
            # Results restored from cache carry no density table when the
            # density CSV was missing.
            continue

        for elem in elements:
            if elem in df_dens.columns:
                ax.plot(df_dens['Z'], df_dens[elem], label=elem,
                        color=colors_elem.get(elem, 'gray'), linewidth=2.5)

        ax.set_xlabel("Z (Å)")
        ax.set_ylabel("Count")
        ax.set_title(f"Density Profile at {temp}K")
        ax.legend()
        ax.grid(True, alpha=0.3)

    fig.suptitle(f"{condition_name} - {target}: Density Profiles", fontsize=28)
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.savefig(output_dir / f"{condition_name}_{target}_density.png", dpi=150, bbox_inches='tight')
    plt.close()
    print(f"    保存: {condition_name}_{target}_density.png")


# Progress marker printed once the per-condition plot functions above are defined.
print("プロット関数定義完了")

## 5. 条件間比較プロット関数

In [None]:
def plot_cross_condition_comparison(all_results, output_dir):
    """Compare the final-frame values of all conditions, per target.

    Collects the last row of every available trends table, writes the
    collection to ``all_conditions_summary.csv`` and, for each entry of
    ``TARGETS`` with data, saves a 3x2 figure of final value versus
    temperature with one line per condition.

    Args:
        all_results: Nested mapping condition -> target -> temperature ->
            result dict (or None).
        output_dir: Directory receiving the CSV and the PNG files.
    """
    # (summary column, source column in the per-frame trends table)
    final_columns = (
        ('Final_MSD_Surface', 'MSD_Surface'),
        ('Final_MSD_Bulk', 'MSD_Bulk'),
        ('Final_Reacted_Al_O', 'Reacted_Al_O'),
        ('Final_Reacted_Al_OH', 'Reacted_Al_OH'),
        ('Final_Reacted_Al_F', 'Reacted_Al_F'),
        ('Final_H2', 'H2_Count'),
    )

    summary_data = []
    for cond_name, targets_data in all_results.items():
        for target, temp_data in targets_data.items():
            for temp, result in temp_data.items():
                if result is None:
                    continue
                final = result['df'].iloc[-1]
                row = {'Condition': cond_name, 'Target': target, 'Temp': temp}
                for summary_key, source_key in final_columns:
                    row[summary_key] = final[source_key]
                summary_data.append(row)

    if not summary_data:
        print("比較データなし")
        return

    df_summary = pd.DataFrame(summary_data)
    df_summary.to_csv(output_dir / "all_conditions_summary.csv", index=False)

    # Panel order of the comparison figure.
    metrics = [
        ('Final_MSD_Surface', 'Surface MSD (Å²)'),
        ('Final_Reacted_Al_O', 'Al-O Bonded Count'),
        ('Final_Reacted_Al_OH', 'Al-OH Bonded Count'),
        ('Final_Reacted_Al_F', 'Al-F Bonded Count'),
        ('Final_H2', 'H₂ Count'),
        ('Final_MSD_Bulk', 'Bulk MSD (Å²)'),
    ]

    # One comparison figure per target.
    for target in TARGETS:
        df_target = df_summary[df_summary['Target'] == target]
        if df_target.empty:
            continue

        fig, axes = plt.subplots(3, 2, figsize=(20, 24))

        conditions = df_target['Condition'].unique()
        colors_cond = plt.cm.Set1(np.linspace(0, 1, len(conditions)))

        for ax, (col, ylabel) in zip(axes.flatten(), metrics):
            for color, cond in zip(colors_cond, conditions):
                subset = df_target[df_target['Condition'] == cond].sort_values('Temp')
                if subset.empty:
                    continue
                ax.plot(subset['Temp'], subset[col], marker='o', markersize=10,
                        label=cond, color=color, linewidth=2.5)

            ax.set_xlabel("Temperature (K)")
            ax.set_ylabel(ylabel)
            ax.legend()
            ax.grid(True, alpha=0.3)

        fig.suptitle(f"Cross-Condition Comparison: {target}", fontsize=28, y=0.98)
        plt.tight_layout(rect=[0, 0.02, 1, 0.96])
        plt.savefig(output_dir / f"comparison_{target}_all_conditions.png", dpi=150, bbox_inches='tight')
        plt.close()
        print(f"  保存: comparison_{target}_all_conditions.png")


def plot_arrhenius(all_results, output_dir):
    """Draw Arrhenius-style plots of the final surface MSD, one panel per target.

    For every condition, ``ln(final surface MSD + 1e-5)`` is plotted against
    ``1000 / T``. The number of panels follows ``len(TARGETS)`` instead of
    being fixed at two, so adding or removing targets neither raises nor
    leaves a blank panel. The figure is saved as ``arrhenius_comparison.png``
    in ``output_dir``.

    Args:
        all_results: Nested mapping condition -> target -> temperature ->
            result dict (or None).
        output_dir: Directory receiving the PNG.
    """
    n_panels = max(len(TARGETS), 1)
    # squeeze=False keeps a 2-D axes array for any panel count.
    fig, axes = plt.subplots(1, n_panels, figsize=(10 * n_panels, 8), squeeze=False)

    # Colors are tied to the condition, identical across panels.
    conditions = list(all_results.keys())
    colors_cond = plt.cm.Set1(np.linspace(0, 1, len(conditions)))

    for ax, target in zip(axes[0], TARGETS):
        for j, cond_name in enumerate(conditions):
            if target not in all_results[cond_name]:
                continue

            temp_data = all_results[cond_name][target]
            temps, msd_vals = [], []

            for temp, result in sorted(temp_data.items()):
                if result is not None:
                    temps.append(temp)
                    msd_vals.append(result['df']['MSD_Surface'].iloc[-1])

            if temps:
                inv_t = 1000 / np.array(temps)
                # The small offset keeps the logarithm finite for a zero MSD.
                ln_msd = np.log(np.array(msd_vals) + 1e-5)
                ax.plot(inv_t, ln_msd, marker='s', markersize=10,
                        label=cond_name, color=colors_cond[j], linewidth=2.5)

        ax.set_xlabel("1000/T (K⁻¹)")
        ax.set_ylabel("ln(Surface MSD)")
        ax.set_title(f"Arrhenius Plot: {target}")
        ax.legend()
        ax.grid(True, alpha=0.3)

    fig.suptitle("Arrhenius Analysis: Activation Energy Comparison", fontsize=28)
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.savefig(output_dir / "arrhenius_comparison.png", dpi=150, bbox_inches='tight')
    plt.close()
    print("  保存: arrhenius_comparison.png")


# Progress marker printed once the cross-condition plot functions above are defined.
print("条件間比較関数定義完了")

## 6. メイン実行関数

In [None]:
def run_full_analysis(force_reanalyze=False):
    """
    Analyze every condition / target / temperature combination and plot the results.

    For each entry of CONDITIONS, TARGETS and TEMPERATURES the trajectory
    ``<target>_md_<temp>K.traj`` is analyzed, unless it is already listed in
    the condition's trends summary and its per-label CSV files can be loaded.
    Per-condition plots are written after each target, the three summary CSV
    files are updated after each condition, and the cross-condition plots are
    written at the end.

    Args:
        force_reanalyze: If True, ignore existing results and re-analyze everything.

    Returns:
        dict: Nested mapping condition -> target -> temperature -> result
        dict, with None for trajectories that are missing or failed to analyze.
    """
    print("=" * 60)
    print("統合MD解析 開始")
    if not force_reanalyze:
        print("(既存解析結果がある場合はスキップ)")
    print("=" * 60)
    
    all_results = {}
    
    for cond_name, base_path in CONDITIONS.items():
        print(f"\n[条件: {cond_name}]")
        
        # Per-condition output directory
        cond_output_dir = OUTPUT_DIR / cond_name
        cond_output_dir.mkdir(parents=True, exist_ok=True)
        
        # analysis_results directory (CSV files and summaries)
        results_dir = get_analysis_results_dir(cond_output_dir)
        
        # Load the existing summary files.
        # NOTE: only the trends summary is consulted below; the density and
        # rdf summaries are loaded (and reported) but not used afterwards.
        existing_trends = load_existing_summary(results_dir, 'trends')
        existing_density = load_existing_summary(results_dir, 'density')
        existing_rdf = load_existing_summary(results_dir, 'rdf')
        
        # Keys ("<target>_<temp>K") that are already analyzed
        analyzed_keys = get_analyzed_keys(existing_trends)
        if analyzed_keys:
            print(f"    解析済み: {len(analyzed_keys)}件")
        
        all_results[cond_name] = {}
        
        # Summary rows of the runs analyzed in this call
        new_trends_list = []
        new_density_list = []
        new_rdf_list = []
        
        for target in TARGETS:
            print(f"  ターゲット: {target}")
            all_results[cond_name][target] = {}
            
            for temp in TEMPERATURES:
                label = f"{target}_{temp}K"
                traj_file = base_path / f"{target}_md_{temp}K.traj"
                
                # Reuse existing data unless a re-analysis is forced
                if not force_reanalyze and check_if_analyzed(analyzed_keys, target, temp):
                    # Rebuild the result from the per-label CSV files; when
                    # that fails, fall through to a fresh analysis
                    cached_result = load_existing_result_from_csv(results_dir, label)
                    if cached_result is not None:
                        print(f"    スキップ（キャッシュ使用）: {label}")
                        all_results[cond_name][target][temp] = cached_result
                        continue
                
                # Fresh analysis
                if traj_file.exists():
                    result = analyze_trajectory(
                        traj_file, label, results_dir, 
                        target=target, temp=temp
                    )
                    all_results[cond_name][target][temp] = result
                    
                    # Collect the summary rows (final-frame values)
                    if result is not None:
                        df = result['df']
                        final = df.iloc[-1]
                        
                        # trends summary
                        new_trends_list.append({
                            'Label': label,
                            'Target': target,
                            'Temp': temp,
                            'n_al': result['n_al'],
                            'n_surface': result['n_surface'],
                            'n_bulk': result['n_bulk'],
                            'Final_MSD_Surface': final['MSD_Surface'],
                            'Final_MSD_Bulk': final['MSD_Bulk'],
                            'Final_MSD_Total': final['MSD_Total'],
                            'Final_CN_Al_O': final['CN_Al_O'],
                            'Final_CN_Al_F': final['CN_Al_F'],
                            'Final_CN_Al_OH': final['CN_Al_OH'],
                            'Final_Reacted_Al_O': final['Reacted_Al_O'],
                            'Final_Reacted_Al_F': final['Reacted_Al_F'],
                            'Final_Reacted_Al_OH': final['Reacted_Al_OH'],
                            'Final_H2_Count': final['H2_Count'],
                            'Final_LiF_Count': final['LiF_Count'],
                        })
                        
                        # density summary: total and maximum bin count per element
                        df_dens = result['density']
                        density_row = {'Label': label, 'Target': target, 'Temp': temp}
                        for elem in ['Al', 'O', 'F', 'Li', 'H', 'Na']:
                            if elem in df_dens.columns:
                                density_row[f'{elem}_total'] = df_dens[elem].sum()
                                density_row[f'{elem}_max'] = df_dens[elem].max()
                        new_density_list.append(density_row)
                        
                        # rdf summary: position and height of the highest g(r) value
                        rdf_row = {'Label': label, 'Target': target, 'Temp': temp}
                        for pair_name in ['Al_O', 'Al_F']:
                            if pair_name in result['rdf']:
                                r, gr = result['rdf'][pair_name]
                                if len(gr) > 0:
                                    rdf_row[f'{pair_name}_peak_r'] = r[np.argmax(gr)]
                                    rdf_row[f'{pair_name}_peak_gr'] = np.max(gr)
                        new_rdf_list.append(rdf_row)
                else:
                    print(f"    スキップ: {traj_file.name} (ファイルなし)")
                    all_results[cond_name][target][temp] = None
            
            # Plots for this condition and target
            print(f"  プロット作成中...")
            plot_single_condition_summary(
                all_results[cond_name][target], cond_name, target, cond_output_dir
            )
            plot_rdf_comparison(
                all_results[cond_name][target], cond_name, target, cond_output_dir
            )
            plot_density_profiles(
                all_results[cond_name][target], cond_name, target, cond_output_dir
            )
        
        # Persist the newly analyzed runs into the summary files
        if new_trends_list:
            save_summary_file(results_dir, 'trends', pd.DataFrame(new_trends_list))
        if new_density_list:
            save_summary_file(results_dir, 'density', pd.DataFrame(new_density_list))
        if new_rdf_list:
            save_summary_file(results_dir, 'rdf', pd.DataFrame(new_rdf_list))
    
    # Cross-condition comparison
    print("\n" + "=" * 60)
    print("条件間比較グラフ作成中...")
    print("=" * 60)
    plot_cross_condition_comparison(all_results, OUTPUT_DIR)
    plot_arrhenius(all_results, OUTPUT_DIR)
    
    print("\n" + "=" * 60)
    print(f"解析完了! 結果は {OUTPUT_DIR} に保存されました。")
    print("=" * 60)
    
    return all_results


# Progress marker printed once the main driver function above is defined.
print("メイン実行関数定義完了")

## 7. 解析実行

In [None]:
# Run the analysis.
# force_reanalyze=False: skip runs that already have analysis results (default)
# force_reanalyze=True: re-analyze all data unconditionally
results = run_full_analysis(force_reanalyze=False)

## 8. 追加解析（オプション）

必要に応じて個別の解析を実行できます。

In [None]:
# 特定の条件のみ再解析する場合
def reanalyze_single_condition(cond_name, target, temp, force=True):
    """
    Re-run the analysis for a single (condition, target, temperature) case.

    After a successful analysis, one row each is passed to save_summary_file
    for the 'trends', 'density' and 'rdf' summaries.

    Args:
        cond_name: Condition name; must be a key of CONDITIONS (e.g. 'Li-F-OH').
        target: Target name used in the trajectory file name (e.g. 'Al_Metal').
        temp: Temperature used in the trajectory file name (e.g. 600).
        force: True = ignore the cache and re-analyze;
            False = return the cached result when one can be loaded.

    Returns:
        The cached result or the result of analyze_trajectory. None when the
        condition is unknown, the trajectory file does not exist, or
        analyze_trajectory itself returned None.
    """
    if cond_name not in CONDITIONS:
        print(f"条件 {cond_name} が見つかりません")
        return None

    label = f"{target}_{temp}K"
    traj_file = CONDITIONS[cond_name] / f"{target}_md_{temp}K.traj"

    # Output directory for this condition (created on demand).
    cond_output_dir = OUTPUT_DIR / cond_name
    cond_output_dir.mkdir(parents=True, exist_ok=True)
    results_dir = get_analysis_results_dir(cond_output_dir)

    # Cache lookup: only consulted when the caller did not force a re-run.
    if not force:
        analyzed_keys = get_analyzed_keys(
            load_existing_summary(results_dir, 'trends')
        )
        if check_if_analyzed(analyzed_keys, target, temp):
            cached = load_existing_result_from_csv(results_dir, label)
            if cached is not None:
                print(f"キャッシュから読み込み: {label}")
                return cached

    if not traj_file.exists():
        print(f"ファイルが見つかりません: {traj_file}")
        return None

    result = analyze_trajectory(
        traj_file, label, results_dir, target=target, temp=temp
    )
    if result is None:
        return None

    # Columns shared by every summary row written below.
    identity = {'Label': label, 'Target': target, 'Temp': temp}

    # --- trends summary: atom counts plus the last row of the time series ---
    last_frame = result['df'].iloc[-1]
    trends_record = dict(identity)
    for count_key in ('n_al', 'n_surface', 'n_bulk'):
        trends_record[count_key] = result[count_key]
    for column in (
        'MSD_Surface', 'MSD_Bulk', 'MSD_Total',
        'CN_Al_O', 'CN_Al_F', 'CN_Al_OH',
        'Reacted_Al_O', 'Reacted_Al_F', 'Reacted_Al_OH',
        'H2_Count', 'LiF_Count',
    ):
        trends_record[f'Final_{column}'] = last_frame[column]
    save_summary_file(results_dir, 'trends', pd.DataFrame([trends_record]))

    # --- density summary: column sum and max for each element present ---
    profile = result['density']
    density_record = dict(identity)
    present_elements = [
        element
        for element in ('Al', 'O', 'F', 'Li', 'H', 'Na')
        if element in profile.columns
    ]
    for element in present_elements:
        density_record[f'{element}_total'] = profile[element].sum()
        density_record[f'{element}_max'] = profile[element].max()
    save_summary_file(results_dir, 'density', pd.DataFrame([density_record]))

    # --- rdf summary: position and height of the largest g(r) value ---
    rdf_curves = result['rdf']
    rdf_record = dict(identity)
    for pair_name in ('Al_O', 'Al_F'):
        if pair_name not in rdf_curves:
            continue
        r, gr = rdf_curves[pair_name]
        if len(gr) == 0:
            continue
        peak_index = np.argmax(gr)
        rdf_record[f'{pair_name}_peak_r'] = r[peak_index]
        rdf_record[f'{pair_name}_peak_gr'] = np.max(gr)
    save_summary_file(results_dir, 'rdf', pd.DataFrame([rdf_record]))

    return result


# 使用例（必要に応じてコメントアウトを外して実行）
# result = reanalyze_single_condition("Li-F-OH", "Al_Metal", 600, force=True)

In [None]:
# 結果のサマリー表示
def display_summary(results):
    """
    Build a summary table from the nested analysis results.

    Args:
        results: Nested mapping condition -> target -> temperature -> result.
            Entries whose result is None are left out of the table.

    Returns:
        A pandas DataFrame with one row per non-None result, taken from the
        last row of that result's 'df'. The surface MSD is stored as a string
        formatted to two decimals; the Al-O and H2 counts are cast to int.
    """
    records = []

    for condition, per_target in results.items():
        for target_name, per_temp in per_target.items():
            for temperature, outcome in per_temp.items():
                if outcome is None:
                    continue

                last_frame = outcome['df'].iloc[-1]
                record = {
                    '条件': condition,
                    'ターゲット': target_name,
                    '温度(K)': temperature,
                    'Al原子数': outcome['n_al'],
                    '表面Al': outcome['n_surface'],
                    '最終MSD_Surface': f"{last_frame['MSD_Surface']:.2f}",
                    '最終Al-O結合': int(last_frame['Reacted_Al_O']),
                    '最終H2数': int(last_frame['H2_Count']),
                }
                records.append(record)

    return pd.DataFrame(records)


# Show the summary table.
# Guard: runs only when a `results` name already exists in the current namespace,
# i.e. the analysis cell that assigns `results` has been executed in this session.
if 'results' in dir():
    summary_df = display_summary(results)
    # NOTE(review): `display` is not defined in the visible part of the file —
    # presumably the IPython/Jupyter display helper; confirm when running outside a notebook.
    display(summary_df)