# AlphaZero Gomoku Training Analysis (Enhanced Version)

This notebook provides a comprehensive analysis of the new training log format, with added multi-dimensional evaluation comparisons and training stability monitoring.

In [None]:
# Location of the new-format training log to analyse.
# Point this at the actual log file before running the notebook.
log_path = "7x5_train_log_20250505_174141.txt"

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from scipy import stats
from collections import defaultdict

# Plot appearance: classic theme first, then the rc overrides applied on top of it.
plt.style.use('classic')
plt.rcParams.update({
    'figure.figsize': (14, 8),
    'font.size': 12,
    'axes.grid': True,
    'grid.alpha': 0.3,
})

## 1. 新版日志解析器

In [None]:
def parse_enhanced_log(log_path):
    """Parse an enhanced-format training log into two DataFrames.

    The file is scanned one line at a time. Training lines come in pairs:
    a batch header (``训练批次: N, 当前对局长度: M``) followed by a metrics
    line (``kl:..., lr_multiplier:..., loss:..., ...``). Evaluation blocks
    start with a header naming the opponent and are followed by a main-MCTS
    configuration line, a baseline-AI line, a win/loss/draw line and a
    game-quality line. Every field is matched on its own, so fields may
    share a line or sit on separate lines.

    Args:
        log_path: Path of the UTF-8 encoded log file.

    Returns:
        A ``(train_df, eval_df)`` tuple.

        ``train_df`` has one row per metrics line with the columns
        ``batch``, ``episode_len``, ``kl``, ``lr_multiplier``, ``loss``,
        ``entropy``, ``explained_var_old`` and ``explained_var_new``.

        ``eval_df`` has one row per completed evaluation block with
        ``opponent`` (lower-cased), the win/loss/draw counts, ``win_ratio``
        (fraction in [0, 1]), the quality metrics, whichever configuration
        fields were found, and ``eval_batch``: the most recent training
        batch seen before the block started (0 if none had been seen).
        Either frame is empty (no columns) when nothing matched.
    """
    # Decimal number with optional sign and exponent (e.g. -0.12, 1.5e-05);
    # everything this matches is accepted by float().
    num = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'

    batch_pattern = re.compile(r'训练批次: (\d+), 当前对局长度: (\d+)')
    train_metrics_pattern = re.compile(
        rf'kl:({num}),\s*lr_multiplier:({num}),\s*loss:({num}),\s*entropy:({num}),\s*'
        rf'explained_var_old:({num}),\s*explained_var_new:({num})')

    # The header delimiters must be escaped: a bare ``$`` is the regex
    # end-of-line anchor and can never be followed by text. Both ``[...]``
    # and a literal ``$$...$$`` wrapper are accepted.
    eval_start_pattern = re.compile(r'(?:\[|\$\$)评估 vs (\w+)(?:\]|\$\$)')
    # Main and baseline configuration are matched separately because the log
    # is read line by line; a single pattern spanning a newline cannot match.
    main_config_pattern = re.compile(rf'主MCTS: n_playout=(\d+) \| c_puct=({num})')
    baseline_config_pattern = re.compile(r'基准AI: (.+)')
    eval_result_pattern = re.compile(
        rf'胜/负/平: (\d+)/(\d+)/(\d+).*?胜率: ({num})%')
    eval_quality_pattern = re.compile(
        rf'平均步数: ({num}).*?优势波动: ({num}).*?最大失误: ({num}).*?关键转折点: ({num})')

    required_eval_keys = ('opponent', 'win_ratio', 'avg_moves')

    train_data = []
    eval_data = []

    # Initialised up front so a metrics line that precedes any batch header
    # cannot raise UnboundLocalError.
    current_batch = None
    current_episode_len = None

    current_eval = {}
    eval_batch = 0

    with open(log_path, 'r', encoding='utf-8') as f:
        for line in f:
            # --- training records ---
            if batch_match := batch_pattern.search(line):
                current_batch = int(batch_match.group(1))
                current_episode_len = int(batch_match.group(2))
                continue

            if metrics_match := train_metrics_pattern.search(line):
                # A metrics line with no preceding batch header (e.g. a
                # truncated log) cannot be attributed to a batch; skip it.
                if current_batch is not None:
                    train_data.append({
                        'batch': current_batch,
                        'episode_len': current_episode_len,
                        'kl': float(metrics_match.group(1)),
                        'lr_multiplier': float(metrics_match.group(2)),
                        'loss': float(metrics_match.group(3)),
                        'entropy': float(metrics_match.group(4)),
                        'explained_var_old': float(metrics_match.group(5)),
                        'explained_var_new': float(metrics_match.group(6))
                    })
                continue

            # --- evaluation records ---
            if eval_start_match := eval_start_pattern.search(line):
                current_eval = {'opponent': eval_start_match.group(1).lower()}
                # Remember which training batch this evaluation belongs to.
                eval_batch = current_batch if current_batch is not None else 0

            if main_config_match := main_config_pattern.search(line):
                current_eval.update({
                    'main_n_playout': int(main_config_match.group(1)),
                    'main_c_puct': float(main_config_match.group(2))
                })

            if baseline_match := baseline_config_pattern.search(line):
                current_eval['opponent_config'] = baseline_match.group(1)

            if eval_result_match := eval_result_pattern.search(line):
                current_eval.update({
                    'wins': int(eval_result_match.group(1)),
                    'losses': int(eval_result_match.group(2)),
                    'draws': int(eval_result_match.group(3)),
                    'win_ratio': float(eval_result_match.group(4)) / 100
                })

            if eval_quality_match := eval_quality_pattern.search(line):
                current_eval.update({
                    'avg_moves': float(eval_quality_match.group(1)),
                    'advantage_std': float(eval_quality_match.group(2)),
                    'max_blunder': float(eval_quality_match.group(3)),
                    'turning_points': float(eval_quality_match.group(4))
                })

                # The quality line closes an evaluation block. Store a copy
                # and reset, so later lines cannot mutate or duplicate it.
                if all(key in current_eval for key in required_eval_keys):
                    record = dict(current_eval)
                    record['eval_batch'] = eval_batch
                    eval_data.append(record)
                    current_eval = {}

    train_df = pd.DataFrame(train_data)
    eval_df = pd.DataFrame(eval_data)

    return train_df, eval_df

# Run the parser on the configured log file
train_df, eval_df = parse_enhanced_log(log_path)

# Persist both tables as CSV, without the index column
for frame, csv_name in ((train_df, 'enhanced_train_metrics.csv'),
                        (eval_df, 'enhanced_eval_metrics.csv')):
    frame.to_csv(csv_name, index=False)

# Report how many training / evaluation records were parsed
print(f"解析到 {len(train_df)} 条训练记录和 {len(eval_df)} 条评估记录")
eval_df.head()

## 2. 多维度评估分析

In [None]:
# Win-rate comparison across opponent types
if not eval_df.empty:
    plt.figure(figsize=(14, 7))

    # Mean, spread and number of evaluation rounds per opponent
    opponent_stats = eval_df.groupby('opponent')['win_ratio'].agg(['mean', 'std', 'count'])

    # std is NaN for an opponent with a single evaluation round; use a
    # zero-length error bar there instead of handing NaN to matplotlib.
    win_rate_err = opponent_stats['std'].fillna(0)

    # Bar chart of the mean win rate with +/- one standard deviation
    bars = plt.bar(opponent_stats.index, opponent_stats['mean'],
                   yerr=win_rate_err, capsize=10, alpha=0.7, color='skyblue')

    # Value label on top of every bar
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2., height,
                 f'{height:.1%}', ha='center', va='bottom')

    plt.title('Average Win Rate Comparison Against Different Opponents', fontsize=16)
    plt.ylabel('Win Rate')
    plt.ylim(0, 1.1)
    plt.grid(True, axis='y', alpha=0.3)

    # Games actually played against each opponent: wins + losses + draws summed
    # over all evaluation rounds. The 'count' column above is the number of
    # rounds, not games, so it must not be labelled as games.
    games_played = eval_df.groupby('opponent')[['wins', 'losses', 'draws']].sum().sum(axis=1)
    for i, opp in enumerate(opponent_stats.index):
        plt.text(i, 0.05, f'{int(games_played[opp])} games', ha='center', color='white')

    plt.tight_layout()
    plt.savefig('opponent_win_rates.png', dpi=300)
    plt.show()

    # Detailed per-opponent statistics
    display(opponent_stats)

In [None]:
# Correlation heat map of the game-quality metrics
if not eval_df.empty:
    quality_metrics = ['avg_moves', 'advantage_std', 'max_blunder', 'turning_points']

    plt.figure(figsize=(12, 8))
    corr_matrix = eval_df[quality_metrics].corr()

    # pcolor is used in place of a seaborn-style heatmap. The colour scale is
    # pinned to [-1, 1] so the neutral colour of the diverging 'coolwarm' map
    # always means zero correlation, regardless of the values present.
    plt.pcolor(corr_matrix, cmap='coolwarm', vmin=-1, vmax=1)
    plt.colorbar()

    # Annotate every cell with its coefficient (cell centres sit at +0.5)
    for i, row_name in enumerate(corr_matrix.index):
        for j, col_name in enumerate(corr_matrix.columns):
            plt.text(j + 0.5, i + 0.5, f'{corr_matrix.loc[row_name, col_name]:.2f}',
                     horizontalalignment='center', verticalalignment='center', fontsize=12)

    tick_positions = np.arange(len(corr_matrix.columns)) + 0.5
    plt.xticks(tick_positions, corr_matrix.columns, rotation=45)
    plt.yticks(tick_positions, corr_matrix.columns)
    plt.title('Correlation Analysis of Game Quality Metrics', fontsize=16)
    plt.tight_layout()
    plt.savefig('game_quality_correlation.png', dpi=300)
    plt.show()

    # Descriptive statistics for each metric
    print("Descriptive Statistics for Game Quality Metrics:")
    display(eval_df[quality_metrics].describe())

## 3. 训练过程监控（新增功能）

In [None]:
# Relationship between training metrics and evaluation win rate
if not train_df.empty and not eval_df.empty:
    # Training metrics compared against the win rate
    metrics = ['loss', 'entropy', 'kl', 'explained_var_new']

    # Attach to each evaluation the latest training record at or before its
    # batch. Only rows missing a plotted value are dropped; a blanket dropna()
    # would also discard evaluations that merely lack an optional column.
    merged = pd.merge_asof(
        eval_df.sort_values('eval_batch'),
        train_df.sort_values('batch'),
        left_on='eval_batch', right_on='batch',
        direction='backward'
    ).dropna(subset=metrics + ['win_ratio'])

    if merged.empty:
        print('No evaluation record could be matched to a training record; skipping plot.')
    else:
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
        axes = axes.flatten()

        # Styling for the known opponents; any other opponent name falls back
        # to a neutral style instead of raising KeyError.
        colors = {'pure_mcts': 'blue', 'minimax_ab': 'green', 'random': 'red'}
        markers = {'pure_mcts': 'o', 'minimax_ab': '^', 'random': 's'}

        for ax, metric in zip(axes, metrics):
            # One scatter series per opponent type
            for opp in merged['opponent'].unique():
                subset = merged[merged['opponent'] == opp]
                ax.scatter(subset[metric], subset['win_ratio'],
                           label=opp, color=colors.get(opp, 'gray'),
                           marker=markers.get(opp, 'x'), s=100)

            # A linear trend needs at least two points with distinct x values;
            # otherwise the least-squares fit is undefined.
            if len(merged) >= 2 and merged[metric].nunique() > 1:
                trend = np.poly1d(np.polyfit(merged[metric], merged['win_ratio'], 1))
                x_range = np.linspace(merged[metric].min(), merged[metric].max(), 100)
                ax.plot(x_range, trend(x_range), 'k--', label='Trend Line')

            # Pearson correlation between the metric and the win rate
            corr = merged[[metric, 'win_ratio']].corr().iloc[0, 1]
            ax.text(0.05, 0.95, f'ρ = {corr:.2f}', transform=ax.transAxes,
                    fontsize=12, bbox=dict(facecolor='white', alpha=0.8))

            ax.set_title(f'Win Rate vs {metric}')
            ax.legend()

        plt.suptitle('Association Between Training Metrics and Win Rate', fontsize=16, y=1.02)
        plt.tight_layout()
        # The suptitle sits above the figure (y > 1); a tight bounding box keeps
        # it from being cropped out of the saved image.
        plt.savefig('metrics_vs_winrate.png', dpi=300, bbox_inches='tight')
        plt.show()

## 4. 高级分析功能

In [None]:
# Split training into stages and compare them
if not train_df.empty:
    # Stages are quantile bins over the batch number
    num_stages = 4
    if train_df['batch'].nunique() < 2:
        # A single distinct batch cannot be binned; treat it as one stage.
        train_df['stage'] = 1
    else:
        # duplicates='drop' merges identical quantile edges instead of raising
        # "Bin edges must be unique" when batch values repeat heavily.
        train_df['stage'] = pd.qcut(train_df['batch'], q=num_stages,
                                    labels=False, duplicates='drop') + 1

    stage_metric_names = ['loss', 'entropy', 'kl', 'explained_var_new']

    # Mean and standard deviation of every metric per stage
    stage_metrics = train_df.groupby('stage').agg(
        {name: ['mean', 'std'] for name in stage_metric_names})

    # One bar chart per metric
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    axes = axes.flatten()

    for ax, metric in zip(axes, stage_metric_names):
        x = stage_metrics.index
        y = stage_metrics[(metric, 'mean')]
        # std is NaN for a stage holding a single record; use a zero-length
        # error bar there.
        y_err = stage_metrics[(metric, 'std')].fillna(0)

        bars = ax.bar(x, y, yerr=y_err, capsize=5, alpha=0.7, color='lightgreen')

        # Value label on every bar
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height,
                    f'{height:.2f}', ha='center', va='bottom')

        ax.set_title(f'{metric} Across Stages')
        ax.set_xlabel('Training Stage')
        ax.set_ylabel(metric)

    plt.suptitle('Comparison of Training Metrics Across Different Stages', fontsize=16, y=1.02)
    plt.tight_layout()
    # The suptitle sits above the figure (y > 1); a tight bounding box keeps
    # it from being cropped out of the saved image.
    plt.savefig('training_stages.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Detailed per-stage table
    print("Metrics Across Training Stages:")
    display(stage_metrics)

## 5. 分析结论与建议

### 关键发现

> 注：本节及后续各节中的 “XX” 均为占位符，请在运行上述分析单元后用实际统计结果替换；各条结论（如“损失值持续下降”）也需根据实际数据核实后再保留。

1. **多对手评估**：
   - 模型对`pure_mcts`胜率最高（平均XX%），表明神经网络有效提升了搜索效率
   - 对`minimax`类对手表现差异反映了不同算法的特性

2. **训练稳定性**：
   - KL散度波动范围XX-XX，表明策略更新幅度适中
   - 损失值持续下降，显示模型正在有效学习

3. **棋局质量**：
   - 平均对局长度XX步，关键转折点XX个/局
   - 优势波动与最大失误呈XX相关（ρ=XX）

### 优化建议

1. **训练参数**：
   - 当KL散度>XX时降低学习率乘数
   - 增加对`minimax_ab`的评估频率

2. **评估改进**：
   - 对每个对手至少评估30局以提高统计显著性
   - 添加人类先验知识对手作为基准

3. **模型架构**：
   - 若解释方差<XX，考虑增大价值网络容量
   - 尝试不同的MCTS模拟次数配置

### 后续步骤

1. 运行更长周期的训练（建议XX批次）
2. 在更大棋盘上测试模型泛化能力
3. 添加开局库测试特定场景表现