In [None]:
# 必要なライブラリをインポート
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
import logging
from tqdm import tqdm
import matplotlib.pyplot as plt
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
from sqlalchemy import create_engine, text
import torch.nn.functional as F
import japanize_matplotlib

# 自作モジュールをインポート
from _dataset import HorguesDataset
from _models import HorguesModel
from horgues3.losses import HorguesLoss
from horgues3.odds import get_odds_dataframes
from horgues3.haraimodoshi import get_haraimodoshi_dataframes
from horgues3.database import create_database_engine

In [None]:
# Logging setup: INFO level, each record formatted as "time - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Logger used by the following cells for progress messages.
logger = logging.getLogger(__name__)

In [None]:
# Device selection.
# The second GPU ('cuda:1') is still preferred, but the notebook now falls back to
# the first GPU and then to the CPU, so it also runs on hosts with fewer devices
# instead of failing later when the model is moved to a non-existent GPU.
PREFERRED_GPU_INDEX = 1
if torch.cuda.is_available():
    gpu_index = PREFERRED_GPU_INDEX if torch.cuda.device_count() > PREFERRED_GPU_INDEX else 0
    device = torch.device(f'cuda:{gpu_index}')
else:
    device = torch.device('cpu')
logger.info(f"使用デバイス: {device}")

In [None]:
# Run parameters: number of samples per batch passed to the DataLoader.
BATCH_SIZE = 128

In [None]:
# Load the saved checkpoint onto the CPU. Later cells read its
# 'preprocessing_params', 'model_config' and 'model_state_dict' entries.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
checkpoint = torch.load('outputs/best_model.pth', map_location='cpu')

In [None]:
# Period covered by this run; the same two bounds are passed to the dataset and to
# the odds / payout loaders below.
# NOTE(review): the values look like 'YYYYMMDD' strings — confirm the expected format.
START_DATE = '20230101'
END_DATE = '20231231'

logger.info(f"期間: {START_DATE} - {END_DATE}")

In [None]:
# Build the dataset for the configured period, reusing the preprocessing
# parameters that were stored in the checkpoint.
logger.info("データセットを作成中...")
dataset_options = {
    'start_date': START_DATE,
    'end_date': END_DATE,
    'preprocessing_params': checkpoint['preprocessing_params'],
    'cache_dir': 'cache/test',
    'use_cache': True,
}
dataset = HorguesDataset(**dataset_options)

logger.info(f"データサイズ: {len(dataset)}")

In [None]:
# Serve the dataset in its original order (no shuffling) so that the collected
# race ids, masks, targets and scores stay aligned row by row.
loader_options = dict(
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)
loader = DataLoader(dataset, **loader_options)

In [None]:
# Read the model configuration stored in the checkpoint and log the number of
# numerical / categorical features and the configured sequence names.
model_config = checkpoint['model_config']
logger.info(f"数値特徴量数: {len(model_config['numerical_features'])}")
logger.info(f"カテゴリ特徴量数: {len(model_config['categorical_features'])}")
logger.info(f"時系列データ: {model_config['sequence_names']}")

In [None]:
# Instantiate the model from the checkpoint's configuration and move it to the
# selected device. The constructor keyword names match the config keys one-to-one.
model_kwargs = {
    key: model_config[key]
    for key in ('sequence_names', 'feature_aliases', 'numerical_features', 'categorical_features')
}
model = HorguesModel(**model_kwargs).to(device)

In [None]:
# Restore the weights saved in the checkpoint into the freshly built model.
model.load_state_dict(checkpoint['model_state_dict'])

In [None]:
def _to_device(tensors, device):
    """Return a new ``{name: tensor}`` dict with every tensor moved to ``device``."""
    return {name: tensor.to(device) for name, tensor in tensors.items()}


# Per-batch accumulators. They are kept separate from the final concatenated
# tensors so that a name never changes type half-way through the cell.
race_ids = []
mask_batches = []
target_batches = []
score_batches = []

model.eval()
with torch.no_grad():
    for batch in tqdm(loader, desc='Predict'):
        # Race ids, masks and targets are only needed for the later analysis,
        # so they are collected as delivered by the loader.
        race_ids.extend(batch['race_id'])
        mask_batches.append(batch['mask'])
        target_batches.append(batch['target'])

        # Move only the model inputs to the device. The target is not an input,
        # so it is no longer copied to the device on every batch.
        x_num = _to_device(batch['x_num'], device)
        x_cat = _to_device(batch['x_cat'], device)
        sequence_data = {
            seq_name: {
                'x_num': _to_device(seq_data['x_num'], device),
                'x_cat': _to_device(seq_data['x_cat'], device),
                'mask': seq_data['mask'].to(device),
            }
            for seq_name, seq_data in batch['sequence_data'].items()
        }
        mask = batch['mask'].to(device)

        # Bring the scores back to the CPU right away to keep device memory flat.
        score_batches.append(model(x_num, x_cat, sequence_data, mask).cpu())

# Concatenate along the batch dimension; rows line up with ``race_ids``.
masks = torch.cat(mask_batches, dim=0)
targets = torch.cat(target_batches, dim=0)
scores = torch.cat(score_batches, dim=0)

In [None]:
# Fetch odds and payout ("haraimodoshi") data for the same period as the dataset.
# The simulation below reads the 'sanrentan' entry of each result.
# NOTE(review): presumably both return a dict of DataFrames keyed by bet type — confirm.
odds = get_odds_dataframes(START_DATE, END_DATE)
haraimodoshi = get_haraimodoshi_dataframes(START_DATE, END_DATE)

In [None]:
def convert_to_series(tensor, name):
    """
    Flatten a 2-D (race x horse slot) array into a long Series.

    Rows are labelled with the module-level ``race_ids`` list and the 18 columns
    with horse numbers 1..18, so ``tensor`` must have one row per collected race
    id and exactly 18 columns.

    Parameters
    ----------
    tensor : array-like
        2-D data with 18 columns.
    name : str
        Name given to the returned Series.

    Returns
    -------
    pd.Series
        Values indexed by a (race_id, uma_number) MultiIndex.
    """
    wide = pd.DataFrame(tensor, index=race_ids, columns=np.arange(1, 19))
    long = wide.reset_index(names='race_id').melt(id_vars='race_id', var_name='uma_number')
    return long.set_index(['race_id', 'uma_number'])['value'].rename(name)

In [None]:
# Assemble one long frame indexed by (race_id, uma_number): sigmoid-activated
# scores and raw targets for each of the three output channels, plus the mask.
prediction_columns = {}
for channel in range(3):
    prediction_columns[f'score_{channel}'] = convert_to_series(
        torch.sigmoid(scores[:, :, channel]), f'score_{channel}'
    )
for channel in range(3):
    prediction_columns[f'target_{channel}'] = convert_to_series(
        targets[:, :, channel], f'target_{channel}'
    )
prediction_columns['mask'] = convert_to_series(masks, 'mask')

# Keep only the rows whose mask is set, ordered by index.
df = pd.DataFrame(prediction_columns).loc[lambda frame: frame['mask']].sort_index()

In [None]:
# Rank the horses inside each race by every score channel (1 = highest score;
# ties are broken by order of appearance).
for channel in range(3):
    df[f'score_ranking_{channel}'] = (
        df.groupby('race_id')[f'score_{channel}'].rank(method='first', ascending=False)
    )

In [None]:
# 三連単フォーメーション購入シミュレーション（並列処理版）
from itertools import product
import pandas as pd
import numpy as np
from concurrent.futures import ProcessPoolExecutor, as_completed
from functools import partial
import multiprocessing as mp
from tqdm import tqdm
import time

def _select_candidates(race_df, thresholds, position):
    """Return the horse numbers of one race that pass the score and rank thresholds for ``position`` (0, 1 or 2)."""
    passes = (
        (race_df[f'score_{position}'] >= thresholds[f'score_{position}'])
        & (race_df[f'score_ranking_{position}'] <= thresholds[f'score_ranking_{position}'])
    )
    return race_df[passes].index.values


def simulate_sanrentan_formation(df, odds, haraimodoshi, thresholds):
    """
    Simulate buying trifecta ("sanrentan") formation tickets race by race.

    For each race, the horses that pass the score / rank thresholds become the
    candidates for 1st, 2nd and 3rd place. Every ordered triple of three distinct
    candidates whose odds are at least ``thresholds['min_odds']`` is bought for
    100 yen. The payouts of ALL bought combinations that paid out are summed, so
    a dead heat with several winning tickets is no longer under-counted.

    Parameters
    ----------
    df : pd.DataFrame
        Predictions indexed by (race_id, uma_number) with columns
        ``score_0..2`` and ``score_ranking_0..2``.
    odds : dict
        ``odds['sanrentan']`` is a DataFrame with race ids as index and
        combination strings such as "01-02-03" as columns.
    haraimodoshi : dict
        ``haraimodoshi['sanrentan']`` has the same layout and holds payouts;
        a value greater than 0 marks a winning combination.
    thresholds : dict
        Keys 'score_0', 'score_1', 'score_2', 'score_ranking_0',
        'score_ranking_1', 'score_ranking_2' and 'min_odds'.

    Returns
    -------
    pd.DataFrame
        One row per race with at least one purchase, with columns
        race_id, total_bet, payout, hit_count (0 or 1) and purchase_count.
        An empty frame is returned when nothing was bought.
    """
    bet_unit = 100  # every ticket is assumed to cost 100 yen
    odds_table = odds['sanrentan']
    payout_table = haraimodoshi['sanrentan']
    min_odds = thresholds['min_odds']
    results = []

    for race_id in df.index.get_level_values('race_id').unique():
        # Without an odds row no ticket can qualify, so decide this once per
        # race instead of once per candidate triple.
        if race_id not in odds_table.index:
            continue

        race_df = df.loc[race_id]
        first_candidates = _select_candidates(race_df, thresholds, 0)
        second_candidates = _select_candidates(race_df, thresholds, 1)
        third_candidates = _select_candidates(race_df, thresholds, 2)

        # Formation: every ordered triple of three different horses.
        purchases = []
        for first, second, third in product(first_candidates, second_candidates, third_candidates):
            if len({first, second, third}) < 3:
                continue
            combination = f"{first:02d}-{second:02d}-{third:02d}"
            if combination not in odds_table.columns:
                continue
            # A NaN odds value compares False and is therefore skipped.
            if odds_table.loc[race_id, combination] >= min_odds:
                purchases.append(combination)

        if not purchases:
            continue

        # Collect the payout of every purchased combination that won.
        payout = 0
        hit_count = 0
        if race_id in payout_table.index:
            for combination in purchases:
                if combination not in payout_table.columns:
                    continue
                payout_value = payout_table.loc[race_id, combination]
                if payout_value > 0:
                    payout += payout_value
                    hit_count = 1  # counted per race, not per winning ticket

        results.append({
            'race_id': race_id,
            'total_bet': bet_unit * len(purchases),
            'payout': payout,
            'hit_count': hit_count,
            'purchase_count': len(purchases)
        })

    return pd.DataFrame(results)

def evaluate_thresholds_worker(args):
    """
    Evaluate one threshold setting (entry point for the process pool).

    Parameters
    ----------
    args : tuple
        ``(thresholds, df, odds, haraimodoshi)`` as expected by
        ``simulate_sanrentan_formation``.

    Returns
    -------
    dict
        The aggregate metrics total_bet, total_payout, hit_count, recovery_rate,
        max_payout, evaluation_score and race_count, followed by the threshold
        values themselves. All metrics are 0 when nothing was bought or when the
        simulation raised (the error is printed and otherwise swallowed).
    """
    thresholds, df, odds, haraimodoshi = args

    metric_names = ('total_bet', 'total_payout', 'hit_count', 'recovery_rate',
                    'max_payout', 'evaluation_score', 'race_count')

    try:
        sim_results = simulate_sanrentan_formation(df, odds, haraimodoshi, thresholds)

        # Start from an all-zero summary; it is kept as is when no race was bet on.
        summary = dict.fromkeys(metric_names, 0)
        if len(sim_results) != 0:
            bet_sum = sim_results['total_bet'].sum()
            payout_sum = sim_results['payout'].sum()
            hits = sim_results['hit_count'].sum()
            top_payout = sim_results['payout'].max()
            has_bets = bet_sum > 0

            # Recovery rate = total payout / total stake.
            recovery = payout_sum / bet_sum if has_bets else 0

            # Score = ((total payout - largest single payout) / total stake) ** hit count,
            # i.e. the recovery rate without the best race, raised to the number of hits.
            score = 0
            if hits > 0 and has_bets:
                score = ((payout_sum - top_payout) / bet_sum) ** hits

            summary = {
                'total_bet': bet_sum,
                'total_payout': payout_sum,
                'hit_count': hits,
                'recovery_rate': recovery,
                'max_payout': top_payout,
                'evaluation_score': score,
                'race_count': len(sim_results)
            }

        # Attach the thresholds so each result row is self-describing.
        summary.update(thresholds)
        return summary

    except Exception as e:
        print(f"Error in worker: {e}")
        # Best effort: report a neutral row instead of aborting the grid search.
        fallback = dict.fromkeys(metric_names, 0)
        fallback.update(thresholds)
        return fallback

def create_threshold_combinations():
    """
    Build the full grid of threshold settings to evaluate.

    Returns
    -------
    list of dict
        One dict per grid point with the keys score_0, score_1, score_2,
        score_ranking_0, score_ranking_1, score_ranking_2 and min_odds.
        The last key varies fastest.
    """
    score_grid = [0.1, 0.15, 0.2, 0.25, 0.3]
    ranking_grid = [1, 18]
    min_odds_grid = [1.5, 2, 3, 4, 5.0, 7.5, 10.0, 15.0, 20.0, 30.0, 40, 50]

    # One axis per key, in the same order as the keys.
    keys = ('score_0', 'score_1', 'score_2',
            'score_ranking_0', 'score_ranking_1', 'score_ranking_2',
            'min_odds')
    axes = [score_grid] * 3 + [ranking_grid] * 3 + [min_odds_grid]

    return [dict(zip(keys, values)) for values in product(*axes)]

def grid_search_optimal_thresholds_parallel(df, odds, haraimodoshi, n_workers=None):
    """
    Evaluate every threshold combination in a process pool and pick the best one.

    Parameters
    ----------
    df : pd.DataFrame
        Prediction frame passed through to the simulation.
    odds : dict
        Odds data passed through to the simulation.
    haraimodoshi : dict
        Payout data passed through to the simulation.
    n_workers : int, optional
        Number of worker processes; defaults to ``min(cpu_count, 8)``.

    Returns
    -------
    tuple
        ``(best_thresholds, best_result, results)`` where ``results`` is a
        DataFrame with one row per successfully evaluated combination.
        ``best_thresholds`` and ``best_result`` are ``None`` when no task
        returned a result.
    """
    if n_workers is None:
        n_workers = min(mp.cpu_count(), 8)  # cap at 8 processes

    print(f"使用プロセス数: {n_workers}")

    threshold_combinations = create_threshold_combinations()
    total_combinations = len(threshold_combinations)
    print(f"総組み合わせ数: {total_combinations:,}")

    threshold_keys = ['score_0', 'score_1', 'score_2',
                      'score_ranking_0', 'score_ranking_1',
                      'score_ranking_2', 'min_odds']

    # Refresh the progress postfix roughly every 10% of the grid. max(1, ...)
    # keeps the modulo below valid for grids with fewer than 10 combinations.
    report_every = max(1, total_combinations // 10)

    best_score = -1
    best_thresholds = None
    best_result = None
    results_list = []

    start_time = time.time()

    with ProcessPoolExecutor(max_workers=n_workers) as executor:
        # NOTE(review): df / odds / haraimodoshi are part of every task's arguments and
        # are therefore sent to the workers once per combination; consider a pool
        # initializer if submission time becomes a bottleneck.
        futures = [
            executor.submit(evaluate_thresholds_worker, (thresholds, df, odds, haraimodoshi))
            for thresholds in threshold_combinations
        ]

        with tqdm(total=total_combinations, desc='Grid Search Progress',
                  bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]') as pbar:

            completed_count = 0
            for future in as_completed(futures):
                try:
                    result = future.result()
                    results_list.append(result)

                    # Track the best combination seen so far.
                    if result['evaluation_score'] > best_score:
                        best_score = result['evaluation_score']
                        best_thresholds = {k: result[k] for k in threshold_keys}
                        best_result = result.copy()
                except Exception as e:
                    print(f"タスクでエラーが発生: {e}")
                finally:
                    # Advance exactly once per finished task, whether it succeeded
                    # or failed, so the bar can never overshoot its total.
                    completed_count += 1
                    pbar.update(1)

                if completed_count % report_every == 0:
                    elapsed_time = time.time() - start_time
                    progress_pct = (completed_count / total_combinations) * 100
                    pbar.set_postfix({
                        'Progress': f'{progress_pct:.1f}%',
                        'Best Score': f'{best_score:.6f}',
                        'Elapsed': f'{elapsed_time:.1f}s'
                    })

    elapsed_time = time.time() - start_time
    print(f"\n並列処理完了時間: {elapsed_time:.2f}秒")
    # Guard against a zero elapsed time (e.g. an empty grid on a coarse clock).
    rate = total_combinations / elapsed_time if elapsed_time > 0 else float('inf')
    print(f"1秒あたりの処理数: {rate:.1f}")

    return best_thresholds, best_result, pd.DataFrame(results_list)

In [None]:
print("三連単フォーメーション購入シミュレーションを開始...")
print(f"利用可能CPU数: {mp.cpu_count()}")

print("\n=== 並列処理版でグリッドサーチを実行 ===")
best_thresholds, best_result, all_results = grid_search_optimal_thresholds_parallel(
    df, odds, haraimodoshi, n_workers=6
)

# Report the outcome.
print(f"\n{'='*50}")
print("最適化完了！")
print(f"{'='*50}")

if best_thresholds is None or best_result is None:
    # The search returns None when no task produced a result; previously this
    # cell then failed with AttributeError on ``best_thresholds.items()``.
    print("\n有効な結果が得られなかったため、結果表示をスキップします。")
else:
    print(f"\n最適な閾値:")
    for key, value in best_thresholds.items():
        print(f"  {key}: {value}")

    print(f"\n最適閾値での結果:")
    print(f"  購入総額: {best_result['total_bet']:,}円")
    print(f"  払戻総額: {best_result['total_payout']:,}円")
    print(f"  的中回数: {best_result['hit_count']}回")
    print(f"  回収率: {best_result['recovery_rate']:.3f}")
    print(f"  最高払戻: {best_result['max_payout']:,}円")
    print(f"  評価値: {best_result['evaluation_score']:.6f}")
    print(f"  レース数: {best_result['race_count']}レース")

    # Also show the ten best-scoring combinations.
    print(f"\n上位10件の結果:")
    top_results = all_results.nlargest(10, 'evaluation_score')
    print(top_results[['evaluation_score', 'recovery_rate', 'hit_count', 'total_bet', 'total_payout']])

In [None]:
def analyze_and_plot_results(all_results):
    """
    Plot four metrics against ``min_odds`` for the five best parameter groups.

    A parameter group is one combination of the six score / ranking thresholds
    (everything except ``min_odds``). Groups are ranked by their best
    ``evaluation_score``, and for the top five a 2x2 figure is drawn showing
    profit, race count, hit rate and recovery rate as functions of ``min_odds``.

    Side effect: the columns ``profit``, ``hit_rate`` and ``group_key`` are
    added to ``all_results`` in place.

    Parameters
    ----------
    all_results : pd.DataFrame
        Grid-search results with the metric columns total_bet, total_payout,
        hit_count, race_count, recovery_rate, evaluation_score and the seven
        threshold columns.

    Returns
    -------
    tuple
        ``(top_data, group_names)``: the rows belonging to the top five groups
        (with an extra ``group_name`` column) and a dict mapping each group key
        to its legend label.
    """
    # Derived metrics: profit = payout - stake; hit rate = hits per race with a bet.
    all_results['profit'] = all_results['total_payout'] - all_results['total_bet']
    all_results['hit_rate'] = all_results['hit_count'] / all_results['race_count']

    # Group key: the six non-odds thresholds joined with '_'.
    group_columns = ['score_0', 'score_1', 'score_2',
                     'score_ranking_0', 'score_ranking_1', 'score_ranking_2']
    group_key = all_results[group_columns[0]].astype(str)
    for column in group_columns[1:]:
        group_key = group_key + '_' + all_results[column].astype(str)
    all_results['group_key'] = group_key

    # Rank the groups by their best evaluation score and keep the top five.
    group_max_scores = all_results.groupby('group_key')['evaluation_score'].max().reset_index()
    group_max_scores = group_max_scores.sort_values('evaluation_score', ascending=False)
    top_5_groups = group_max_scores.head(5)['group_key'].values

    top_data = all_results[all_results['group_key'].isin(top_5_groups)].copy()

    # Human-readable legend labels, e.g. "Group1: S(0.1,0.2,0.3) R(1,18,18)".
    group_names = {}
    for i, key in enumerate(top_5_groups):
        parts = key.split('_')
        score_0, score_1, score_2 = parts[0], parts[1], parts[2]
        ranking_0, ranking_1, ranking_2 = parts[3], parts[4], parts[5]
        group_names[key] = f"Group{i+1}: S({score_0},{score_1},{score_2}) R({ranking_0},{ranking_1},{ranking_2})"

    top_data['group_name'] = top_data['group_key'].map(group_names)

    colors = plt.cm.Set1(np.linspace(0, 1, 5))

    # Four panels on a 2x2 grid.
    fig, axes = plt.subplots(2, 2, figsize=(18, 12))
    fig.suptitle('Top 5 Parameter Groups Analysis', fontsize=16, y=0.98)

    # (axis, column, display scale, marker, y label, title, reference line or None)
    panels = [
        (axes[0, 0], 'profit', 1, 'o', 'Average Profit (円)', 'Profit vs Min Odds Threshold', 0),
        (axes[0, 1], 'race_count', 1, 's', 'Average Race Count', 'Race Count vs Min Odds Threshold', None),
        (axes[1, 0], 'hit_rate', 100, '^', 'Hit Rate (%)', 'Hit Rate vs Min Odds Threshold', None),
        (axes[1, 1], 'recovery_rate', 100, 'D', 'Recovery Rate (%)', 'Recovery Rate vs Min Odds Threshold', 100),
    ]

    for ax, column, scale, marker, ylabel, title, reference_y in panels:
        for i, (key, group_name) in enumerate(group_names.items()):
            group_data = top_data[top_data['group_key'] == key]
            by_odds = group_data.groupby('min_odds')[column].mean().reset_index()

            ax.plot(by_odds['min_odds'], by_odds[column] * scale,
                    marker=marker, linewidth=2, markersize=6,
                    color=colors[i], label=group_name)

        ax.set_xlabel('Min Odds Threshold', fontsize=11)
        ax.set_ylabel(ylabel, fontsize=11)
        ax.set_title(title, fontsize=12)
        ax.grid(True, alpha=0.3)
        if reference_y is not None:
            # Break-even reference line (0 yen profit / 100% recovery).
            ax.axhline(y=reference_y, color='red', linestyle='--', alpha=0.7)

    # One shared legend to the right of the panels; every panel uses the same labels.
    handles, labels = axes[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='center right', bbox_to_anchor=(1.02, 0.5), fontsize=10)

    plt.tight_layout()
    plt.subplots_adjust(right=0.85)
    plt.show()

    return top_data, group_names

In [None]:
# Analyse the grid-search results and draw the comparison charts.
top_data, group_names = analyze_and_plot_results(all_results)