In [None]:
# 必要なライブラリをインポート
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
import logging
from tqdm import tqdm
import matplotlib.pyplot as plt
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# 自作モジュールをインポート
from horgues3.dataset import HorguesDataset
from horgues3.models import HorguesModel
from horgues3.losses import WeightedPlackettLuceLoss

In [None]:
# Logging setup: timestamped INFO-level messages for the whole notebook.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Device selection.
# The notebook prefers the second GPU (index 1). Rather than failing later with
# an invalid-device error on hosts that have a single GPU or no CUDA at all,
# fall back to the first GPU and then to the CPU.
PREFERRED_GPU_INDEX = 1
if torch.cuda.is_available():
    gpu_index = PREFERRED_GPU_INDEX if torch.cuda.device_count() > PREFERRED_GPU_INDEX else 0
    device = torch.device(f'cuda:{gpu_index}')
else:
    device = torch.device('cpu')
logger.info(f"使用デバイス: {device}")

In [None]:
# Training hyperparameters and dataset-construction settings.
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
NUM_EPOCHS = 50
PATIENCE = 10                   # epochs without validation improvement before early stopping
NUM_HORSES = 18
HORSE_HISTORY_LENGTH = 10
HISTORY_DAYS = 365
EXCLUDE_HOURS_BEFORE_RACE = 2

# Seed the global torch and NumPy random generators so that repeated
# "Restart & Run All" executions start from the same random state.
# (GPU kernels and DataLoader workers may still introduce nondeterminism.)
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

In [None]:
# Date ranges (YYYYMMDD strings) for the training and validation splits.
TRAIN_START_DATE, TRAIN_END_DATE = '20140101', '20231231'
VAL_START_DATE, VAL_END_DATE = '20240101', '20241231'

# Record both ranges in the log so each run documents its own split.
logger.info(f"学習期間: {TRAIN_START_DATE} - {TRAIN_END_DATE}")
logger.info(f"検証期間: {VAL_START_DATE} - {VAL_END_DATE}")

In [None]:
# Build the training and validation datasets.
# Settings that are identical for both splits are collected once.
dataset_common_kwargs = dict(
    num_horses=NUM_HORSES,
    horse_history_length=HORSE_HISTORY_LENGTH,
    history_days=HISTORY_DAYS,
    exclude_hours_before_race=EXCLUDE_HOURS_BEFORE_RACE,
    use_cache=True,
)

logger.info("学習データセットを作成中...")
train_dataset = HorguesDataset(
    start_date=TRAIN_START_DATE,
    end_date=TRAIN_END_DATE,
    cache_dir='cache/train',
    **dataset_common_kwargs,
)

# The validation split receives the preprocessing parameters obtained from the
# training dataset instead of deriving its own.
logger.info("検証データセットを作成中...")
val_dataset = HorguesDataset(
    start_date=VAL_START_DATE,
    end_date=VAL_END_DATE,
    preprocessing_params=train_dataset.get_preprocessing_params(),
    cache_dir='cache/val',
    **dataset_common_kwargs,
)

logger.info(f"学習データサイズ: {len(train_dataset)}")
logger.info(f"検証データサイズ: {len(val_dataset)}")

In [None]:
# Wrap both datasets in DataLoaders.
# Only the shuffle flag differs: training batches are shuffled, validation
# batches keep dataset order.
loader_common_kwargs = dict(batch_size=BATCH_SIZE, num_workers=4, pin_memory=True)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_common_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_common_kwargs)

In [None]:
# Fetch the model configuration from the training dataset.
# The entries read below are 'numerical_features', 'categorical_features'
# and 'sequence_names'; the first two are logged by length only.
model_config = train_dataset.get_model_config()
logger.info(f"数値特徴量数: {len(model_config['numerical_features'])}")
logger.info(f"カテゴリ特徴量数: {len(model_config['categorical_features'])}")
logger.info(f"時系列データ: {model_config['sequence_names']}")

In [None]:
# Instantiate the model.
# Feature definitions are taken from the dataset-provided configuration ...
feature_spec = {
    key: model_config[key]
    for key in (
        'sequence_names',
        'feature_aliases',
        'numerical_features',
        'categorical_features',
    )
}

# ... while the architecture hyperparameters are fixed here.
architecture = dict(
    d_token=256,
    num_bins=10,
    binning_temperature=1.0,
    binning_init_range=3.0,
    ft_n_layers=3,
    ft_n_heads=8,
    ft_d_ffn=1024,
    seq_n_layers=3,
    seq_n_heads=8,
    seq_d_ffn=1024,
    race_n_layers=3,
    race_n_heads=8,
    race_d_ffn=1024,
    dropout=0.1,
)

model = HorguesModel(**feature_spec, **architecture)
model = model.to(device)

In [None]:
# Report the model size: every parameter, and the subset that requires gradients.
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, needs_grad in param_sizes if needs_grad)
logger.info(f"総パラメータ数: {total_params:,}")
logger.info(f"学習可能パラメータ数: {trainable_params:,}")

In [None]:
# Loss function: weighted Plackett-Luce loss with the settings below.
loss_settings = {
    'temperature': 1.0,
    'top_k': None,
    'weight_decay': 0.8,
    'reduction': 'mean',
}
criterion = WeightedPlackettLuceLoss(**loss_settings)

In [None]:
# Optimizer: AdamW over all model parameters.
adamw_settings = {
    'lr': LEARNING_RATE,
    'weight_decay': 1e-4,
    'betas': (0.9, 0.95),
}
optimizer = optim.AdamW(model.parameters(), **adamw_settings)

In [None]:
# Learning-rate schedule: cosine annealing with warm restarts.
# The first cycle lasts 10 scheduler steps and each later cycle is twice as
# long; the rate is annealed down to 1% of the initial learning rate.
min_learning_rate = LEARNING_RATE * 0.01
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer, T_0=10, T_mult=2, eta_min=min_learning_rate
)

In [None]:
# Bookkeeping for the training run:
#   per-epoch average losses, the best validation loss seen so far, and the
#   number of consecutive epochs without improvement (for early stopping).
train_losses, val_losses = [], []
patience_counter = 0
best_val_loss = float('inf')

In [None]:
# Training loop.
# Each epoch runs one pass over train_loader (with gradient clipping), one
# no-grad pass over val_loader, steps the LR scheduler once, saves a checkpoint
# whenever the average validation loss improves, and stops early after
# PATIENCE consecutive epochs without improvement.
logger.info("学習を開始します...")


def _dict_to_device(tensors, target_device):
    """Return a new dict with every value of ``tensors`` moved to ``target_device``."""
    return {key: value.to(target_device) for key, value in tensors.items()}


def _batch_to_device(batch, target_device):
    """Move one DataLoader batch to ``target_device``.

    Reads the 'x_num', 'x_cat', 'sequence_data', 'mask' and 'rankings' entries
    of ``batch`` and returns them as the tuple
    ``(x_num, x_cat, sequence_data, mask, rankings)``.
    """
    sequence_data = {
        seq_name: {
            'x_num': _dict_to_device(seq_data['x_num'], target_device),
            'x_cat': _dict_to_device(seq_data['x_cat'], target_device),
            'mask': seq_data['mask'].to(target_device),
        }
        for seq_name, seq_data in batch['sequence_data'].items()
    }
    return (
        _dict_to_device(batch['x_num'], target_device),
        _dict_to_device(batch['x_cat'], target_device),
        sequence_data,
        batch['mask'].to(target_device),
        batch['rankings'].to(target_device),
    )


# Checkpoint directory; created once up front instead of on every improvement.
model_save_path = Path('outputs')
model_save_path.mkdir(exist_ok=True)

for epoch in range(NUM_EPOCHS):
    # ---- Training phase ----
    model.train()
    train_loss = 0.0
    train_batches = 0

    train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{NUM_EPOCHS} [Train]')
    for batch in train_pbar:
        optimizer.zero_grad()

        x_num, x_cat, sequence_data, mask, rankings = _batch_to_device(batch, device)

        # Forward pass
        scores = model(x_num, x_cat, sequence_data, mask)
        loss = criterion(scores, rankings, mask)

        # Backward pass, with the gradient norm clipped to 1.0 before the update
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        train_loss += loss.item()
        train_batches += 1

        train_pbar.set_postfix({'loss': f'{loss.item():.4f}'})

    avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
    train_losses.append(avg_train_loss)

    # ---- Validation phase ----
    model.eval()
    val_loss = 0.0
    val_batches = 0

    with torch.no_grad():
        val_pbar = tqdm(val_loader, desc=f'Epoch {epoch+1}/{NUM_EPOCHS} [Val]')
        for batch in val_pbar:
            x_num, x_cat, sequence_data, mask, rankings = _batch_to_device(batch, device)

            scores = model(x_num, x_cat, sequence_data, mask)
            loss = criterion(scores, rankings, mask)

            # Fixed: the accumulator was misspelled (`val_lo`), raising NameError.
            val_loss += loss.item()
            val_batches += 1

            val_pbar.set_postfix({'loss': f'{loss.item():.4f}'})

    # An empty validation loader yields +inf so it can never count as an improvement.
    avg_val_loss = val_loss / val_batches if val_batches > 0 else float('inf')
    val_losses.append(avg_val_loss)

    # Advance the LR schedule once per epoch.
    scheduler.step()

    # Epoch summary. The logged LR is read after scheduler.step().
    # Fixed: the validation loss was referenced as `avt_val_loss` (NameError).
    current_lr = optimizer.param_groups[0]['lr']
    logger.info(f'Epoch {epoch+1}/{NUM_EPOCHS}: '
                f'Train Loss: {avg_train_loss:.4f}, '
                f'Val Loss: {avg_val_loss:.4f}, '
                f'LR: {current_lr:.6f}')

    # Keep the best model (lowest average validation loss so far).
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0

        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'train_loss': avg_train_loss,
            'val_loss': avg_val_loss,
            'model_config': model_config,
            'preprocessing_params': train_dataset.get_preprocessing_params()
        }, model_save_path / 'best_model.pth')

        logger.info(f'新しいベストモデルを保存しました (Val Loss: {best_val_loss:.4f})')
    else:
        patience_counter += 1

    # Early stopping
    if patience_counter >= PATIENCE:
        logger.info(f'Early stopping triggered after {epoch+1} epochs')
        break

logger.info("学習が終了しました")

In [None]:
# Plot the learning curves and the learning-rate schedule, then save the figure.

# Make sure the output directory exists even if no checkpoint was ever written.
output_dir = Path('outputs')
output_dir.mkdir(exist_ok=True)

# Reconstruct the learning rate used in each completed epoch.
# Plotting optimizer.param_groups[0]['lr'] once per epoch would only repeat the
# *final* learning rate (a flat line). Instead, replay the schedule on a
# throwaway optimizer configured from the live scheduler's own settings.
# This assumes `scheduler` is a CosineAnnealingWarmRestarts instance that is
# stepped once per epoch, as in the training loop of this notebook.
replay_optimizer = optim.SGD([nn.Parameter(torch.zeros(1))], lr=scheduler.base_lrs[0])
replay_scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    replay_optimizer,
    T_0=scheduler.T_0,
    T_mult=scheduler.T_mult,
    eta_min=scheduler.eta_min,
)
lr_history = []
for _ in range(len(train_losses)):
    lr_history.append(replay_optimizer.param_groups[0]['lr'])
    replay_optimizer.step()   # no gradients exist, so this is a no-op update
    replay_scheduler.step()

fig, (loss_ax, lr_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: average loss per epoch.
loss_ax.plot(train_losses, label='Train Loss', color='blue')
loss_ax.plot(val_losses, label='Validation Loss', color='red')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()
loss_ax.grid(True)

# Right panel: learning rate in effect during each epoch.
lr_ax.plot(lr_history, label='Learning Rate', color='green')
lr_ax.set_xlabel('Epoch')
lr_ax.set_ylabel('Learning Rate')
lr_ax.set_title('Learning Rate Schedule')
lr_ax.legend()
lr_ax.grid(True)

fig.tight_layout()
fig.savefig(output_dir / 'training_curves.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Summarise the run: best validation loss plus the losses of the last epoch.
logger.info(f"最良の検証損失: {best_val_loss:.4f}")
if train_losses and val_losses:
    logger.info(f"学習データ最終損失: {train_losses[-1]:.4f}")
    logger.info(f"検証データ最終損失: {val_losses[-1]:.4f}")
else:
    # No epoch finished (e.g. training was interrupted), so indexing [-1]
    # would raise IndexError; report that instead.
    logger.warning("完了したエポックがないため、最終損失を表示できません")

In [None]:
# Reload the saved best checkpoint as a sanity check.
logger.info("保存されたベストモデルをロードして確認...")
# Besides state dicts, the checkpoint stores 'model_config' and
# 'preprocessing_params', whose contents are project-defined and not visible
# here. If they hold arbitrary Python/NumPy objects, the weights-only loader
# (the torch.load default since PyTorch 2.6) rejects the file, so full
# unpickling is requested explicitly.
# NOTE: weights_only=False executes pickle code; it is acceptable here only
# because the file was written by this notebook. Do not use it on untrusted files.
checkpoint = torch.load('outputs/best_model.pth', map_location=device, weights_only=False)
model.load_state_dict(checkpoint['model_state_dict'])
logger.info(f"ベストモデル (Epoch {checkpoint['epoch']+1}) をロードしました")