In [1]:
%load_ext autoreload
%autoreload 2

import torch
import logging
from torch.utils.data import DataLoader
from horgues3.models import Horgues3Model, PlackettLuceLoss
from horgues3.dataset import Horgues3Dataset
import numpy as np
from horgues3.betting import calculate_betting_probabilities, get_betting_combinations, format_betting_results

# Configure the root logger at INFO level so that records emitted by this
# notebook and by the imported horgues3 modules show up in the cell output.
logging.basicConfig(level=logging.INFO)
# Notebook-level logger; records appear under the "__main__" name in the output.
logger = logging.getLogger(__name__)

In [2]:
# Training dataset covering 2020-01-01 through 2025-05-31.
dataset = Horgues3Dataset(start_ymd='20200101', end_ymd='20250531', max_horses=18)

# Run the full preparation pipeline: fetch, fit the preprocessors on this
# data, apply them, then assemble the per-race samples.
(
    dataset
    .fetch_data()
    .fit_preprocessors()
    .preprocess_data()
    .build_race_data()
)

# Shuffled mini-batches for training (num_workers is kept at 0 for Windows).
train_loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0)

# Inspect the first sample: tensors are summarised by shape and dtype,
# every other field is logged verbatim.
sample = dataset[0]
logger.info("Sample data structure:")
for key, value in sample.items():
    message = (
        f"{key}: {value.shape} - {value.dtype}"
        if isinstance(value, torch.Tensor)
        else f"{key}: {value}"
    )
    logger.info(message)

INFO:horgues3.dataset:Fetching data from database...
INFO:horgues3.dataset:Data fetched successfully. Retrieved 392791 records.
INFO:horgues3.dataset:Post-processing data...
INFO:horgues3.dataset:Post-processing completed.
INFO:horgues3.dataset:Fitting preprocessors...
INFO:horgues3.dataset:Preprocessors fitted successfully.
INFO:horgues3.dataset:Preprocessing data...
INFO:horgues3.dataset:Data preprocessing completed.
INFO:horgues3.dataset:Building race data...
INFO:horgues3.dataset:Built 32284 races with valid data.
INFO:__main__:Sample data structure:
INFO:__main__:race_id: 2020010145120101
INFO:__main__:x_num: torch.Size([18, 2]) - torch.float32
INFO:__main__:x_cat: torch.Size([18, 1]) - torch.int64
INFO:__main__:rankings: torch.Size([18]) - torch.int64
INFO:__main__:mask: torch.Size([18]) - torch.bool
INFO:__main__:num_horses: 12


In [3]:
# Hold out a single day of races for validation.
validation_date = '20250601'
logger.info(f"Preparing validation data for {validation_date}...")

# The validation dataset spans exactly that one day.
validation_dataset = Horgues3Dataset(start_ymd=validation_date, end_ymd=validation_date, max_horses=18)

# Fetch and preprocess using the preprocessors already fitted on the training
# dataset (nothing is re-fitted on the validation day).
(
    validation_dataset
    .fetch_data()
    .set_preprocessors(dataset.get_preprocessors())
    .preprocess_data()
    .build_race_data()
)

# Validation batches are served in dataset order (no shuffling).
validation_loader = DataLoader(validation_dataset, batch_size=32, shuffle=False, num_workers=0)

INFO:__main__:Preparing validation data for 20250601...
INFO:horgues3.dataset:Fetching data from database...
INFO:horgues3.dataset:Fetching data from database...
INFO:horgues3.dataset:Data fetched successfully. Retrieved 357 records.
INFO:horgues3.dataset:Post-processing data...
INFO:horgues3.dataset:Post-processing completed.
INFO:horgues3.dataset:Preprocessing data...
INFO:horgues3.dataset:Data preprocessing completed.
INFO:horgues3.dataset:Building race data...
INFO:horgues3.dataset:Built 24 races with valid data.


In [4]:
# Seed PyTorch's global RNG before any weights are created so that model
# initialisation and the shuffled batch order drawn by train_loader are
# repeatable when the notebook is re-run from this cell.
# NOTE(review): bit-exact GPU reproducibility may additionally require
# deterministic-algorithm settings; this only fixes the random streams.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
logger.info(f"Using device: {device}")

# Constructor keyword arguments for the model, as reported by the training dataset.
feature_config = dataset.get_feature_config()

# Instantiate the model, the loss function and the optimiser.
model = Horgues3Model(**feature_config).to(device)
criterion = PlackettLuceLoss(temperature=1.0)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

# Training hyper-parameters.
num_epochs = 10
log_interval = 10  # emit a running-average loss every `log_interval` batches

# Summarise the run: trainable parameter count and data-loader geometry.
logger.info(f"Model parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")
logger.info(f"Training samples: {len(dataset):,}")
logger.info(f"Batch size: {train_loader.batch_size}")
logger.info(f"Batches per epoch: {len(train_loader)}")

INFO:__main__:Using device: cuda
INFO:__main__:Model parameters: 1,504,129
INFO:__main__:Training samples: 32,284
INFO:__main__:Batch size: 32
INFO:__main__:Batches per epoch: 1009


In [None]:
# Training loop: each epoch makes one optimisation pass over train_loader and
# then logs a betting-probability report for every race in validation_loader.
running_loss = 0.0  # loss accumulated over the current logging window
total_batches = 0   # batches processed across all epochs

for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0
    # Start a fresh logging window every epoch. Without this, the batches left
    # over after the last full window of the previous epoch would be added to
    # the first window of this one while still being divided by log_interval,
    # inflating the first reported average.
    running_loss = 0.0

    model.train()
    for batch_idx, batch in enumerate(train_loader):
        # Move the batch to the compute device.
        x_num = batch['x_num'].to(device)
        x_cat = batch['x_cat'].to(device)
        rankings = batch['rankings'].to(device)
        mask = batch['mask'].to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass.
        scores = model(x_num, x_cat, mask)

        # Loss computed from the scores, the rankings and the mask.
        loss = criterion(scores, rankings, mask)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Read the scalar once; every .item() call forces a device sync.
        batch_loss = loss.item()
        epoch_loss += batch_loss
        running_loss += batch_loss
        num_batches += 1
        total_batches += 1

        # Periodic progress log: mean loss over the last log_interval batches.
        if (batch_idx + 1) % log_interval == 0:
            avg_loss = running_loss / log_interval
            logger.info(f'Epoch {epoch + 1}/{num_epochs}, Batch {batch_idx + 1}/{len(train_loader)}, Avg Loss: {avg_loss:.4f}')
            running_loss = 0.0

    # Mean loss over the whole epoch (NaN instead of a crash if the loader was empty).
    avg_epoch_loss = epoch_loss / num_batches if num_batches else float('nan')
    logger.info(f'Epoch {epoch + 1}/{num_epochs} completed - Average Loss: {avg_epoch_loss:.4f}')

    # Validation pass.
    logger.info(f"Running validation for epoch {epoch + 1}...")
    model.eval()

    # Report lines are collected first and logged together after the pass.
    validation_results = []
    validation_results.append("")
    validation_results.append(f"\n{'='*80}")
    validation_results.append(f"VALIDATION RESULTS - EPOCH {epoch + 1}")
    validation_results.append(f"{'='*80}")

    with torch.no_grad():
        for val_batch in validation_loader:
            # Move the batch to the compute device.
            x_num = val_batch['x_num'].to(device)
            x_cat = val_batch['x_cat'].to(device)
            mask = val_batch['mask'].to(device)

            # Predict the per-horse scores.
            scores = model(x_num, x_cat, mask)

            # Copy the whole batch back to host memory once instead of once per race.
            scores_np = scores.cpu().numpy()
            mask_np = mask.cpu().numpy()

            # Process every race in the batch individually.
            batch_size = scores_np.shape[0]
            for i in range(batch_size):
                race_id = val_batch['race_id'][i]
                # The collate step may have wrapped the id in a 0-d tensor;
                # unwrap it to a plain Python value, as is done for num_horses.
                if isinstance(race_id, torch.Tensor):
                    race_id = race_id.item()
                race_scores = scores_np[i]
                race_mask = mask_np[i]
                num_horses = val_batch['num_horses'][i].item()

                # Betting probabilities for this race, passed as a batch of one.
                probabilities = calculate_betting_probabilities(
                    horse_strengths=race_scores.reshape(1, -1),
                    mask=race_mask.reshape(1, -1)
                )

                # Drop the leading batch-of-one axis for every bet type.
                for bet_type in probabilities:
                    probabilities[bet_type] = probabilities[bet_type][0]

                # Format the result for this race.
                result = format_betting_results(race_id, probabilities, num_horses)
                validation_results.append(result)

    validation_results.append(f"{'='*80}")
    validation_results.append("")

    # Emit the validation report.
    for result in validation_results:
        logger.info(result)

logger.info("Training completed!")

INFO:__main__:Epoch 1/10, Batch 10/1009, Avg Loss: 21.6912
INFO:__main__:Epoch 1/10, Batch 20/1009, Avg Loss: 20.9054
INFO:__main__:Epoch 1/10, Batch 30/1009, Avg Loss: 21.0486
INFO:__main__:Epoch 1/10, Batch 40/1009, Avg Loss: 21.6283
INFO:__main__:Epoch 1/10, Batch 50/1009, Avg Loss: 20.8862
INFO:__main__:Epoch 1/10, Batch 60/1009, Avg Loss: 20.5227
INFO:__main__:Epoch 1/10, Batch 70/1009, Avg Loss: 21.4370
INFO:__main__:Epoch 1/10, Batch 80/1009, Avg Loss: 20.7360
INFO:__main__:Epoch 1/10, Batch 90/1009, Avg Loss: 21.7028
INFO:__main__:Epoch 1/10, Batch 100/1009, Avg Loss: 20.6720
INFO:__main__:Epoch 1/10, Batch 110/1009, Avg Loss: 20.4984
INFO:__main__:Epoch 1/10, Batch 120/1009, Avg Loss: 20.6078
INFO:__main__:Epoch 1/10, Batch 130/1009, Avg Loss: 21.1518
INFO:__main__:Epoch 1/10, Batch 140/1009, Avg Loss: 21.4765
INFO:__main__:Epoch 1/10, Batch 150/1009, Avg Loss: 20.6782
INFO:__main__:Epoch 1/10, Batch 160/1009, Avg Loss: 21.2873
INFO:__main__:Epoch 1/10, Batch 170/1009, Avg Los