In [None]:
# 必要なライブラリをインポート
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
import logging
from tqdm import tqdm
import matplotlib.pyplot as plt
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
from sqlalchemy import create_engine, text
import torch.nn.functional as F

# 自作モジュールをインポート
from _dataset import HorguesDataset
from _models import HorguesModel
from horgues3.losses import HorguesLoss
from horgues3.odds import get_odds_dataframes
from horgues3.haraimodoshi import get_haraimodoshi_dataframes
from horgues3.database import create_database_engine

In [None]:
# Logging configuration: INFO level, with timestamp and severity prefixed to each message.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

In [None]:
# Device selection: prefer the first CUDA GPU, but fall back to the CPU when CUDA is
# unavailable so that the later `.to(device)` calls do not fail on GPU-less machines.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
logger.info(f"使用デバイス: {device}")

In [None]:
# Batch size handed to the DataLoader that feeds the prediction loop.
BATCH_SIZE = 128

In [None]:
# Load the saved checkpoint onto the CPU (map_location='cpu').
# The notebook reads its 'preprocessing_params', 'model_config' and 'model_state_dict' entries.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True; if the checkpoint holds
# non-tensor Python objects this call may need an explicit weights_only setting — confirm.
checkpoint = torch.load('outputs/best_model.pth', map_location='cpu')

In [None]:
# Date range used for the prediction dataset and for the odds / payout queries.
# The values look like YYYYMMDD strings — confirm the format expected by the helpers.
START_DATE = '20230101'
END_DATE = '20231231'

logger.info(f"期間: {START_DATE} - {END_DATE}")

In [None]:
# Build the dataset for the configured date range, reusing the preprocessing
# parameters stored in the checkpoint and the on-disk cache under 'cache/test'.
logger.info("データセットを作成中...")
dataset_options = {
    'start_date': START_DATE,
    'end_date': END_DATE,
    'preprocessing_params': checkpoint['preprocessing_params'],
    'cache_dir': 'cache/test',
    'use_cache': True,
}
dataset = HorguesDataset(**dataset_options)

logger.info(f"データサイズ: {len(dataset)}")

In [None]:
# Unshuffled loader: batch order must stay stable because the prediction loop
# collects race ids, masks, targets and scores side by side.
loader_options = dict(
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)
loader = DataLoader(dataset, **loader_options)

In [None]:
# Pull the model configuration out of the checkpoint and log a short summary of it.
model_config = checkpoint['model_config']
for summary_line in (
    f"数値特徴量数: {len(model_config['numerical_features'])}",
    f"カテゴリ特徴量数: {len(model_config['categorical_features'])}",
    f"時系列データ: {model_config['sequence_names']}",
):
    logger.info(summary_line)

In [None]:
# Instantiate the model from the checkpointed configuration and move it to the target device.
MODEL_CONFIG_KEYS = (
    'sequence_names',
    'feature_aliases',
    'numerical_features',
    'categorical_features',
)
model_kwargs = {key: model_config[key] for key in MODEL_CONFIG_KEYS}
model = HorguesModel(**model_kwargs).to(device)

In [None]:
# Copy the weights stored in the checkpoint into the freshly constructed model.
model.load_state_dict(checkpoint['model_state_dict'])

In [None]:
def _to_device(tensors):
    """Return a new flat ``{name: tensor}`` dict with every tensor moved to ``device``."""
    return {key: value.to(device) for key, value in tensors.items()}


# Per-batch accumulators; the tensors are concatenated once after the loop.
race_ids = []
mask_batches = []
target_batches = []
score_batches = []

model.eval()
with torch.no_grad():
    for batch in tqdm(loader, desc='Predict'):
        # Keep ids, masks and targets on the CPU: they are only needed for the
        # analysis afterwards, never by the model itself.
        race_ids.extend(batch['race_id'])
        mask_batches.append(batch['mask'])
        target_batches.append(batch['target'])

        x_num = _to_device(batch['x_num'])
        x_cat = _to_device(batch['x_cat'])
        sequence_data = {
            seq_name: {
                'x_num': _to_device(seq_data['x_num']),
                'x_cat': _to_device(seq_data['x_cat']),
                'mask': seq_data['mask'].to(device),
            }
            for seq_name, seq_data in batch['sequence_data'].items()
        }
        mask = batch['mask'].to(device)

        # The targets are not passed to the model, so they are no longer copied to the device.
        score_batches.append(model(x_num, x_cat, sequence_data, mask).cpu())

# Stack the per-batch results along the batch dimension.
masks = torch.cat(mask_batches, dim=0)
targets = torch.cat(target_batches, dim=0)
scores = torch.cat(score_batches, dim=0)

In [None]:
# Fetch odds and payout (haraimodoshi) data for the same date range via the horgues3 helpers.
# NOTE(review): neither result is referenced in the cells visible here — confirm they are used later.
odds = get_odds_dataframes(START_DATE, END_DATE)
haraimodoshi = get_haraimodoshi_dataframes(START_DATE, END_DATE)

In [None]:
# Sanity check: display the shape of the concatenated score tensor.
scores.shape

In [None]:
# Overlay the distribution of sigmoid-activated scores for each output channel,
# restricted to entries selected by `masks`. Channels are drawn in the order
# 2, 1, 0 (same stacking order as before); labels and a legend make the three
# series distinguishable.
fig, ax = plt.subplots()
for channel in (2, 1, 0):
    channel_probs = torch.sigmoid(scores[:, :, channel][masks]).numpy()
    sns.histplot(channel_probs, ax=ax, label=f'score_{channel}')
ax.set(
    xlabel='sigmoid(score)',
    ylabel='count',
    title='Distribution of predicted scores per output channel',
)
ax.legend()
plt.show()

In [None]:
def convert_to_series(tensor, name, index=None):
    """Flatten a 2-D (race x horse slot) array into a long Series.

    Args:
        tensor: 2-D array-like (NumPy array or CPU torch tensor) with one row per
            race and one column per horse slot. Columns are numbered 1..n_columns.
        name: Name given to the returned Series.
        index: Optional row labels, one per row of ``tensor``. Defaults to the
            notebook-level ``race_ids`` list, which keeps existing two-argument
            calls working unchanged.

    Returns:
        pandas.Series indexed by a ``(race_id, uma_number)`` MultiIndex.
    """
    if index is None:
        index = race_ids

    values = np.asarray(tensor)
    # Derive the number of horse slots from the data instead of hard-coding 18.
    horse_numbers = np.arange(1, values.shape[1] + 1)

    wide = pd.DataFrame(values, index=pd.Index(index, name='race_id'), columns=horse_numbers)
    long = wide.reset_index().melt(id_vars='race_id', var_name='uma_number')
    return long.set_index(['race_id', 'uma_number'])['value'].rename(name)

In [None]:
# Assemble one long table keyed by (race_id, uma_number): sigmoid-activated scores,
# targets and the validity mask. Column order is score_0..2, target_0..2, mask.
CHANNELS = (0, 1, 2)
columns = {}
for channel in CHANNELS:
    label = f'score_{channel}'
    columns[label] = convert_to_series(F.sigmoid(scores[:, :, channel]), label)
for channel in CHANNELS:
    label = f'target_{channel}'
    columns[label] = convert_to_series(targets[:, :, channel], label)
columns['mask'] = convert_to_series(masks, 'mask')

df = pd.DataFrame(columns)
# Keep only rows flagged by the mask, then order by (race_id, uma_number).
df = df[df['mask']].sort_index()

In [None]:
# Rank the rows of each race by score_0, highest first (rank 1 = largest score_0);
# method='first' breaks ties by row order.
# NOTE(review): the ranking is derived from score_0 only — confirm that is intended
# wherever this column is used alongside score_1 / score_2.
df['score_ranking'] = df.groupby('race_id')['score_0'].rank(method='first', ascending=False)

In [None]:
# Preview the first 30 rows as a visual sanity check.
df.head(30)

In [None]:
# Create the database engine through the project helper
# ('pckeiba' is presumably the target database name — confirm).
engine = create_database_engine('pckeiba')

In [None]:
# Remove rows previously stored under index numbers 1-3 so they can be re-inserted.
delete_stmt = text("""DELETE FROM apd_shisu_2 WHERE shisu_no IN (1, 2, 3)""")

with engine.connect() as conn:
    conn.execute(delete_stmt)
    conn.commit()

In [None]:
# Write the three scores of every row in `df` to apd_shisu_2 as index numbers 1, 2 and 3.
# The statement is built once and all parameter sets are sent in a single batched
# execute instead of one round trip per row.
insert_stmt = text("""INSERT INTO apd_shisu_2 (
                            shisu_no, kaisai_nen, kaisai_tsukihi, keibajo_code, race_bango, umaban, shisu, juni
                        ) VALUES (
                            :shisu_no, :kaisai_nen, :kaisai_tsukihi, :keibajo_code, :race_bango, :umaban, :shisu, :juni
                        )""")

# shisu_no -> name of the df column holding that score.
SCORE_COLUMNS = {1: 'score_0', 2: 'score_1', 3: 'score_2'}

insert_params = []
for (race_id, uma_number), row in df.iterrows():
    # Columns are read by name, so the loop no longer depends on df's column order.
    # NOTE(review): race_id is sliced as [0:4] year, [4:8] month/day, [8:10] course code and
    # [14:16] race number, skipping characters 10-13 — confirm against the race_id layout.
    race_keys = {
        "kaisai_nen": race_id[0:4],
        "kaisai_tsukihi": race_id[4:8],
        "keibajo_code": race_id[8:10],
        "race_bango": race_id[14:16],
        "umaban": uma_number,
        # NOTE(review): the same score_0-based ranking is stored as `juni` for all three
        # index numbers — confirm this is intended rather than a per-score ranking.
        "juni": row['score_ranking'],
    }
    for shisu_no, score_column in SCORE_COLUMNS.items():
        insert_params.append({"shisu_no": shisu_no, "shisu": row[score_column], **race_keys})

# Skip the database call when there is nothing to insert; an empty parameter list
# would otherwise execute the statement without any bound values.
if insert_params:
    with engine.connect() as conn:
        conn.execute(insert_stmt, insert_params)
        conn.commit()