In [1]:
import math
from pathlib import Path

import pandas as pd

DATA_DIR = Path("..", "data")
PREPROCESSED_DIR = DATA_DIR / "01_preprocessed"
TRAIN_DIR = DATA_DIR / "03_train"
OUTPUT_DIR = DATA_DIR / "04_evaluation"
train_dir = TRAIN_DIR
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
evaluation_filename = "evaluation_lightgbm_time_cv_nopast_odds.csv"
return_tables_filepath= PREPROCESSED_DIR / "return_tables.pickle"

In [2]:
return_tables = pd.read_pickle(PREPROCESSED_DIR / "return_tables.pickle")

In [3]:
evaluation_df = pd.read_csv(train_dir / evaluation_filename, sep="\t")

In [15]:
return_tables.head(11)

Unnamed: 0,race_id,bet_type,win_umaban,return
0,201606010105,単勝,[2],400
1,201606010105,複勝,[2],170
2,201606010105,複勝,[13],190
3,201606010105,複勝,[10],200
4,201606010105,馬連,"[2, 13]",1470
5,201606010105,ワイド,"[2, 13]",530
6,201606010105,ワイド,"[2, 10]",680
7,201606010105,ワイド,"[10, 13]",670
8,201606010105,馬単,"[2, 13]",2720
9,201606010105,三連複,"[2, 10, 13]",3470


In [74]:
bet_df = (
    evaluation_df.sort_values("pred", ascending=True)
    .groupby("race_id")
    .head(10)
    .groupby("race_id")["umaban"]
    .apply(lambda x: list(x.astype(str)))
    .reset_index()
)
bet_df

Unnamed: 0,race_id,umaban
0,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]"
1,201703030107,"[6, 15, 7, 11, 12, 10, 14, 2, 5, 13]"
2,201703030109,"[2, 4, 3, 9, 8, 1, 7, 10, 6, 5]"
3,201703030110,"[16, 1, 11, 5, 9, 4, 3, 6, 8, 13]"
4,201703030111,"[7, 5, 10, 13, 8, 15, 4, 6, 1, 9]"
...,...,...
10409,202410030804,"[3, 4, 2, 9, 14, 13, 8, 15, 10, 1]"
10410,202410030807,"[4, 10, 1, 9, 8, 6, 12, 11, 7, 2]"
10411,202410030809,"[4, 2, 13, 7, 9, 14, 6, 10, 3, 12]"
10412,202410030810,"[17, 11, 9, 13, 5, 14, 6, 16, 12, 1]"


In [75]:
df = bet_df.merge(return_tables,on = "race_id")

In [76]:
df 

Unnamed: 0,race_id,umaban,bet_type,win_umaban,return
0,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",単勝,[10],2620
1,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",複勝,[10],470
2,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",複勝,[8],140
3,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",複勝,[15],180
4,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",馬連,"[8, 10]",3530
...,...,...,...,...,...
114531,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",ワイド,"[2, 7]",780
114532,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",ワイド,"[6, 7]",510
114533,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",馬単,"[2, 6]",3960
114534,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",三連複,"[2, 6, 7]",3570


In [77]:
# 一頭軸の的中判定
def one_head_hit(row):
    # 一頭軸の馬番（先頭1つ目の数字）
    head = row["umaban"][0]
    # 残りの馬番
    other_horses = row["umaban"][1:]
    # win_umabanが一頭軸として的中しているかを判定
    if head in row["win_umaban"]:
        # 残りの馬番のうち、win_umabanに含まれるものを探す
        remaining_hits = [h for h in row["win_umaban"] if h in other_horses]
        # 必要な的中数を満たしているか
        if len(remaining_hits) >= len(row["win_umaban"]) - 1:
            return True
    return False


df["one_head_hit_trio"] = df.apply(one_head_hit, axis=1)


In [78]:
# 二頭軸の的中判定
def two_head_hit(row):
    # 二頭軸の馬番（先頭2つの数字）
    head1, head2 = row["umaban"][:2]
    # 残りの馬番
    other_horses = row["umaban"][2:]
    # win_umabanが二頭軸として的中しているかを判定
    if head1 in row["win_umaban"] and head2 in row["win_umaban"]:
        # 残りの馬番のうち、win_umabanに含まれるものを探す
        remaining_hits = [h for h in row["win_umaban"] if h in other_horses]
        # 必要な的中数を満たしているか
        if len(remaining_hits) >= len(row["win_umaban"]) - 2:
            return True
    return False

df["two_head_hit_trio"] = df.apply(two_head_hit, axis=1)


In [79]:
# 一頭軸の的中判定（軸馬は完全一致、他は順不同）連単系
def one_head_hit(row):
    # 一頭軸の馬番（先頭1つ目の数字）
    head = row["umaban"][0]
    # 残りの馬番
    other_horses = row["umaban"][1:]
    # 軸馬がwin_umabanの先頭と一致し、残りが順不同で含まれている場合
    if row["win_umaban"][0] == head:
        return set(row["win_umaban"][1:]).issubset(set(other_horses))
    return False

df["one_head_hit_trifecta"] = df.apply(one_head_hit, axis=1)


In [80]:
# ２頭軸の的中判定（軸馬は完全一致、他は順不同）連単系
def two_head_hit(row):
    # 軸馬（umabanの先頭2つ）
    head_horses = set(row["umaban"][:2])
    # 残りの馬（umabanの残り）
    other_horses = set(row["umaban"][2:])
    # win_umaban の軸部分と残り部分
    win_heads = set(row["win_umaban"][:2])
    win_others = set(row["win_umaban"][2:])
    # 条件判定
    return head_horses == win_heads and win_others.issubset(other_horses)

df["two_head_hit_trifecta"] = df.apply(two_head_hit, axis=1)
df

Unnamed: 0,race_id,umaban,bet_type,win_umaban,return,one_head_hit_trio,two_head_hit_trio,one_head_hit_trifecta,two_head_hit_trifecta
0,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",単勝,[10],2620,False,False,False,False
1,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",複勝,[10],470,False,False,False,False
2,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",複勝,[8],140,False,False,False,False
3,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",複勝,[15],180,True,False,True,False
4,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",馬連,"[8, 10]",3530,False,False,False,False
...,...,...,...,...,...,...,...,...,...
114531,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",ワイド,"[2, 7]",780,False,False,False,False
114532,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",ワイド,"[6, 7]",510,False,False,False,False
114533,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",馬単,"[2, 6]",3960,False,False,False,False
114534,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",三連複,"[2, 6, 7]",3570,False,False,False,False


In [81]:
agg_hitrate_one_head = (
    df.groupby(["race_id", "bet_type"])["one_head_hit_trio"]
    .max()
    .groupby("bet_type")
    .mean()
    .rename(f"hitrate_one_head_hit_trio")
    .to_frame()
)

In [82]:
agg_hitrate_two_head = (
    df.groupby(["race_id", "bet_type"])["two_head_hit_trio"]
    .max()
    .groupby("bet_type")
    .mean()
    .rename(f"hitrate_two_head_hit")
    .to_frame()
)

In [83]:
agg_hitrate_two_head

Unnamed: 0_level_0,hitrate_two_head_hit
bet_type,Unnamed: 1_level_1
ワイド,0.308047
三連単,0.280968
三連複,0.280968
単勝,0.0
複勝,0.0
馬単,0.146341
馬連,0.146341


In [84]:
# 馬券種ごとの回収率
df["one_head_hit_return"] = df["return"] * df["one_head_hit_trio"]
df["two_head_hit_return"] = df["return"] * df["two_head_hit_trio"]
df

Unnamed: 0,race_id,umaban,bet_type,win_umaban,return,one_head_hit_trio,two_head_hit_trio,one_head_hit_trifecta,two_head_hit_trifecta,one_head_hit_return,two_head_hit_return
0,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",単勝,[10],2620,False,False,False,False,0,0
1,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",複勝,[10],470,False,False,False,False,0,0
2,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",複勝,[8],140,False,False,False,False,0,0
3,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",複勝,[15],180,True,False,True,False,180,0
4,201703030102,"[15, 12, 8, 6, 9, 11, 13, 10, 4, 16]",馬連,"[8, 10]",3530,False,False,False,False,0,0
...,...,...,...,...,...,...,...,...,...,...,...
114531,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",ワイド,"[2, 7]",780,False,False,False,False,0,0
114532,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",ワイド,"[6, 7]",510,False,False,False,False,0,0
114533,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",馬単,"[2, 6]",3960,False,False,False,False,0,0
114534,202410030811,"[14, 9, 13, 7, 12, 6, 2, 5, 8, 3]",三連複,"[2, 6, 7]",3570,False,False,False,False,0,0


In [109]:
n = 10
n_bets_dict_one_head = {
    "単勝": 1,
    "複勝": 1,
    "馬連": n-1,
    "ワイド": n-1,
    "馬単": n-1,
    "三連複": ((n-1)*(n-2))/2,
    "三連単": (n-1)*(n-2),
}
n_bets_dict_two_head = {
    "単勝": 1,
    "複勝": 1,
    "馬連": 1,
    "ワイド": 1,
    "馬単": 2,
    "三連複": n-2,
    "三連単": n*2,
}
n_bets_dict_one_head

{'単勝': 1, '複勝': 1, '馬連': 9, 'ワイド': 9, '馬単': 9, '三連複': 36.0, '三連単': 72}

In [107]:
n_bets_dict_two_head

{'単勝': 1, '複勝': 1, '馬連': 1, 'ワイド': 1, '馬単': 2, '三連複': 8, '三連単': 20}

In [95]:
#回収率計算
agg_df_one_head = df.groupby(["race_id", "bet_type"])["one_head_hit_return"].sum().reset_index()
agg_df_two_head = df.groupby(["race_id", "bet_type"])["two_head_hit_return"].sum().reset_index()

In [96]:
agg_df_one_head

Unnamed: 0,race_id,bet_type,one_head_hit_return
0,201703030102,ワイド,1710
1,201703030102,三連単,38950
2,201703030102,三連複,3870
3,201703030102,単勝,0
4,201703030102,複勝,180
...,...,...,...
72893,202410030811,三連複,0
72894,202410030811,単勝,0
72895,202410030811,複勝,0
72896,202410030811,馬単,0


In [97]:
agg_df_two_head

Unnamed: 0,race_id,bet_type,two_head_hit_return
0,201703030102,ワイド,0
1,201703030102,三連単,0
2,201703030102,三連複,0
3,201703030102,単勝,0
4,201703030102,複勝,0
...,...,...,...
72893,202410030811,三連複,0
72894,202410030811,単勝,0
72895,202410030811,複勝,0
72896,202410030811,馬単,0


In [98]:
agg_df_one_head["n_bets_one"] = agg_df_one_head["bet_type"].map(n_bets_dict_one_head)
agg_df_two_head["n_bets_two"] = agg_df_two_head["bet_type"].map(n_bets_dict_two_head)

In [99]:
agg_df_one_head  = (
            agg_df_one_head.query("n_bets_one > 0")
            .groupby("bet_type")[["one_head_hit_return", "n_bets_one"]]
            .sum()
)
agg_returnrate_one_head = (
    (agg_df_one_head["one_head_hit_return"] / agg_df_one_head["n_bets_one"] / 100)
    .rename(f"returnrate_one_head")
    .to_frame()
    .reset_index()
)

agg_df_two_head = (
            agg_df_two_head.query("n_bets_two > 0")
            .groupby("bet_type")[["two_head_hit_return", "n_bets_two"]]
            .sum()
)
agg_returnrate_two_head = (
    (agg_df_two_head["two_head_hit_return"] / agg_df_two_head["n_bets_two"] / 100)
    .rename(f"returnrate_two_head")
    .to_frame()
    .reset_index()
)

In [100]:
# 1つ目と2つ目のデータフレームを結合
merged_1 = pd.merge(agg_hitrate_one_head, agg_returnrate_one_head, on="bet_type")

# その結果を3つ目と4つ目のデータフレームと結合
output_df = pd.merge(merged_1, agg_hitrate_two_head,on="bet_type")
output_df = pd.merge(output_df, agg_returnrate_two_head, on="bet_type")


In [101]:
output_df.insert(0, "topn", n)

In [102]:
output_df

Unnamed: 0,topn,bet_type,hitrate_one_head_hit_trio,returnrate_one_head,hitrate_two_head_hit,returnrate_two_head
0,10,ワイド,0.624256,0.980559,0.308047,1.187334
1,10,三連単,0.540618,3.132244,0.280968,3.39912
2,10,三連複,0.540618,1.024077,0.280968,1.437238
3,10,単勝,0.299981,1.028212,0.0,0.0
4,10,複勝,0.623488,0.981438,0.0,0.0
5,10,馬単,0.460726,1.955186,0.146341,1.442299
6,10,馬連,0.460726,1.024326,0.146341,1.450797


In [54]:
import math
import pandas as pd
n = 5
bet_df = (
    evaluation_df.sort_values("pred", ascending=True)
    .groupby("race_id")
    .head(n)
    .groupby("race_id")["umaban"]
    .apply(lambda x: list(x.astype(str)))
    .reset_index()
)
df = bet_df.merge(return_tables,on = "race_id")

# 一頭軸の的中判定
def hit_one_axis(row):
    axis = row['umaban'][0]  # 一頭軸の馬番
    other_horses = set(row['umaban'][1:])  # 残りの馬番
    return set(row['win_umaban']).issubset(other_horses.union([axis]))

# 二頭軸の的中判定
def hit_two_axis(row):
    axis = row['umaban'][:2]  # 二頭軸の馬番
    other_horses = set(row['umaban'][2:])  # 残りの馬番
    return set(row['win_umaban']).issubset(other_horses.union(axis))

# 一頭軸と二頭軸のシミュレーション
def simulate_bets(df, exp_name):
    # 一頭軸の的中判定
    df['hit_one_axis'] = df.apply(hit_one_axis, axis=1)
    
    # 二頭軸の的中判定
    df['hit_two_axis'] = df.apply(hit_two_axis, axis=1)
    
    # 馬券種ごとの的中率 (一頭軸)
    agg_hitrate_one_axis = (
        df.groupby(['race_id', 'bet_type'])['hit_one_axis']
        .max()
        .groupby('bet_type')
        .mean()
        .rename(f'hitrate_one_axis_{exp_name}')
        .to_frame()
    )
    
    # 馬券種ごとの的中率 (二頭軸)
    agg_hitrate_two_axis = (
        df.groupby(['race_id', 'bet_type'])['hit_two_axis']
        .max()
        .groupby('bet_type')
        .mean()
        .rename(f'hitrate_two_axis_{exp_name}')
        .to_frame()
    )

    # 一頭軸の回収率
    df['hit_return_one_axis'] = df['return'] * df['hit_one_axis']
    
    # 二頭軸の回収率
    df['hit_return_two_axis'] = df['return'] * df['hit_two_axis']
    
    # 一頭軸と二頭軸の馬券種ごとの回収率
    n_bets_dict = {
        '単勝': n,
        '複勝': n,
        '馬連': math.comb(n, 2),
        'ワイド': math.comb(n, 2),
        '馬単': math.perm(n, 2),
        '三連複': math.comb(n, 3),
        '三連単': math.perm(n, 3),
    }
    
    # 一頭軸の回収率集計
    agg_df_one_axis = df.groupby(['race_id', 'bet_type'])['hit_return_one_axis'].sum().reset_index()
    agg_df_one_axis['n_bets'] = agg_df_one_axis['bet_type'].map(n_bets_dict)
    agg_df_one_axis = agg_df_one_axis.query('n_bets > 0').groupby('bet_type')[['hit_return_one_axis', 'n_bets']].sum()
    
    # 二頭軸の回収率集計
    agg_df_two_axis = df.groupby(['race_id', 'bet_type'])['hit_return_two_axis'].sum().reset_index()
    agg_df_two_axis['n_bets'] = agg_df_two_axis['bet_type'].map(n_bets_dict)
    agg_df_two_axis = agg_df_two_axis.query('n_bets > 0').groupby('bet_type')[['hit_return_two_axis', 'n_bets']].sum()

    # 回収率計算
    agg_returnrate_one_axis = (
        (agg_df_one_axis['hit_return_one_axis'] / agg_df_one_axis['n_bets'] / 100)
        .rename(f'returnrate_one_axis_{exp_name}')
        .to_frame()
        .reset_index()
    )
    
    agg_returnrate_two_axis = (
        (agg_df_two_axis['hit_return_two_axis'] / agg_df_two_axis['n_bets'] / 100)
        .rename(f'returnrate_two_axis_{exp_name}')
        .to_frame()
        .reset_index()
    )

    # 結果のマージ
    output_df = pd.merge(agg_hitrate_one_axis, agg_returnrate_one_axis, on='bet_type', how='outer')
    output_df = pd.merge(output_df, agg_hitrate_two_axis, on='bet_type', how='outer')
    output_df = pd.merge(output_df, agg_returnrate_two_axis, on='bet_type', how='outer')
    
    output_df.insert(0, 'topn', n)
    
    return output_df

# シミュレーション実行例
result = simulate_bets(df, 'exp_1')
result


Unnamed: 0,topn,bet_type,hitrate_one_axis_exp_1,returnrate_one_axis_exp_1,hitrate_two_axis_exp_1,returnrate_two_axis_exp_1
0,5,ワイド,0.813616,1.044175,0.813616,1.044175
1,5,三連単,0.353274,1.324594,0.353274,1.324594
2,5,三連複,0.353274,1.308447,0.353274,1.308447
3,5,単勝,0.778567,0.890373,0.778567,0.890373
4,5,複勝,0.981179,0.905291,0.981179,0.905291
5,5,馬単,0.562032,1.014881,0.562032,1.014881
6,5,馬連,0.562032,1.022531,0.562032,1.022531


In [13]:
import pandas as pd
import math

# df: bet_dfとreturn_tablesをマージしたDataFrame

# 一頭軸と二頭軸の的中判定
def hit_one_axis(row):
    umaban = row["umaban"]
    win_umaban = row["win_umaban"]
    # 一頭軸の場合、先頭の馬番を軸として、残りの馬番を組み合わせ
    axis = umaban[0]  # 軸1
    other_horses = set(umaban[1:])
    return axis == win_umaban[0] and other_horses.issubset(set(win_umaban[1:]))

def hit_two_axis(row):
    umaban = row["umaban"]
    win_umaban = row["win_umaban"]
    # 二頭軸の場合、先頭の2つの馬番を軸として、残りの馬番を組み合わせ
    axis = umaban[:2]  # 軸1,2
    other_horses = set(umaban[2:])
    return set(win_umaban[:2]) == set(axis) and other_horses.issubset(set(win_umaban[2:]))

# 一頭軸の的中判定
df["hit_one_axis"] = df.apply(hit_one_axis, axis=1)

# 二頭軸の的中判定
df["hit_two_axis"] = df.apply(hit_two_axis, axis=1)

# 馬券の種類ごとの購入通り数を設定
n = 5  # umabanの長さ
n_bets_dict = {
    "単勝": n,
    "複勝": n,
    "馬連": math.comb(n, 2),
    "ワイド": math.comb(n, 2),
    "馬単": math.perm(n, 2),
    "三連複": math.comb(n, 3),
    "三連単": math.perm(n, 3),
}

# 回収率を計算
df["hit_return_one_axis"] = df["return"] * df["hit_one_axis"]
df["hit_return_two_axis"] = df["return"] * df["hit_two_axis"]

# 回収率の集計
agg_df = df.groupby(["race_id", "bet_type"])[["hit_return_one_axis", "hit_return_two_axis"]].sum().reset_index()
agg_df["n_bets_one_axis"] = agg_df["bet_type"].map(n_bets_dict)
agg_df["n_bets_two_axis"] = agg_df["bet_type"].map(n_bets_dict)

agg_df_one_axis = (
    agg_df.query("n_bets_one_axis > 0")
    .groupby("bet_type")[["hit_return_one_axis", "n_bets_one_axis"]]
    .sum()
)
agg_returnrate_one_axis = (
    (agg_df_one_axis["hit_return_one_axis"] / agg_df_one_axis["n_bets_one_axis"] / 100)
    .rename("returnrate_one_axis")
    .to_frame()
    .reset_index()
)

agg_df_two_axis = (
    agg_df.query("n_bets_two_axis > 0")
    .groupby("bet_type")[["hit_return_two_axis", "n_bets_two_axis"]]
    .sum()
)
agg_returnrate_two_axis = (
    (agg_df_two_axis["hit_return_two_axis"] / agg_df_two_axis["n_bets_two_axis"] / 100)
    .rename("returnrate_two_axis")
    .to_frame()
    .reset_index()
)

# 一頭軸と二頭軸の的中率を計算
agg_df["hit_one_axis_count"] = df["hit_one_axis"].sum()  # 一頭軸の的中数
agg_df["hit_two_axis_count"] = df["hit_two_axis"].sum()  # 二頭軸の的中数

# 一頭軸と二頭軸の的中率を計算
agg_df["hit_rate_one_axis"] = agg_df["hit_one_axis_count"] / agg_df["n_bets_one_axis"]
agg_df["hit_rate_two_axis"] = agg_df["hit_two_axis_count"] / agg_df["n_bets_two_axis"]

# 結果に的中率を追加
agg_returnrate_one_axis["hit_rate_one_axis"] = agg_df["hit_rate_one_axis"]
agg_returnrate_two_axis["hit_rate_two_axis"] = agg_df["hit_rate_two_axis"]

# 結果を統合
output_df = pd.merge(agg_returnrate_one_axis, agg_returnrate_two_axis, on="bet_type")
output_df.insert(0, "topn", n)

# 結果表示
output_df




Unnamed: 0,topn,bet_type,returnrate_one_axis,hit_rate_one_axis,returnrate_two_axis,hit_rate_two_axis
0,5,ワイド,0.0,0.1,0.0,0.0
1,5,三連単,4e-06,0.016667,0.0,0.0
2,5,三連複,0.0,0.1,0.0,0.0
3,5,単勝,0.0,0.2,0.0,0.0
4,5,複勝,0.0,0.2,0.0,0.0
5,5,馬単,0.0,0.05,0.0,0.0
6,5,馬連,0.0,0.1,0.0,0.0


In [49]:
import itertools
import math
def calculate_hit_and_return_top(row, top_n=1):
    # umaban と win_umaban をリストに変換（必要なら）
    umaban = row['umaban']
    win_umaban = row['win_umaban']
    
    if isinstance(umaban, str):
        umaban = ast.literal_eval(umaban)
    if isinstance(win_umaban, str):
        win_umaban = ast.literal_eval(win_umaban)
    
    bet_type = row['bet_type']
    bet_return = row['return']  # 払い戻し額を取得
    bet_amount = 100  # 賭け金（1口100円と仮定）

    hits = 0
    total_return = -bet_amount  # 賭け金を引いた初期値を設定
    
    # 軸馬の選定（top_nに応じて）
    if top_n == 1:
        axis_horses = [umaban[0]]  # 1頭軸
    elif top_n == 2:
        axis_horses = umaban[:2]  # 2頭軸
    else:
        return pd.Series([0, 0])  # top_nが不正な場合、計算できない
    
    remaining_horses = umaban[len(axis_horses):]  # 残りの馬（軸馬を除いた馬）

    # 買い方ごとの組み合わせを計算
    if bet_type == '馬連':
        # 軸1頭または2頭と残りの馬から組み合わせ
        combinations = list(itertools.combinations(remaining_horses, 2 - len(axis_horses)))
        for comb in combinations:
            ticket = set(axis_horses + list(comb))  # 軸馬 + 残りの馬
            if ticket.issubset(set(win_umaban)):  # 的中条件
                hits += 1
                total_return += bet_return  # 払い戻しを加算

    elif bet_type == '三連複':
        # 軸1頭または2頭と残りの馬から組み合わせ
        combinations = list(itertools.combinations(remaining_horses, 3 - len(axis_horses)))
        for comb in combinations:
            ticket = set(axis_horses + list(comb))  # 軸馬 + 残りの馬
            if ticket.issubset(set(win_umaban)):  # 的中条件
                hits += 1
                total_return += bet_return  # 払い戻しを加算

    elif bet_type == '三連単':
        # 軸1頭または2頭と残りの馬の順番を考慮した組み合わせ
        combinations = list(itertools.permutations(remaining_horses, 3 - len(axis_horses)))
        for comb in combinations:
            ticket = axis_horses + list(comb)  # 順番を含めた組み合わせ
            if ticket == win_umaban:  # 的中条件
                hits += 1
                total_return += bet_return  # 払い戻しを加算

    # 的中数と総払い戻しを返す
    return pd.Series([hits, total_return])

# 1. 馬券シミュレーションの実行
df = bet_df.merge(return_tables, on="race_id")
df[['hits', 'total_return']] = df.apply(calculate_hit_and_return_top, axis=1, top_n=1)  # top1の一頭軸でシミュレーション

# 2. 的中率と回収率の計算
agg_hitrate = (
    df.groupby(["race_id", "bet_type"])["hits"]
    .sum()
    .groupby("bet_type")
    .mean()
    .rename(f"hitrate_top1")
    .to_frame()
)

# 3. 馬券種ごとの回収率
df["hit_return"] = df["total_return"]
n_bets_dict = {
    "単勝": 1,
    "複勝": 1,
    "馬連": math.comb(n, 2),
    "ワイド": math.comb(n, 2),
    "馬単": math.perm(n, 2),
    "三連複": math.comb(n, 3),
    "三連単": math.perm(n, 3),
}
agg_df = df.groupby(["race_id", "bet_type"])["hit_return"].sum().reset_index()
agg_df["n_bets"] = agg_df["bet_type"].map(n_bets_dict)
agg_df = (
    agg_df.query("n_bets > 0")
    .groupby("bet_type")[["hit_return", "n_bets"]]
    .sum()
)
agg_returnrate = (
    (agg_df["hit_return"] / agg_df["n_bets"] / 100)
    .rename(f"returnrate_top1")
    .to_frame()
    .reset_index()
)

output_df = pd.merge(agg_hitrate, agg_returnrate, on="bet_type")
output_df.insert(0, "topn", 1)  # 1頭軸の場合
print(output_df)


   topn bet_type  hitrate_top1  returnrate_top1
0     1       単勝           0.0        -1.002209
1     1       複勝           0.0        -2.972825


In [42]:
# 1. 的中率と回収率を計算する関数
def calculate_hit_and_return(row):
    # umaban と win_umaban をリストに変換（必要なら）
    umaban = row['umaban']
    win_umaban = row['win_umaban']
    
    if isinstance(umaban, str):
        umaban = ast.literal_eval(umaban)
    if isinstance(win_umaban, str):
        win_umaban = ast.literal_eval(win_umaban)
    
    bet_type = row['bet_type']
    bet_return = row['return']
    bet_amount = 100  # 賭け金（1口100円と仮定）

    hits = 0
    total_return = -bet_amount  # 賭け金を引いた初期値を設定

    # 各買い方ごとの処理
    if bet_type == '単勝':
        if win_umaban[0] in umaban:  # 的中判定
            hits = 1
            total_return = bet_return - bet_amount

    elif bet_type == '複勝':
        if any(w in umaban for w in win_umaban):  # 的中判定
            hits = 1
            total_return = bet_return - bet_amount

    elif bet_type == '馬連':
        if len(win_umaban) == 2 and set(win_umaban).issubset(set(umaban)):  # 的中判定
            hits = 1
            total_return = bet_return - bet_amount

    elif bet_type == '馬単':
        if len(win_umaban) == 2 and win_umaban[0] == umaban[0] and win_umaban[1] in umaban[1:]:
            hits = 1
            total_return = bet_return - bet_amount

    elif bet_type == '三連複':
        if len(win_umaban) == 3 and set(win_umaban).issubset(set(umaban)):  # 的中判定
            hits = 1
            total_return = bet_return - bet_amount

    elif bet_type == '三連単':
        if len(win_umaban) == 3 and win_umaban == umaban[:3]:  # 順序を考慮した的中判定
            hits = 1
            total_return = bet_return - bet_amount

    return pd.Series([hits, total_return])

# 2. 的中率と回収率の計算
df[['hits', 'total_return']] = df.apply(calculate_hit_and_return, axis=1)

# 3. 集計
summary = df.groupby('bet_type').agg(
    total_bets=('hits', 'count'),
    total_hits=('hits', 'sum'),
    total_return=('total_return', 'sum')
).reset_index()

summary['hit_rate'] = summary['total_hits'] / summary['total_bets']
summary['return_rate'] = summary['total_return'] / (summary['total_bets'] * 100)  # 総賭け金（100円 × 総賭け数）で割る

# 結果表示
print(summary)




  bet_type  total_bets  total_hits  total_return  hit_rate  return_rate
0      ワイド       31310           0      -3131000  0.000000    -1.000000
1      三連単       10491         227        992980  0.021638     0.946507
2      三連複       10448        3685      12581370  0.352699    12.041893
3       単勝       10437        8118       3592470  0.777810     3.442052
4       複勝       30959       22099       1617950  0.713815     0.522611
5       馬単       10457        2221       4067220  0.212394     3.889471
6       馬連       10434        5859       9605240  0.561530     9.205712


In [29]:
# データの型情報を確認
print(df.info())

# 先頭5行を確認
print(df.head())

# データの一部を確認 (特定の列)
print(df[['umaban', 'win_umaban', 'bet_type']].head())

# データ型が文字列でリスト形式の場合のサンプル
print(df['umaban'].iloc[0])  # 最初の値を確認
print(df['win_umaban'].iloc[0])


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114536 entries, 0 to 114535
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   race_id       114536 non-null  int64 
 1   umaban        114536 non-null  object
 2   bet_type      114536 non-null  object
 3   win_umaban    114536 non-null  object
 4   return        114536 non-null  int64 
 5   hits          114536 non-null  int64 
 6   total_return  114536 non-null  int64 
dtypes: int64(4), object(3)
memory usage: 6.1+ MB
None
        race_id             umaban bet_type win_umaban  return  hits  \
0  201703030102  [15, 12, 8, 6, 9]       単勝       [10]    2620     0   
1  201703030102  [15, 12, 8, 6, 9]       複勝       [10]     470     0   
2  201703030102  [15, 12, 8, 6, 9]       複勝        [8]     140     0   
3  201703030102  [15, 12, 8, 6, 9]       複勝       [15]     180     0   
4  201703030102  [15, 12, 8, 6, 9]       馬連    [8, 10]    3530     0   

   total_return

In [43]:
def calculate_hit_and_return(row):
    # umaban と win_umaban をリストに変換（必要なら）
    umaban = row['umaban']
    win_umaban = row['win_umaban']
    
    if isinstance(umaban, str):
        umaban = ast.literal_eval(umaban)
    if isinstance(win_umaban, str):
        win_umaban = ast.literal_eval(win_umaban)
    
    bet_type = row['bet_type']
    bet_return = row['return']  # 払い戻し額を取得
    bet_amount = 100  # 賭け金（1口100円と仮定）

    hits = 0
    total_return = -bet_amount  # 賭け金を引いた初期値を設定
    # 軸馬2頭を選ぶ
    if len(umaban) < 2:
        return pd.Series([0, 0])  # umabanが2頭未満の場合、計算不能
    
    axis_horses = umaban[:2]  # 先頭2頭を軸とする（例: [15, 12]）
    remaining_horses = umaban[2:]  # 残りの馬（例: [8, 6, 9]）

    # 買い方ごとの組み合わせを計算
    hits = 0
    total_return = 0

    if bet_type == '馬連':
        # 軸2頭と残りの馬から1頭を選ぶ組み合わせ
        combinations = list(itertools.combinations(remaining_horses, 1))
        for comb in combinations:
            ticket = set(axis_horses + list(comb))  # 軸馬2頭 + 残りの馬1頭
            if ticket.issubset(win_umaban):  # 的中条件
                hits += 1
                total_return += bet_return  # 払い戻しを加算

    elif bet_type == '三連複':
        # 軸2頭と残りの馬から1頭を選ぶ組み合わせ
        combinations = list(itertools.combinations(remaining_horses, 1))
        for comb in combinations:
            ticket = set(axis_horses + list(comb))  # 軸馬2頭 + 残りの馬1頭
            if ticket.issubset(win_umaban):  # 的中条件
                hits += 1
                total_return += bet_return  # 払い戻しを加算

    elif bet_type == '三連単':
        # 軸2頭と残りの馬の順番を考慮した組み合わせ
        combinations = list(itertools.permutations(remaining_horses, 1))
        for comb in combinations:
            ticket = axis_horses + list(comb)  # 順番を含めた組み合わせ
            if ticket == win_umaban:  # 的中条件
                hits += 1
                total_return += bet_return  # 払い戻しを加算

    # 的中数と総払い戻しを返す
    return pd.Series([hits, total_return])

# 2. 的中率と回収率の計算
df[['hits', 'total_return']] = df.apply(calculate_hit_and_return, axis=1)

# 3. 集計
summary = df.groupby('bet_type').agg(
    total_bets=('hits', 'count'),
    total_hits=('hits', 'sum'),
    total_return=('total_return', 'sum')
).reset_index()

summary['hit_rate'] = summary['total_hits'] / summary['total_bets']
summary['return_rate'] = summary['total_return'] / (summary['total_bets'] * 100)  # 総賭け金（100円 × 総賭け数）で割る

# 結果表示
print(summary)


  bet_type  total_bets  total_hits  total_return  hit_rate  return_rate
0      ワイド       31310           0             0  0.000000     0.000000
1      三連単       10491         525       6023870  0.050043     5.741941
2      三連複       10448        2009       5625900  0.192286     5.384667
3       単勝       10437           0             0  0.000000     0.000000
4       複勝       30959           0             0  0.000000     0.000000
5       馬単       10457           0             0  0.000000     0.000000
6       馬連       10434           0             0  0.000000     0.000000


In [15]:

def calculate_hit_and_return(row):
    # umaban と win_umaban をリストに変換
    umaban = ast.literal_eval(row['umaban'])  # '[15, 12, 8, 6, 9]' → [15, 12, 8, 6, 9]
    win_umaban = ast.literal_eval(row['win_umaban'])  # '[10]' → [10]

    bet_type = row['bet_type']
    bet_amount = row['bet_amount']  # 1点あたりの賭け金
    payout = row['payout']  # 払い戻し金額

    # 軸馬2頭を選ぶ
    if len(umaban) < 2:
        return pd.Series([0, 0])  # umabanが2頭未満の場合、計算不能
    
    axis_horses = umaban[:2]  # 先頭2頭を軸とする（例: [15, 12]）
    remaining_horses = umaban[2:]  # 残りの馬（例: [8, 6, 9]）

    # 買い方ごとの組み合わせを計算
    hits = 0
    total_return = 0

    if bet_type == '馬連':
        # 軸2頭と残りの馬から1頭を選ぶ組み合わせ
        combinations = list(itertools.combinations(remaining_horses, 1))
        for comb in combinations:
            ticket = set(axis_horses + list(comb))  # 軸馬2頭 + 残りの馬1頭
            if ticket.issubset(win_umaban):  # 的中条件
                hits += 1
                total_return += payout  # 払い戻しを加算

    elif bet_type == '三連複':
        # 軸2頭と残りの馬から1頭を選ぶ組み合わせ
        combinations = list(itertools.combinations(remaining_horses, 1))
        for comb in combinations:
            ticket = set(axis_horses + list(comb))  # 軸馬2頭 + 残りの馬1頭
            if ticket.issubset(win_umaban):  # 的中条件
                hits += 1
                total_return += payout  # 払い戻しを加算

    elif bet_type == '三連単':
        # 軸2頭と残りの馬の順番を考慮した組み合わせ
        combinations = list(itertools.permutations(remaining_horses, 1))
        for comb in combinations:
            ticket = axis_horses + list(comb)  # 順番を含めた組み合わせ
            if ticket == win_umaban:  # 的中条件
                hits += 1
                total_return += payout  # 払い戻しを加算

    # 的中数と総払い戻しを返す
    return pd.Series([hits, total_return])
# 2. 的中率と回収率の計算
df[['hits', 'total_return']] = df.apply(calculate_hit_and_return, axis=1)

# 3. 集計
summary = df.groupby('bet_type').agg(
    total_bets=('hits', 'count'),
    total_hits=('hits', 'sum'),
    total_return=('total_return', 'sum')
).reset_index()

summary['hit_rate'] = summary['total_hits'] / summary['total_bets']
summary['return_rate'] = summary['total_return'] / (summary['total_bets'] * 100)  # 総賭け金（100円 × 総賭け数）で割る

# 結果表示
print(summary)


NameError: name 'ast' is not defined