In [1]:
import math
import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchmetrics import Accuracy
import pandas as pd
import time
from sklearn.preprocessing import LabelEncoder
from datetime import datetime as dt
import random
import copy 
pd.set_option('display.max_columns', None)
df = pd.read_csv('data/keiba_feature_preprocessed_all.csv')
sinba = list(map(lambda x: ('新馬' in x) if not pd.isna(x) else True, df['race_name'].to_numpy()))
delete_index = df.index[sinba | (df["course"]=='障') | (df["rank"]=="中止") | (df["rank"]=='取消') | (df["rank"]=="失格") | (df['rank']=='除外') | pd.isna(df['rank'])]
df.drop(delete_index, inplace=True)
df.reset_index(inplace=True, drop=True)

In [2]:
#for name in df['race_name'].unique():
#    print(name)

In [3]:
categorical = [
    'field', 
    'race_name', 
    'course', 
    'gender',  
    #'trainerA', 
    'horse_name',
    'trainerB', 
    'jackie', 
    'cond', 
    'turn',
    'weather', 
    'wakuban', 
    #'umaban', 
    'age', 
    'pre_rank', 
    'pre2_rank', 
    'pre3_rank', 
    #'pre4_rank', 
    #'pre5_rank',
]


In [4]:
scaling_columns = [
    'head_count', 
    'weight', 
    'c_weight',
    'j_weight', 
    'distance',
    'pre_weight', 
    'sum_prize', 
    'pre_speed', 
    'pre2_speed',
    'pre3_speed', 
    #'pre4_speed', 
    #'pre5_speed', 
    'from_last_day', 
    'rentai',
]

In [5]:
df[categorical] = df[categorical].fillna('nan').astype(str)
le=LabelEncoder()
keiba_categorical = df[categorical].apply(le.fit_transform)
keiba_categorical = keiba_categorical.rename(columns={c: c+'_c' for c in categorical})
df = pd.concat([df, keiba_categorical],axis=1)
# 変換前と不要な列を削除
df.drop(columns=categorical, inplace=True)

In [6]:
from sklearn.preprocessing import StandardScaler
df[scaling_columns] = df[scaling_columns].fillna(0)
scaler = StandardScaler()
scaler.fit(df[scaling_columns])
df[scaling_columns] = pd.DataFrame(scaler.transform(df[scaling_columns]))

In [7]:
#df = df[df['date'] > df['date'].unique()[-1000]]

In [8]:
df.reset_index(inplace=True, drop=True)

In [9]:
class KDataset(Dataset):
    
    def __init__(self, dataframe):
        super().__init__()
        self.df = dataframe
        self.race_list = dataframe['race_num'].unique()
        #self.embedding = nn.Embedding(len(df['horse_name_c'].unique()), 128)
    
    def __getitem__(self, i):
        race_name = self.race_list[i]
        df = self.df[self.df['race_num'] == race_name].sample(frac=1)
        df_x_c = df[[c+'_c' for c in categorical]].to_numpy()
        df_x_f = df[scaling_columns].to_numpy()
        df_odds = df['odds'].to_numpy()
        df_y = df['rank'].to_numpy()
        #horse = torch.tensor(df['horse_name_c'].to_numpy())
        #x = embedding(horse)
        #x = torch.cat([x, torch.zeros(18-x.shape[0], 128)], dim=0)
        #y = df['rank'].to_numpy()
        #y = (y==1).astype(int)
        #y = np.append(y, np.zeros(18-y.shape[0]))
        #y = y[None]
        #y = torch.tensor(y)
        
        return df_x_c, df_x_f, df_y, df_odds
    
    def __len__(self):
        return len(self.race_list)
    
    def collate_fn(self, batch):
        xcs, xfs, ys, odds = [], [], [], []
        for xc, xf, y, odd in batch:
            
            zero = np.zeros((18-len(xc), len(xc[0])))
            xc = np.concatenate([xc, zero], axis=0)
            
            xcs.append(xc)
            
            zero = np.zeros((18-len(xf), len(xf[0])))
            xf = np.concatenate([xf, zero], axis=0)
            
            xfs.append(xf)
            
            
            y_numpy = y.astype(int)
            ys.append(torch.from_numpy(y_numpy))
            
            odds.append(torch.from_numpy(odd))
        xcs = np.stack(xcs, axis=0)
        xfs = np.stack(xfs, axis=0)
        return torch.from_numpy(xcs).long(), torch.from_numpy(xfs), ys, odds
        

In [10]:
df['date'] = pd.to_datetime(df['date'])

In [11]:
date_2020 = dt(2020, 1, 1)
date_2021 = dt(2021, 1, 1)

In [12]:
df_train = df[df['date'] < date_2020]
df_val = df[(date_2020 < df['date']) & (df['date'] < date_2021)]
df_test = df[date_2021 < df['date']]

In [13]:
keiba_train = KDataset(df_train)
keiba_val = KDataset(df_val)
keiba_test = KDataset(df_test)

In [14]:
keiba_trainloader = DataLoader(keiba_train, shuffle=True, batch_size=240, collate_fn=keiba_train.collate_fn)
keiba_valloader = DataLoader(keiba_val, shuffle=False, batch_size=240, collate_fn=keiba_val.collate_fn)
keiba_testloader = DataLoader(keiba_test, shuffle=False, batch_size=240, collate_fn=keiba_test.collate_fn)

In [15]:
class Transformer(nn.Module):
    """
    Classic Transformer that both encodes and decodes.
    
    Prediction-time inference is done greedily.
    NOTE: start token is hard-coded to be 0, end token to be 1. If changing, update predict() accordingly.
    """

    def __init__(self, nhead=4, num_layers=4, embedding_dim=4):
        super().__init__()

        # Parameters
        #categorical = ['field', 'race_name', 'course', 'horse_name', 'gender', 'trainerA', 'trainerB', 'jackie', 'cond', 'turn', 'weather', 'wakuban', 'umaban', 'age']
        self.categorical_list = [c+'_c' for c in categorical]
        self.feature_list = scaling_columns
        self.dim = embedding_dim * len(self.categorical_list) + len(self.feature_list)
        dim_feedforward = self.dim
            
        # Embedding part
        self.embeddings = nn.ModuleDict({name: nn.Embedding(len(df[name].unique()), embedding_dim) for name in self.categorical_list})
        # Encoder part
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer=nn.TransformerEncoderLayer(d_model=self.dim, nhead=nhead, dim_feedforward=dim_feedforward),
            num_layers=num_layers
        )

        # Decoder part
        self.transformer_decoder = nn.TransformerDecoder(
            decoder_layer=nn.TransformerDecoderLayer(d_model=self.dim, nhead=nhead, dim_feedforward=dim_feedforward),
            num_layers=num_layers
        )
        self.fc = nn.Linear(self.dim, 1)

        # It is empirically important to initialize weights properly
        self.init_weights()
    
    def init_weights(self):
        initrange = 0.1
        self.fc.bias.data.zero_()
        self.fc.weight.data.uniform_(-initrange, initrange)
      
    def forward(self, xcs, xfs) -> torch.Tensor:
        """
        Input
            dfs: list of dataframe
        Output
            (B, C, Sy) logits
        """
        x = self.embedding(xcs, xfs)
        encoded_x = self.encode(x)  # (member, B, E)
        output = self.decode(encoded_x)  # (B, member)
        return output  # (B, member)

    def encode(self, x: torch.Tensor) -> torch.Tensor:
        """
        Input
            x: (B, member, E)
        Output
            (member, B, E)
        """
        
        x = x.permute(1, 0, 2)  # (member, B, E)
        #x = self.embedding(x) * math.sqrt(self.dim)  # (Sx, B, E)
        #x = self.pos_encoder(x)  # (Sx, B, E)
        
        x = self.transformer_encoder(x)  # (member, B, E)
        return x
    
    def decode(self, encoded_x):
        """
        Input
            encoded_x: (member, B, E)
        Output
            (B, member) logits
        """
        #print(self.m)
        encoded_x = encoded_x.permute(1, 0, 2) # (B, member, E)
        #print(encoded_x.shape)
        output = self.fc(encoded_x)  # (B, member, 1)
        output = output.squeeze(2)
        return output

    def predict(self, x: torch.Tensor) -> torch.Tensor:
        """
        Method to use at inference time. Predict y from x one token at a time. This method is greedy
        decoding. Beam search can be used instead for a potential accuracy boost.
        Input
            x: (B, Sx) with elements in (0, C) where C is num_classes
        Output
            (B, C, Sy) logits
        """
        encoded_x = self.encode(x)
        
        output = self.decode(encoded_x)
        return output
    
    def fullfill(self, x):
        if len(x.shape) == 1:
            feature_num = 1
            return torch.cat([x, torch.zeros(18-len(x)).cuda()], dim=0)
        else:
            feature_num = x.shape[1]
        return torch.cat([x, torch.zeros(18-len(x), feature_num).cuda()], dim=0)
    
    def embedding(self, xcs, xfs):
        
        category_list = []
        for i, name in enumerate(self.categorical_list):
            xc = self.embeddings[name](xcs[:, :, i])
            category_list.append(xc)
        categorical_x = torch.cat(category_list, dim=2) #bs, seq, features
        x = torch.cat([categorical_x, xfs*10], dim=2)
        
        return x.cuda().float()

In [16]:
embedding_dim = 4

In [17]:
dim = embedding_dim * len(categorical) + len(scaling_columns)
dim

72

In [18]:
model = Transformer(nhead=1, num_layers=4, embedding_dim=embedding_dim)
model.cuda()
criterion = nn.BCEWithLogitsLoss(reduction='none') #nn.CrossEntropyLoss() #
#optimizer = torch.optim.Adam(model.parameters() , lr=1e-3)

In [19]:
optimizer = torch.optim.Adam(model.parameters() , lr=1e-3)
val_best = 1e9
#optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=5e-4)

In [20]:
models4 = []
for epoch in range(3):
    model.train()
    for i, (xcs, xfs, ys, odds) in enumerate(keiba_trainloader):
        optimizer.zero_grad()
        output = model(xcs.cuda(), xfs.cuda())
        #y_true = torch.tensor([y.min(dim=0)[1] for y in ys])
        th = random.choice([1, 2, 3])
        th = 3
        y_true = torch.stack([torch.cat([(y<=th).long(), torch.zeros(18-len(y))], dim=0) for y in ys], dim=0).cuda()
        y_1st = torch.stack([torch.cat([(y<=1).long(), torch.zeros(18-len(y))], dim=0) for y in ys], dim=0).cuda()
        
        loss = criterion(output, y_true)
        odds = torch.stack([torch.cat([odd, torch.ones(18-len(odd))], dim=0) for odd in odds], dim=0).cuda().float()
        odds = torch.nan_to_num(odds)
        odds = torch.where(y_true==1, torch.sqrt(odds), torch.tensor([1.]).cuda())
        #odds = torch.where(y_true==1, torch.tensor([2.]).cuda(), torch.tensor([1.]).cuda())
        #loss = (loss * (odds))
        loss = loss.mean()
        
        
        print(f'{i} / {len(keiba_trainloader)} {loss.item()}', end='\r')
        loss.backward()
        #print(loss.item())
        optimizer.step()
    test_scores = []
    win_flags = []
    odds_list = []
    model.eval()
    dloader = keiba_valloader
    for i, (xcs, xfs, ys, odds) in enumerate(dloader):
        print(f'{i} / {len(dloader)}', end='\r')
        output = model(xcs.cuda(), xfs.cuda())
        output = torch.sigmoid(output)
        test_scores.append(output)
        y_true = torch.tensor([y.min(dim=0)[1].item() for y in ys]).cuda()
        y_onehot = F.one_hot(y_true, 18).float()
        win_flags.append(y_onehot)
        odds = torch.stack([torch.cat([odd, 100*torch.ones(18-len(odd))], dim=0) for odd in odds], dim=0)
        
        odds_list.append(odds)
        
    test_scores = torch.cat(test_scores, dim=0)
    win_flags = torch.cat(win_flags, dim=0)
    odds_list = torch.cat(odds_list, dim=0).cuda()
    odds_list = torch.nan_to_num(odds_list)
    thresh = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    for th in thresh:
        gap = (torch.sort(test_scores, dim=1)[0][:, -1] - torch.sort(test_scores, dim=1)[0][:, -2]) > 0.2
        gap = gap.unsqueeze(1)
        max_score = test_scores.max(dim=1)[0].unsqueeze(1)
        buy_flags = (test_scores > th) * gap #* F.one_hot(odds_list.min(dim=1)[1], 18)#* (odds_list > 3)
        win = (odds_list * (test_scores >= max_score) * buy_flags * win_flags).sum()
        hit = ((test_scores >= max_score) * buy_flags * win_flags).sum()
        cost = buy_flags.sum() + 1e-10
        print(f'th {th} recovery {win/cost*100}, hit {hit}, cost {cost}, accuracy {hit/cost}')
    
    val_loss = 0
    count = 0
    for i, (xcs, xfs, ys, odds) in enumerate(keiba_valloader):
        optimizer.zero_grad()
        output = model(xcs.cuda(), xfs.cuda())
        #y_true = torch.tensor([y.min(dim=0)[1] for y in ys])
        th = random.choice([1, 2, 3])
        th = 1
        y_true = torch.stack([torch.cat([(y<=th).long(), torch.zeros(18-len(y))], dim=0) for y in ys], dim=0).cuda()
        
        loss = criterion(output, y_true)
        count += loss.shape.numel()
        loss = loss.sum()
        val_loss += loss
    val_loss /= count
    print(val_loss)
    if val_loss < val_best:
        val_best = val_loss
        model_best = copy.deepcopy(model)
    models4.append(copy.deepcopy(model))

th 0.0 recovery 3.693236714975845, hit 68.0, cost 4140.0, accuracy 0.01642512157559395
th 0.1 recovery 5.743801652892562, hit 68.0, cost 2662.0, accuracy 0.025544703006744385
th 0.2 recovery 11.797839506172837, hit 68.0, cost 1296.0, accuracy 0.05246913433074951
th 0.3 recovery 25.740740740740737, hit 68.0, cost 594.0, accuracy 0.11447811126708984
th 0.4 recovery 51.655405405405396, hit 68.0, cost 296.0, accuracy 0.22972972691059113
th 0.5 recovery 62.920353982300895, hit 65.0, cost 226.0, accuracy 0.28761062026023865
th 0.6 recovery 73.80952380952381, hit 50.0, cost 147.0, accuracy 0.3401360511779785
th 0.7 recovery 98.00000000000001, hit 17.0, cost 40.0, accuracy 0.42500001192092896
th 0.8 recovery 0.0, hit 0.0, cost 1.000000013351432e-10, accuracy 0.0
th 0.9 recovery 0.0, hit 0.0, cost 1.000000013351432e-10, accuracy 0.0
tensor(0.2512, device='cuda:0', grad_fn=<DivBackward0>)
th 0.0 recovery 3.667486674866748, hit 78.0, cost 4878.0, accuracy 0.015990160405635834
th 0.1 recovery 6.18

In [947]:
#model = models0[2] #0.8
#model = models1[1] #0.7
#model = models2[0] #0.7
model = models3[2] #0.7
#model = models4[1] 0.8

In [948]:
test_scores = []
win_flags = []
odds_list = []
model.eval()
dloader = keiba_testloader
for i, (xcs, xfs, ys, odds) in enumerate(dloader):
    print(f'{i} / {len(dloader)}', end='\r')
    output = model(xcs.cuda(), xfs.cuda())
    output = torch.sigmoid(output)
    test_scores.append(output)
    y_true = torch.tensor([y.min(dim=0)[1].item() for y in ys]).cuda()
    y_onehot = F.one_hot(y_true, 18).float()
    win_flags.append(y_onehot)
    odds = torch.stack([torch.cat([odd, 100*torch.ones(18-len(odd))], dim=0) for odd in odds], dim=0)
    
    odds_list.append(odds)
test_scores = torch.cat(test_scores, dim=0)
win_flags = torch.cat(win_flags, dim=0)
odds_list = torch.cat(odds_list, dim=0).cuda()
odds_list = torch.nan_to_num(odds_list)
ranking = torch.sort(odds_list, dim=1)[1] + 1

12 / 13

In [960]:
thresh = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

for th in thresh:
    th = 0.7
    gap = (torch.sort(test_scores, dim=1)[0][:, -1] - torch.sort(test_scores, dim=1)[0][:, -2]) > 0.2
    gap = gap.unsqueeze(1)
    max_score = test_scores.max(dim=1)[0].unsqueeze(1)
    buy_flags = (test_scores > th) * gap #* F.one_hot(odds_list.min(dim=1)[1], 18)#* (odds_list > 3)
    win = (odds_list * (test_scores >= max_score) * buy_flags * win_flags).sum()
    hit = ((test_scores >= max_score) * buy_flags * win_flags).sum()
    cost = buy_flags.sum() + 1e-10
    print(f'th {th} recovery {win/cost*100}, hit {hit}, cost {cost}, accuracy {hit/cost}')
    

th 0.7 recovery 85.35714285714285, hit 51.0, cost 112.0, accuracy 0.4553571343421936
th 0.7 recovery 85.35714285714285, hit 51.0, cost 112.0, accuracy 0.4553571343421936
th 0.7 recovery 85.35714285714285, hit 51.0, cost 112.0, accuracy 0.4553571343421936
th 0.7 recovery 85.35714285714285, hit 51.0, cost 112.0, accuracy 0.4553571343421936
th 0.7 recovery 85.35714285714285, hit 51.0, cost 112.0, accuracy 0.4553571343421936
th 0.7 recovery 85.35714285714285, hit 51.0, cost 112.0, accuracy 0.4553571343421936
th 0.7 recovery 85.35714285714285, hit 51.0, cost 112.0, accuracy 0.4553571343421936
th 0.7 recovery 85.35714285714285, hit 51.0, cost 112.0, accuracy 0.4553571343421936
th 0.7 recovery 85.35714285714285, hit 51.0, cost 112.0, accuracy 0.4553571343421936
th 0.7 recovery 85.35714285714285, hit 51.0, cost 112.0, accuracy 0.4553571343421936


In [950]:
torch.nonzero(buy_flags)[:, 1]

tensor([ 1,  0,  0,  0,  0,  5,  3,  0,  2,  0,  0,  4,  0,  0, 10,  0,  0,  1,
         0,  1,  1,  2,  5,  0,  0,  0,  0,  0,  0,  1,  3,  2,  1,  3,  1,  7,
         0,  0,  7,  0,  4,  0,  0,  0,  2,  4,  3,  2,  4,  0,  1, 11,  2,  0,
         4, 11,  1,  1,  7,  0,  0,  0,  0,  0,  0,  0,  5,  3,  0,  6,  3,  5,
         0,  0,  1,  5,  7,  0,  3,  3,  2,  8,  0,  0,  2,  0,  2,  1,  4,  1,
         0,  2,  0,  3,  3,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0, 15, 14,
         1,  4,  1,  1], device='cuda:0')

In [963]:
a = (buy_flags * ranking).flatten()
a[a.nonzero()].flatten()

tensor([ 5,  1,  1,  1,  1,  9,  2,  1,  2,  1,  1,  1,  1,  1, 13,  1,  1,  1,
         1,  3,  1,  7, 16,  1,  1,  1,  1,  1,  1,  9,  1,  4,  3,  2,  4,  7,
         1,  1,  7,  1, 14,  1,  1,  1,  4,  4,  5,  5,  8,  1,  3, 17,  4,  1,
         6,  8,  3,  1,  5,  1,  1,  1,  5,  1,  1,  1,  8,  1,  1,  6,  3,  2,
         1,  1,  4,  4,  3,  1,  1,  3,  6, 10,  1,  1,  7,  1,  2,  7,  6,  8,
         3,  1,  1,  5,  3, 11,  1,  1,  1,  1,  4,  1,  3,  1,  1,  1, 14, 16,
         4, 10,  1,  4], device='cuda:0')

In [971]:
b = (buy_flags * odds_list).flatten()
b[b.nonzero()].flatten()

tensor([1.7000, 2.8000, 1.2000, 1.2000, 1.7000, 2.5000, 2.5000, 1.8000, 1.5000,
        2.5000, 1.7000, 1.6000, 1.6000, 1.7000, 2.6000, 1.4000, 1.6000, 1.5000,
        3.1000, 1.6000, 2.0000, 2.2000, 1.7000, 1.5000, 1.4000, 1.3000, 1.2000,
        1.4000, 1.6000, 3.0000, 3.7000, 2.8000, 1.8000, 1.8000, 1.7000, 3.6000,
        1.4000, 1.7000, 2.4000, 1.5000, 1.8000, 1.7000, 1.5000, 1.6000, 1.7000,
        1.8000, 1.4000, 2.3000, 3.9000, 1.6000, 2.4000, 1.3000, 2.5000, 2.4000,
        1.8000, 1.8000, 1.8000, 2.1000, 1.9000, 1.7000, 2.0000, 1.3000, 8.0000,
        1.3000, 2.9000, 1.3000, 4.5000, 3.1000, 2.6000, 7.3000, 4.4000, 2.1000,
        1.3000, 1.3000, 2.3000, 5.2000, 1.4000, 1.1000, 1.4000, 2.2000, 2.9000,
        1.5000, 1.5000, 1.5000, 1.7000, 2.6000, 1.7000, 1.5000, 3.6000, 1.6000,
        5.2000, 1.5000, 2.0000, 2.2000, 2.4000, 1.7000, 1.4000, 1.2000, 1.9000,
        1.1000, 2.6000, 1.5000, 2.5000, 1.9000, 1.3000, 1.1000, 1.5000, 4.2000,
        1.6000, 1.5000, 1.6000, 2.3000],

In [972]:
a[a.nonzero()].flatten() * (torch.nonzero(buy_flags)[:, 1] != 0)

tensor([ 5,  0,  0,  0,  0,  9,  2,  0,  2,  0,  0,  1,  0,  0, 13,  0,  0,  1,
         0,  3,  1,  7, 16,  0,  0,  0,  0,  0,  0,  9,  1,  4,  3,  2,  4,  7,
         0,  0,  7,  0, 14,  0,  0,  0,  4,  4,  5,  5,  8,  0,  3, 17,  4,  0,
         6,  8,  3,  1,  5,  0,  0,  0,  0,  0,  0,  0,  8,  1,  0,  6,  3,  2,
         0,  0,  4,  4,  3,  0,  1,  3,  6, 10,  0,  0,  7,  0,  2,  7,  6,  8,
         0,  1,  0,  5,  3, 11,  0,  0,  0,  0,  4,  0,  0,  0,  0,  0, 14, 16,
         4, 10,  1,  4], device='cuda:0')

In [973]:
a[a.nonzero()].flatten() * (torch.nonzero(buy_flags)[:, 1] == 0)

tensor([0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1,
        1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 5, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0,
        1, 1, 1, 1, 0, 1, 3, 1, 1, 1, 0, 0, 0, 0, 0, 0], device='cuda:0')

In [974]:
((a[a.nonzero()].flatten() * (torch.nonzero(buy_flags)[:, 1] == 0)) > 1) * b[b.nonzero()].flatten()

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 8.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        5.2000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 2.5000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000],

In [962]:
a = (buy_flags * odds_list* win_flags).flatten()
a[a.nonzero()].flatten()

tensor([2.8000, 1.2000, 1.2000, 1.7000, 1.8000, 2.5000, 1.7000, 1.6000, 1.7000,
        1.4000, 1.6000, 3.1000, 1.5000, 1.4000, 1.3000, 1.2000, 1.4000, 1.6000,
        1.4000, 1.7000, 1.5000, 1.7000, 1.5000, 1.6000, 1.6000, 2.4000, 1.7000,
        2.0000, 1.3000, 8.0000, 1.3000, 2.9000, 1.3000, 2.6000, 1.3000, 1.3000,
        1.1000, 1.5000, 1.5000, 2.6000, 5.2000, 2.0000, 1.4000, 1.2000, 1.9000,
        1.1000, 1.5000, 2.5000, 1.9000, 1.3000, 1.1000], device='cuda:0',
       dtype=torch.float64)

In [958]:
(buy_flags * odds_list* win_flags).sum()

tensor(95.6000, device='cuda:0', dtype=torch.float64)

In [959]:
(odds_list * (test_scores >= max_score) * buy_flags * win_flags).sum()

tensor(95.6000, device='cuda:0', dtype=torch.float64)

In [826]:
# 1人気の馬
buy_flags = F.one_hot(odds_list.min(dim=1)[1], 18)
win = (odds_list * buy_flags * win_flags).sum()
hit = (buy_flags * win_flags).sum()
cost = buy_flags.sum() + 1e-10
print(f'recovery {win/cost*100}, hit {hit}, cost {cost}, accuracy {hit/cost}')

recovery 79.86819871578237, hit 963.0, cost 2959.0, accuracy 0.32544779777526855


In [90]:
(odds_list * buy_flags * win_flags)[:, 0].sum() / (buy_flags * win_flags).sum()

tensor(3.6789, device='cuda:0', dtype=torch.float64)

In [104]:
def test_one_day(df, tday):
    df_test = df[df['date']==tday]
    trainval_num = (df['date'] < tday).sum()
    val_num = int(trainval_num / 5)
    train_num = trainval_num - val_num
    while 1:
        if df.iloc[train_num-1]['race_num'] == df.iloc[train_num]['race_num']:
            train_num += 1
        else:
            break
    df_train = df[:train_num]
    df_val = df[train_num:trainval_num]
    
    keiba_train = KDataset(df_train)
    keiba_val = KDataset(df_val)
    keiba_test = KDataset(df_test)
    
    keiba_trainloader = DataLoader(keiba_train, shuffle=True, batch_size=240, collate_fn=keiba_train.collate_fn)
    keiba_valloader = DataLoader(keiba_val, shuffle=True, batch_size=240, collate_fn=keiba_val.collate_fn)
    keiba_testloader = DataLoader(keiba_test, shuffle=False, batch_size=1, collate_fn=keiba_test.collate_fn)
    
    model = Transformer(nhead=5, num_layers=10, embedding_dim=embedding_dim)
    model.cuda()
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=5e-4)
    
    for epoch in range(1):
        model.train()
        for i, (xcs, xfs, ys, odds) in enumerate(keiba_trainloader):
            optimizer.zero_grad()
            output = model(xcs.cuda(), xfs.cuda())
            #y_true = torch.tensor([y.min(dim=0)[1] for y in ys])
            y_true = torch.stack([torch.cat([(y<=3).long(), torch.zeros(18-len(y))], dim=0) for y in ys], dim=0)
            loss = criterion(output, y_true.cuda())
            print(f'{i} / {len(keiba_trainloader)} {loss.item()}', end='\r')
            loss.backward()
            #print(loss.item())
            optimizer.step()
        
        model.eval()
        val_loss = []
        for i, (xcs, xfs, ys, odds) in enumerate(keiba_valloader):
            print(f'{i} / {len(keiba_valloader)}', end='\r')
            output = model(xcs.cuda(), xfs.cuda())
            #y_true = torch.tensor([y.min(dim=0)[1] for y in ys])
            y_true = torch.stack([torch.cat([(y<=3).long(), torch.zeros(18-len(y))], dim=0) for y in ys], dim=0)
            loss = criterion(output, y_true.cuda())
            val_loss.append(loss)
            break
        
        test_scores = []
        win_flags = []
        odds_list = []
        for i, (xcs, xfs, ys, odds) in enumerate(keiba_testloader):
            print(f'{i} / {len(keiba_testloader)}', end='\r')
            output = model(xcs.cuda(), xfs.cuda())
            output = torch.sigmoid(output)
            test_scores.append(output)
            y_true = torch.tensor([y.min(dim=0)[1].item() for y in ys]).cuda()
            y_onehot = F.one_hot(y_true, 18).float()
            win_flags.append(y_onehot)
            odds = torch.stack([torch.cat([odd, torch.zeros(18-len(odd))], dim=0) for odd in odds], dim=0)
            odds_list.append(odds)
        return test_scores, win_flags, odds_list               

In [105]:
test_scores, win_flags, odds_list = [], [], []
test_from = 10

for i, tday in enumerate(np.sort(df['date'].unique()[-test_from:])):
    print(f'{i} / {len(np.sort(df["date"].unique()[-test_from:]))}')
    s, w, o = test_one_day(df, tday)
    test_scores.extend(s)
    win_flags.extend(w)
    odds_list.extend(o)

0 / 10
1 / 10350 0.39617770910263063
2 / 10550 0.41054525971412664
3 / 10550 0.38854262232780457
4 / 10450 0.39620646834373474
5 / 10550 0.40993005037307746
6 / 10550 0.40325838327407837
7 / 10550 0.41802340745925903
8 / 10351 0.39674076437950134
9 / 10351 0.45109203457832336
22 / 2351 0.39875671267509464

In [106]:
test_scores = torch.cat(test_scores, dim=0)
win_flags = torch.cat(win_flags, dim=0)
odds_list = torch.cat(odds_list, dim=0).cuda()

In [112]:
thresh = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

for th in thresh:
    gap = (torch.sort(test_scores, dim=1)[0][:, -1] - torch.sort(test_scores, dim=1)[0][:, -2]) > 0.
    gap = gap.unsqueeze(1)
    max_score = test_scores.max(dim=1)[0].unsqueeze(1)
    buy_flags = (test_scores > th) * gap
    win = (odds_list * (test_scores >= max_score) * buy_flags * win_flags).sum()
    hit = ((test_scores >= max_score) * buy_flags * win_flags).sum()
    cost = buy_flags.sum() + 1e-10
    print(f'th {th} recovery {win/cost*100}, hit {hit}, cost {cost}')

th 0.0 recovery 6.675925925925926, hit 50.0, cost 5400.0
th 0.1 recovery 8.446579194001874, hit 50.0, cost 4268.0
th 0.2 recovery 19.51900698215671, hit 45.0, cost 1289.0
th 0.3 recovery 36.407407407407405, hit 23.0, cost 270.0
th 0.4 recovery 43.134328358208954, hit 9.0, cost 67.0
th 0.5 recovery 0.0, hit 0.0, cost 5.0
th 0.6 recovery 0.0, hit 0.0, cost 1.000000013351432e-10
th 0.7 recovery 0.0, hit 0.0, cost 1.000000013351432e-10
th 0.8 recovery 0.0, hit 0.0, cost 1.000000013351432e-10
th 0.9 recovery 0.0, hit 0.0, cost 1.000000013351432e-10


In [108]:
test_scores > 0.1

tensor([[0.2123, 0.1828, 0.2272,  ..., 0.1513, 0.0420, 0.0420],
        [0.1333, 0.1214, 0.1114,  ..., 0.1108, 0.0723, 0.1382],
        [0.1124, 0.1278, 0.1118,  ..., 0.3269, 0.0326, 0.0326],
        ...,
        [0.2358, 0.2683, 0.2689,  ..., 0.0344, 0.0344, 0.0344],
        [0.1966, 0.1927, 0.1961,  ..., 0.0569, 0.0569, 0.0569],
        [0.2221, 0.2532, 0.1674,  ..., 0.3429, 0.1107, 0.1107]],
       device='cuda:0', grad_fn=<CatBackward>)