In [239]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn.functional as F
import random
import tqdm
from torch import nn
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)
np.set_printoptions(threshold=200)
torch.set_printoptions(edgeitems=200)

In [240]:
import pickle
with open("pickle/pickle_r_all", "rb") as f:
    r = pickle.load(f)
with open("pickle/pickle_p_all", "rb") as f:
    p = pickle.load(f)
# with open("pickle/pickle_st_all", "rb") as f:
#     st = pickle.load(f)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [241]:
# data = r.data_c[r.data_c['date']>'2024-07-01']
data = r.data_c

In [242]:
import numpy as np
import torch



class RacingDataProcessor:
    def __init__(self, data, feature_size, num_selections,pre_race_rate=0.5,task_type='regression'):
        self.data = data
        self.num_selections = num_selections
        self.task_type = task_type
        self.pre_race_rate = pre_race_rate

        # ID数の取得
        self.num_jockeys = data['jockey_id'].max() + 1
        self.num_horses = data['horse_id'].max() + 1

        # 初期化
        self.feature_size = feature_size
        self.jockey_features = torch.zeros(self.num_jockeys, feature_size, requires_grad=False)
        self.horse_features = torch.zeros(self.num_horses, feature_size, requires_grad=False)

        # カラムのセットアップ
        self.environment_columns = data.columns[data.columns.str.contains('weather_|ground_state_|開催地_|R_')].tolist() + \
                                   ['course_len',  'corse_used_num','horse_len']
        self.horse_columns = data.columns[data.columns.str.contains('性_')].tolist() + \
                             ['枠番','斤量', '年齢', '体重', '体重変化']

    def reset_features(self):
        """騎手と馬の特徴量をリセット"""
        self.jockey_features.zero_()
        self.horse_features.zero_()

    def generate_selection_matrix(self, total_length, num_selections=14):
        """選択リストの生成"""
        indices = np.arange(total_length)
        np.random.shuffle(indices)

        # total_length が num_selections より小さい場合、リピートして num_selections に合わせる
        if total_length < num_selections:
            repeat_factor = (num_selections // total_length) + 1  # 繰り返しの回数を計算
            extended_indices = np.tile(indices, repeat_factor)[:num_selections]
            selection_matrix = np.vstack([np.roll(extended_indices, i) for i in range(total_length)])
        else:
            selection_matrix = np.concatenate([np.roll(indices, i)[:num_selections] for i in range(total_length)], axis=0).reshape(total_length, num_selections)
        
        return selection_matrix

    def convert_to_tensor(self, df, columns):
        """Pandas DataFrameをTensorに変換"""
        return torch.tensor(df[columns].to_numpy(dtype='float32'))
    
    def get_tansho_info(self, df):
        """y__binary_in3をTensorに変換"""
        return torch.tensor(df['単勝_norm'].to_numpy(dtype='float32'))
    
    def get_tansho_return(self, df):
        """y__binary_in3をTensorに変換"""
        return torch.tensor(df['tansho_return'].to_numpy(dtype='float32'))
    
    def get_hukusho_return(self, df):
        """y__binary_in3をTensorに変換"""
        return torch.tensor(df['hukusho_return'].to_numpy(dtype='float32'))
    
    def get_target(self,df):
        if self.task_type == 'regression':
            return torch.tensor(df['y__reg_着順'].to_numpy(dtype='float32'))
        elif self.task_type == 'binary':
            return torch.tensor(df['y__binary_in3'].to_numpy(dtype='float32')).to(dtype=torch.long)
        elif self.task_type == 'list_net':
            return torch.tensor(df['着順'].to_numpy(dtype='float32')*0.1)
        else:
            raise ValueError(f"Unsupported task type: {self.task_type}")
            
        

    def get_previous_race_result(self, df):
        """前走の着順を処理してTensorに変換"""
        previous_result = 2 - df['前走の着順'].to_numpy(dtype='float32') * 0.1
        return torch.tensor(previous_result)

    def prepare_batch_data(self, horse_ids, jockey_ids, df):
        """共通するバッチデータの準備"""
        selection_matrix = self.generate_selection_matrix(len(horse_ids), self.num_selections)
        
        env_tensor = self.convert_to_tensor(df.iloc[0], self.environment_columns)
        env_batch = env_tensor.unsqueeze(0).expand(selection_matrix.shape[0], self.num_selections, -1)

        horse_id_batch = torch.tensor(horse_ids[selection_matrix])
        horse_feature_batch = self.horse_features[horse_ids][selection_matrix]
        jockey_id_batch = torch.tensor(jockey_ids[selection_matrix])
        jockey_feature_batch = self.jockey_features[jockey_ids][selection_matrix]

        return selection_matrix, env_batch, horse_feature_batch, horse_id_batch, jockey_feature_batch, jockey_id_batch

    def process_single_race(self, index):
        """単一レースのデータを取得"""
        df = self.data.loc[index]
        horse_ids = df['horse_id'].unique()
        jockey_ids = df['jockey_id'].unique()

        # バッチデータの準備
        selection_matrix, env_batch, horse_feature_batch, horse_id_batch, jockey_feature_batch, jockey_id_batch = \
            self.prepare_batch_data(horse_ids, jockey_ids, df)

        # 馬の特徴量とターゲットデータの計算
        horse_feature_tensor = self.convert_to_tensor(df, self.horse_columns).view(len(horse_ids), -1)
        target = self.get_target(df)
        tansho = self.get_tansho_info(df)
        return_tansho = self.get_tansho_return(df)
        return_hukusho = self.get_hukusho_return(df)
        previous_results = self.get_previous_race_result(df)

        # 選択されたリストに基づくバッチの作成
        horse_feature_batch_selected = horse_feature_tensor[selection_matrix]
        target_batch = target[selection_matrix]
        tansho_batch = tansho[selection_matrix]
        return_tansho_batch = return_tansho[selection_matrix]
        return_hukusho_batch = return_hukusho[selection_matrix]
        prev_result_batch = previous_results[selection_matrix]

        return horse_id_batch, horse_feature_batch, horse_feature_batch_selected, env_batch, jockey_id_batch, jockey_feature_batch, target_batch, prev_result_batch,tansho_batch,return_tansho_batch,return_hukusho_batch

    def process_minibatch(self, index_list):
        """ミニバッチでレースデータを処理"""
        results = [self.process_single_race(index) for index in index_list]

        # 結果の統合
        return map(lambda t: torch.cat(t, dim=0), zip(*results))

    def update_features(self, new_data, id_list, feature_list):
        """新しいデータに基づいて特徴量を更新"""
        unique_ids = torch.unique(id_list)
        id_data_dict = {id.item(): [] for id in unique_ids}

        for i, ids in enumerate(id_list):
            for j, id in enumerate(ids):
                id_data_dict[id.item()].append(new_data[i, j])

        # 特徴量の更新
        for id, data in id_data_dict.items():
            feature_list[id] = (self.pre_race_rate * feature_list[id] + torch.mean(torch.stack(data), dim=0))/(1+self.pre_race_rate)


In [243]:
import torch
import torch.nn.functional as F
import random
from torch import nn

class FeedForwardLayer(nn.Module):
    def __init__(self, input_dim, output_dim, dropout_rate=0.1):
        super(FeedForwardLayer, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.batch_norm = nn.BatchNorm1d(output_dim)
        self.dropout = nn.Dropout(dropout_rate)
    
    def forward(self, x):
        x = self.linear(x)
        x = self.batch_norm(x)
        x = self.dropout(x)
        return x

class ResidualBlock(nn.Module):
    def __init__(self, input_dim, output_dim, dropout_prob=0.9):
        super(ResidualBlock, self).__init__()
        self.feedforward = FeedForwardLayer(input_dim, output_dim)
        self.linear = nn.Linear(output_dim, output_dim)
        self.batch_norm = nn.BatchNorm1d(output_dim)
        self.dropout = nn.Dropout(0.1)
        self.dropout_prob = dropout_prob
        self.shortcut = nn.Linear(input_dim, output_dim) if input_dim != output_dim else None

    def forward(self, x):
        identity = x
        
        if self.training:
            if random.random() < self.dropout_prob:
                out = F.relu(self.feedforward(x))
                out = self.linear(out)
                out = self.batch_norm(out)
            else:
                out = self.shortcut(x) if self.shortcut is not None else x
        else:
            out = F.relu(self.feedforward(x))
            out = self.linear(out)
            out = self.batch_norm(out)

        if self.shortcut is not None:
            identity = self.shortcut(identity)

        out += identity
        out = self.dropout(out)

        return out

class ResidualLayerBlock(nn.Module):
    def __init__(self, layer_dims):
        super(ResidualLayerBlock, self).__init__()
        self.input_dim = layer_dims[0]
        self.layers = self._build_layers(ResidualBlock, layer_dims)

    def _build_layers(self, block, layer_dims):
        layers = []
        num_layers = len(layer_dims)
        input_dim = self.input_dim
        for i, output_dim in enumerate(layer_dims):
            dropout_prob = 1 - i / (2 * num_layers)
            layers.append(block(input_dim, output_dim, dropout_prob))
            input_dim = output_dim
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)

class EncoderLayer(nn.Module):
    def __init__(self, layer_dims):
        super(EncoderLayer, self).__init__()
        self.input_dim = layer_dims[0]
        self.layers = self._build_layers(FeedForwardLayer, layer_dims)

    def _build_layers(self, block, layer_dims):
        layers = []
        input_dim = self.input_dim
        for output_dim in layer_dims:
            layers.append(block(input_dim, output_dim))
            layers.append(nn.ReLU())
            input_dim = output_dim
        del layers[-1]  # Remove last ReLU to maintain final structure
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)


class BaseHorseRacePredictor(nn.Module):
    def __init__(self, device, data_processor, hidden_size, env_input_dim, env_output_dim, horse_feature_input_dim, horse_feature_output_dim):
        super(BaseHorseRacePredictor, self).__init__()
        self.device = device
        self.num_horses = data_processor.num_horses
        self.num_selections = data_processor.num_selections
        self.hidden_size = hidden_size

        # エンコードレイヤー
        self.env_encoder = EncoderLayer([env_input_dim, 32, env_output_dim])
        self.horse_feature_encoder = EncoderLayer([horse_feature_input_dim, 32, horse_feature_output_dim])

        # 残差ブロック
        self.horse_residual_block = ResidualLayerBlock([hidden_size + env_output_dim + horse_feature_output_dim + 1, 128, 64, hidden_size])
        self.jockey_residual_block = ResidualLayerBlock([hidden_size * 2, 128, 64, hidden_size])
        self.race_residual_block = ResidualLayerBlock([hidden_size * self.num_selections, 128, 64, hidden_size * 3])
        self.listwise_residual_block = ResidualLayerBlock([hidden_size * 4, 128, 64, 32, 16, 8])

    def forward(self, horse_data, jockey_data, env_data, horse_features, prev_race_results):
        batch_size, seq_len, _ = horse_data.size()

        # 環境データと馬の特徴をエンコード
        env_data = env_data.view(-1, env_data.shape[-1])
        encoded_env = self.env_encoder(env_data)

        horse_features = horse_features.view(-1, horse_features.shape[-1])
        encoded_horse_features = self.horse_feature_encoder(horse_features)

        # 馬データをフラット化して組み合わせ
        horse_data_flat = horse_data.view(-1, self.hidden_size)
        prev_race_results = prev_race_results.view(-1, 1)
        combined_horse_data = torch.cat([horse_data_flat, prev_race_results, encoded_horse_features, encoded_env], dim=1)

        # 馬、騎手、レースデータを順次処理
        horse_residual_output = self.horse_residual_block(combined_horse_data)
        jockey_data_flat = jockey_data.view(-1, self.hidden_size)
        combined_jockey_data = torch.cat([horse_residual_output, jockey_data_flat], dim=1)
        jockey_residual_output = self.jockey_residual_block(combined_jockey_data)

        race_data = jockey_residual_output.view(batch_size, -1)
        race_residual_output = self.race_residual_block(race_data)
        race_residual_output = race_residual_output.unsqueeze(1).repeat(1, self.num_selections, 1).view(-1, race_residual_output.shape[-1])

        final_data = torch.cat([race_residual_output, jockey_residual_output], dim=1)
        final_residual_output = self.listwise_residual_block(final_data)
        
        updated_horse_data = horse_residual_output.detach().clone().reshape(horse_data.size())
        updated_jockey_data = jockey_residual_output.detach().clone().reshape(jockey_data.size())


        # 中間結果を返す（出力クラスで利用する）
        return final_residual_output, updated_horse_data, updated_jockey_data


class RegressionOutput(nn.Module):
    def __init__(self, input_dim, output_dim=1):
        super(RegressionOutput, self).__init__()
        self.fc_output = nn.Linear(input_dim, output_dim)
    
    def forward(self, x):
        return self.fc_output(x)
    
class ClassificationOutput(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(ClassificationOutput, self).__init__()
        self.fc_output = nn.Linear(input_dim, num_classes)
    
    def forward(self, x):
        return self.fc_output(x)
    
class HorseRaceModel(nn.Module):
    def __init__(self, task_type, device, data_processor, hidden_size, env_input_dim, env_output_dim, horse_feature_input_dim, horse_feature_output_dim, output_dim=1, num_classes=2):
        super(HorseRaceModel, self).__init__()
        
        # ベースのモデル（共通の処理部分）
        self.base_model = BaseHorseRacePredictor(device, data_processor, hidden_size, env_input_dim, env_output_dim, horse_feature_input_dim, horse_feature_output_dim)
        
        # タスクに応じた出力クラスの選択
        if task_type == 'regression':
            self.output_layer = RegressionOutput(8, 1)
        elif task_type == 'binary':
            self.output_layer = ClassificationOutput(8, 2)
        elif task_type == 'list_net':
            self.output_layer = RegressionOutput(8, 1)       
        else:
            raise ValueError(f"Unsupported task type: {task_type}")
    
    def forward(self, horse_data, jockey_data, env_data, horse_features, prev_race_results):
        # ベースの処理を実行
        final_residual_output, updated_horse_data, updated_jockey_data = self.base_model(horse_data, jockey_data, env_data, horse_features, prev_race_results)
        
        # タスクに応じた出力を取得
        predictions = self.output_layer(final_residual_output)
        return predictions, updated_horse_data, updated_jockey_data,final_residual_output

In [308]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn.functional as F
import random
import tqdm
from torch import nn
class LearningModule:
    def __init__(self, data_processor, model, dataset,task_type, train_split, device, optimizer, criterion,log_func):
        self.data_processor = data_processor
        self.model = model
        self.dataset = dataset
        self.task_type = task_type
        self.device = device
        self.optimizer = optimizer
        self.criterion = criterion
        self.log_func = log_func

        self.date_list = self.dataset['date'].sort_values().unique()
        split_index = int(train_split * len(self.date_list))
        self.train_dates = self.date_list[:split_index]
        self.eval_dates = self.date_list[split_index:]

        self.train_losses = []
        self.eval_losses = []

    def process_batch(self, date):
        race_ids = self.dataset[self.dataset['date'] == date].index.unique()
        horse_ids, horse_data, horse_features, env_data, jockey_ids, jockey_data, labels, prev_race_data,tansho_batch,return_tansho_batch,return_hukusho_batch = self.data_processor.process_minibatch(race_ids)

        # Device transfer
        inputs = {
            'horse_data': horse_data.to(self.device),
            'jockey_data': jockey_data.to(self.device),
            'env_data': env_data.to(self.device),
            'horse_features': horse_features.to(self.device),
            'prev_race_data': prev_race_data.to(self.device),
        }

        predictions, updated_horse_data, updated_jockey_data,final_residual_output = self.model(
            inputs['horse_data'], inputs['jockey_data'], inputs['env_data'], 
            inputs['horse_features'], inputs['prev_race_data']
        )
        if self.task_type == 'regression':
            labels = labels.view(-1).to(self.device)
            predictions = predictions.view(-1).to(self.device)
        elif self.task_type == 'binary':
            labels = labels.view(-1).to(self.device)
            predictions = predictions.view(-1,2).to(self.device)
        elif self.task_type == 'list_net':
            predictions = predictions.view(-1,self.data_processor.num_selection).to(self.device)     
        else:
            raise ValueError(f"Unsupported task type: {self.task_type}") 
        
        self.data_processor.update_features(updated_horse_data.cpu(), horse_ids, self.data_processor.horse_features)
        self.data_processor.update_features(updated_jockey_data.cpu(), jockey_ids, self.data_processor.jockey_features)

        loss = self.criterion(predictions, labels)
        return horse_ids, predictions, labels, loss
    
    def merge_pred_results(self, horse_ids, pred, date):
        horse_ids = horse_ids.view(-1, 1)
        horse_ids = horse_ids.squeeze(1)
        horse_ids_np = horse_ids.cpu().numpy()
        pred_np = pred.cpu().detach().numpy()
        if self.task_type == 'binary':
            df = pd.DataFrame({
                'horse_id': horse_ids_np,
                'pred': pred_np[:,0],
            })
        else:
            df = pd.DataFrame({
                'horse_id': horse_ids_np,
                'pred': pred_np,
            })
        # horse_idごとの平均値を計算
        df['date'] = date#pd.to_datetime(date, format='%Y-%m-%d')
        df_results = df.groupby(['horse_id','date'], as_index=False)['pred'].mean()
        return df_results

    def process_pred_table(self,results):
        df = pd.merge(self.dataset.reset_index(), results, on=['horse_id', 'date'])
        return df

    def train(self, log_interval=100):
        self.data_processor.reset_features()
        self.model.train()
        mean_loss = 0

        for i, date in enumerate(tqdm.tqdm(self.train_dates)):
            horse_id, pred, label, loss = self.process_batch(date)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            

            mean_loss += (loss.item() - mean_loss) / (i + 1)
            df = self.merge_pred_results(horse_id, pred, date)
            df = self.process_pred_table(df)
            if i % log_interval == 0:
                
                print(f'Loss: {mean_loss:.4f}')
                self.log_func(df,self.task_type)
                print(pred[:3])
                print('--------------')

        self.train_losses.append(mean_loss)
        return mean_loss

    def evaluate(self, log_interval=50):
        self.model.eval()
        mean_loss = 0
        df_list = pd.DataFrame([])

        with torch.no_grad():
            for i, date in enumerate(tqdm.tqdm(self.eval_dates)):
                horse_id, pred, label, loss = self.process_batch(date)
                mean_loss += (loss.item() - mean_loss) / (i + 1)
                df = self.merge_pred_results(horse_id, pred, date)
                df = self.process_pred_table(df)
                df_list = pd.concat([df_list,df], axis=0)
                if i % log_interval == 0:
                    print(f'Loss: {mean_loss:.4f}')
                    self.log_func(df,self.task_type)
                    print(pred[:3])
                    print('--------------')
                

        self.log_func(df_list,self.task_type)

        self.eval_losses.append(mean_loss)
        return mean_loss,df_list


In [309]:
def set_criterion(task_type):
    if task_type == 'regression':
        criterion = nn.MSELoss()
    elif task_type == 'binary':
        criterion = nn.CrossEntropyLoss()
    elif task_type == 'list_net':
        criterion = listnet_loss
    else:
        raise ValueError(f"Unsupported task type: {task_type}")
    
    return criterion

def log_func(df,task_type):
    if task_type == 'regression':
        df['rank'] = df.groupby('index')['pred'].rank(ascending=True)
    else:
        df['rank'] = df.groupby('index')['pred'].rank(ascending=False)

    df_rank1 = df[df['rank'] == 1]
    df_rank1_high_return = df_rank1[df_rank1['単勝']>5]

    race_num = len(df['index'].unique())
    bet_num = len(df_rank1_high_return)

    correct_data = df_rank1[df_rank1['着順'] == 1]
    hukusho_correct_data = df_rank1[df_rank1['着順'] <=3]
    correct_high_return = df_rank1_high_return[df_rank1_high_return['着順'] == 1]
    correct_high_return_hukusho = df_rank1_high_return[df_rank1_high_return['着順'] <= 3]

    accuracy = len(correct_data) / len(df_rank1) * 100 if len(df_rank1) > 0 else 0
    accuracy_hukusho = len(hukusho_correct_data) / len(df_rank1) * 100 if len(df_rank1) > 0 else 0
    
    return_money_tansho = correct_high_return['tansho_return'].sum()/len(df_rank1_high_return) * 100 if bet_num > 0 else 100
    return_money_hukusho = correct_high_return_hukusho['hukusho_return'].sum()/bet_num * 100 if bet_num > 0 else 100


    print(f'race_num {race_num} | bet_num: {bet_num} | per: {bet_num/race_num*100:.1f}%')
    print(f'Tansho:   Accuracy: {accuracy:.1f}% | Return_money: {return_money_tansho:.1f}%')
    print(f'Hukusho:  Accuracy: {accuracy_hukusho:.1f}% | Return_money: {return_money_hukusho:.1f}%')
    
    return accuracy,return_money_tansho



In [312]:
from ListWise_func import listnet_loss
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# デバイスの確認
print("Device: {}".format(device))

task_type = 'binary'
h_size = 16
num_selections=14
pre_race_rate = 0.5
rdp = RacingDataProcessor(data,h_size,num_selections,pre_race_rate,task_type)

E_in = len(rdp.environment_columns)
E_out = 32
dh_in = len(rdp.horse_columns)
dh_out = 32
print("E_in: {} dh_in: {}".format(E_in,dh_in))

net = HorseRaceModel(task_type, device,rdp, h_size,E_in,E_out, dh_in,dh_out).to(device)
# 損失関数の定義
criterion = set_criterion(task_type)
optimizer = optim.Adam(net.parameters(), lr=0.001)

train_split = 0.7
lem = LearningModule(rdp,net,data,task_type,train_split,device,optimizer, criterion,log_func)


Device: cuda
E_in: 35 dh_in: 8


In [313]:
epoch = 13

for i in range(epoch):
    # エポックの進行状況を表示
    print('---------------------------------------------')
    print("Epoch: {}/{}".format(i+1, epoch))

    train_loss = lem.train(log_interval = 50)
    test_loss,_ = lem.evaluate(log_interval = 50)

    print("Train_Loss: {:.4f}".format(train_loss))
    print("Test_Loss: {:.4f} ".format(test_loss))
    
    if i%1 == 0:
        torch.save(lem.model, 'model2/model_{}.pth'.format(i))



---------------------------------------------
Epoch: 1/13


  0%|          | 1/562 [00:00<05:39,  1.65it/s]

Loss: 0.9394
race_num 24 | bet_num: 23 | per: 95.8%
Tansho:   Accuracy: 0.0% | Return_money: 0.0%
Hukusho:  Accuracy: 29.2% | Return_money: 86.1%
tensor([[ 0.2711,  0.1512],
        [-0.7177,  0.4257],
        [-0.6132,  0.3339]], device='cuda:0', grad_fn=<SliceBackward0>)
--------------


  6%|▌         | 35/562 [00:21<05:18,  1.65it/s]


KeyboardInterrupt: 

In [235]:
net.eval()
df_list = pd.DataFrame([])

with torch.no_grad():
    for i, date in enumerate(tqdm.tqdm(lem.date_list)):
        race_ids = lem.data_processor.data[lem.data_processor.data['date'] == date].index.unique()
        horse_ids, horse_data, horse_features, env_data, jockey_ids, jockey_data, labels, prev_race_data,tansho_batch,return_tansho_batch,return_hukusho_batch = lem.data_processor.process_minibatch(race_ids)

        inputs = {
            'horse_data': horse_data.to(device),
            'jockey_data': jockey_data.to(device),
            'env_data': env_data.to(device),
            'horse_features': horse_features.to(device),
            'prev_race_data': prev_race_data.to(device),
        }

        predictions, updated_horse_data, updated_jockey_data,final_residual_output = lem.model(
            inputs['horse_data'], inputs['jockey_data'], inputs['env_data'], 
            inputs['horse_features'], inputs['prev_race_data']
        )

        lem.data_processor.update_features(updated_horse_data.cpu(), horse_ids, lem.data_processor.horse_features)
        lem.data_processor.update_features(updated_jockey_data.cpu(), jockey_ids, lem.data_processor.jockey_features)

        # loss = criterion(predictions, labels)
        final_residual_output = final_residual_output.view(-1,rdp.num_selections,3).cpu().detach()
        # tansho_batch = torch.unsqueeze(tansho_batch,2)
        # state = torch.cat([final_residual_output,tansho_batch], dim=2)
        # state = state.view(-1,rdp.num_selections*4)
        # reward = torch.cat([return_tansho_batch,return_hukusho_batch],dim=1)

        horse_ids = horse_ids.view(-1, 1)
        horse_ids = horse_ids.squeeze(1)
        horse_ids_np = horse_ids.cpu().numpy()
        final_residual_output_np = final_residual_output.view(-1,3).numpy()
        df = pd.DataFrame({
            'horse_id': horse_ids_np,
            'output_0': final_residual_output_np[:,0],
            'output_1': final_residual_output_np[:,1],
            'output_2': final_residual_output_np[:,2],
            })
        # horse_idごとの平均値を計算
        df['date'] = date#pd.to_datetime(date, format='%Y-%m-%d')
        df_results = df.groupby(['horse_id','date'], as_index=False)[['output_0', 'output_1', 'output_2']].mean()
        
        
        df_results = lem.process_pred_table(df_results)
        df_list = pd.concat([df_list,df_results], axis=0)

  1%|          | 6/803 [00:02<06:31,  2.04it/s]


KeyboardInterrupt: 

In [236]:
df_results

Unnamed: 0,index,着順,枠番,馬番,馬名,斤量,騎手,タイム,単勝,人気,horse_id,jockey_id,course_len,date,年齢,体重,体重変化,corse_used_num,entropy,y__binary_in3,y__binary_in1,horse_len,rank_per,y__multi_3,y__multi_5,y__reg_着順,y__reg_entropy,前走の着順,weather_曇,weather_晴,weather_雨,weather_小雨,weather_小雪,weather_雪,race_type_芝,race_type_ダート,race_type_障害,ground_state_良,ground_state_不良,ground_state_稍重,ground_state_重,性_牡,性_牝,性_セ,R_01,R_02,R_03,R_04,R_05,R_06,R_07,R_08,R_09,R_10,R_11,R_12,開催地_札幌,開催地_函館,開催地_福島,開催地_新潟,開催地_東京,開催地_中山,開催地_中京,開催地_京都,開催地_阪神,開催地_小倉,単勝_norm,tansho_return,hukusho_return,output_0,output_1,output_2
0,201707010201,1,0.546851,11,スズカフリオーソ,-0.028341,加藤祥太,0.488615,2.7,1.0,5907,120,0.356905,2017-01-16,-0.47624,-0.346794,-0.362821,-1.150429,1.95,1,1,0.262814,0.066667,0,0,0.07,1.95,10.0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-0.649896,2.7,1.3,0.870479,0.340478,-1.007345
1,201707010201,2,0.987327,14,クレイトンシチー,-0.028341,鮫島克駿,0.509082,15.6,7.0,5074,121,0.356905,2017-01-16,-0.47624,0.243022,0.265959,-1.150429,4.48,1,0,0.262814,0.133333,0,0,0.13,4.48,10.0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-0.517455,0.0,3.8,0.840126,0.326308,-1.018378
2,201707010201,3,-0.334099,8,アシャカリブラ,-1.103643,荻野極,0.512493,18.3,8.0,5531,124,0.356905,2017-01-16,-0.47624,1.488189,-0.677211,-1.150429,4.71,1,0,0.262814,0.2,0,1,0.2,4.71,10.0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-0.489735,0.0,3.7,0.885524,0.355285,-0.994155
3,201707010201,4,0.106376,9,ミラクルシップ,0.50931,国分恭介,0.515904,14.0,6.0,5311,92,0.356905,2017-01-16,-0.47624,0.177487,-0.048431,-1.150429,4.32,0,0,0.262814,0.266667,0,1,0.27,4.32,10.0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-0.533881,0.0,0.0,0.824753,0.315039,-1.023874
4,201707010201,5,-1.655525,2,シゲルボスザル,0.50931,菱田裕二,0.515904,13.2,5.0,6388,110,0.356905,2017-01-16,-0.47624,-0.608934,-0.677211,-1.150429,4.24,0,0,0.262814,0.333333,1,1,0.33,4.24,10.0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-0.542095,0.0,0.0,0.836413,0.326976,-1.018275
5,201707010201,6,-0.774575,5,シンゼンキング,-0.028341,義英真,0.519315,61.2,10.0,4185,119,0.356905,2017-01-16,-0.47624,-0.019118,0.265959,-1.150429,6.45,0,0,0.262814,0.4,1,2,0.4,6.45,10.0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-0.049292,0.0,0.0,0.832446,0.32666,-1.018476
6,201707010201,7,-0.334099,7,オメガガーディアン,0.50931,岩田康誠,0.53296,10.0,4.0,7523,190,0.356905,2017-01-16,-0.47624,2.864426,-0.677211,-1.150429,3.84,0,0,0.262814,0.466667,1,2,0.47,3.84,10.0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-0.574948,0.0,0.0,0.830409,0.313437,-1.023573
7,201707010201,8,0.106376,10,キモンズラブ,-1.641294,伴啓太,0.536371,182.1,12.0,6497,113,0.356905,2017-01-16,-0.47624,-0.674469,-0.048431,-1.150429,8.02,0,0,0.262814,0.533333,1,2,0.53,8.02,11.0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1.191956,0.0,0.0,0.859689,0.335704,-1.011598
8,201707010201,9,-1.21505,3,セヴィルロアー,0.50931,丸山元気,0.546605,3.6,2.0,6374,95,0.356905,2017-01-16,-0.47624,0.308557,0.265959,-1.150429,2.36,0,0,0.262814,0.6,1,3,0.6,2.36,10.0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-0.640655,0.0,0.0,0.841277,0.32493,-1.018817
9,201707010201,10,-0.774575,6,シロイバラ,0.50931,西村太一,0.550016,297.1,15.0,5175,99,0.356905,2017-01-16,-0.47624,0.243022,1.52352,-1.150429,8.73,0,0,0.262814,0.666667,2,3,0.67,8.73,10.0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2.37263,0.0,0.0,0.831686,0.325889,-1.018862


In [27]:
# net = torch.load('model/model_1.pth')

In [62]:
loss,df_list = lem.evaluate(log_interval = 100)

  0%|          | 1/241 [00:00<03:42,  1.08it/s]

Loss: 0.0646
race_num 36 | bet_num: 6 | per: 16.7%
Tansho:   Accuracy: 22.2% | Return_money: 0.0%
Hukusho:  Accuracy: 55.6% | Return_money: 216.7%
tensor([0.4671, 0.4991, 0.5759], device='cuda:0')
--------------


 42%|████▏     | 101/241 [00:56<01:33,  1.50it/s]

Loss: 0.0688
race_num 36 | bet_num: 2 | per: 5.6%
Tansho:   Accuracy: 22.2% | Return_money: 0.0%
Hukusho:  Accuracy: 61.1% | Return_money: 0.0%
tensor([0.5670, 0.5144, 0.6027], device='cuda:0')
--------------


 83%|████████▎ | 201/241 [01:55<00:24,  1.62it/s]

Loss: 0.0688
race_num 36 | bet_num: 3 | per: 8.3%
Tansho:   Accuracy: 25.0% | Return_money: 290.0%
Hukusho:  Accuracy: 52.8% | Return_money: 93.3%
tensor([0.4664, 0.5039, 0.5707], device='cuda:0')
--------------


100%|██████████| 241/241 [02:19<00:00,  1.73it/s]

race_num 7379 | bet_num: 1222 | per: 16.6%
Tansho:   Accuracy: 24.2% | Return_money: 84.9%
Hukusho:  Accuracy: 56.0% | Return_money: 102.9%





In [102]:
class createData:
    def __init__(self,data,rdp,net,batch_date_size=5):
        self.data = data
        self.date_list = self.data['date'].sort_values().unique()
        self.date_num = len(self.date_list)
        self.rdp = rdp
        self.net = net
        self.batch_size = batch_date_size
        self.count = 0

    def get_state_reward(self,date_list):
        state_memory = []
        reward_memory = []
        for date in date_list:
            race_ids = self.data[self.data['date'] == date].index.unique()
            horse_ids, horse_data, horse_features, env_data, jockey_ids, jockey_data, labels, prev_race_data,tansho_batch,return_tansho_batch,return_hukusho_batch = self.rdp.process_minibatch(race_ids)

            # Device transfer
            inputs = {
                'horse_data': horse_data.to(device),
                'jockey_data': jockey_data.to(device),
                'env_data': env_data.to(device),
                'horse_features': horse_features.to(device),
                'prev_race_data': prev_race_data.to(device),
            }

            predictions, updated_horse_data, updated_jockey_data,final_residual_output = self.net(
                inputs['horse_data'], inputs['jockey_data'], inputs['env_data'], 
                inputs['horse_features'], inputs['prev_race_data']
            )

            self.rdp.update_features(updated_horse_data.cpu(), horse_ids, self.rdp.horse_features)
            self.rdp.update_features(updated_jockey_data.cpu(), jockey_ids, self.rdp.jockey_features)

            # loss = criterion(predictions, labels)
            final_residual_output = final_residual_output.view(-1,rdp.num_selections,8).cpu().detach()
            tansho_batch = torch.unsqueeze(tansho_batch,2)
            state = torch.cat([final_residual_output,tansho_batch], dim=2)
            state = state.view(-1,rdp.num_selections*9)
            reward = torch.cat([return_tansho_batch,return_hukusho_batch],dim=1)
            state_memory.append(state)
            reward_memory.append(reward)
        

        state_memory = torch.cat(state_memory,dim=0)
        reward_memory = torch.cat(reward_memory,dim=0)
        return state_memory,reward_memory
    
    def get_batch_list(self):
        net.eval()
        end = (self.count+1)*self.batch_size
        with torch.no_grad():
            if end >self.date_num:
                self.count = 0
                end = (self.count+1)*self.batch_size
                rdp.reset_features()
            first = self.count*self.batch_size
            self.count +=1

            state,reward = self.get_state_reward(self.date_list[first:end])
            return state,reward


In [118]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random
import logging

logging.basicConfig(level=logging.INFO, format='%(message)s')

class HorseRacingEnv(gym.Env):
    def __init__(self, createData):
        super(HorseRacingEnv, self).__init__()
        

        self.createData = createData
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.createData.rdp.num_selections*9,), dtype=np.float32)
        
        self.bet_num = 8
        self.bet_rate = 1/self.bet_num
        
        # 行動空間: 28次元の離散値（0〜100）
        self.action_space = spaces.MultiDiscrete([createData.rdp.num_selections*2+1] * self.bet_num)
        
        self.reward_num = 0
        self.additional_state = 0
        
        self.max_steps = 10
        self.change_list_step = 0
        self.change_list_max_step = 100 * self.createData.batch_size

        self.log_count = 0
        self.reward_log = []
        
        self.set_state_reward_list()

    def get_state_reward(self):
        index = random.randrange(len(self.state_list))
        return self.state_list[index], self.reward_list[index]
    
    def set_state_reward_list(self):
        self.state_list, self.reward_list = self.createData.get_batch_list()

    def change_state_reward_list(self):
        self.change_list_step +=1
        if self.change_list_step > self.change_list_max_step:
            self.change_list_step = 0
            self.set_state_reward_list()

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.state, self.reward_weights = self.get_state_reward()
        self.current_step = 0
        
        # self.additional_stateを2次元に変換
        # obs = np.concatenate([self.state, np.expand_dims(self.additional_state, axis=0)])
        obs = self.state
        
        # 空のinfo辞書を追加して返す
        return obs, {}
    
    def step(self, action):
        # # 行動を0-1にスケーリングし、正規化
        # action = np.array(action) / 5
        # action /= np.sum(action) if np.sum(action) > 1 else 1
        # bet_money = np.sum(action) if np.sum(action) < 1 else 1
        # 行動と報酬の重みの内積が報酬となる
        # net_reward = (np.dot(action, self.reward_weights)) #* self.additional_state
        
        net_reward = 0
        bet_money = 0
        for i in action:
            if i < self.createData.rdp.num_selections*2:
                net_reward += self.reward_weights[i]*self.bet_rate
                bet_money +=self.bet_rate

        net_reward -= bet_money

        reward = 0 if net_reward < 0.01 else 1
        
        # 追加状態を報酬の値で更新
        self.additional_state += net_reward
        self.reward_num += reward
        self.state, self.reward_weights = self.get_state_reward()

        self.log_count += 1
        if self.log_count % 500 == 0:
            self.reward_log.append(self.additional_state)
            logging.info(f'Step: {self.log_count},  Reward: {self.additional_state:.1f},win_num:{self.reward_num:.1f}')
            self.additional_state = 0
            self.reward_num = 0
        if self.log_count % 2000 == 0:
            logging.info(f'action {action}')
        
        
        self.current_step += 1
        done = self.current_step >= self.max_steps
        truncated = False  # 条件に応じてTrueに変更することも可能

        self.change_state_reward_list()

        return self.state, reward, done,truncated, {}
        # return np.concatenate([self.state, [self.additional_state]]), reward, done,truncated, {}
    
    def render(self, mode='human'):
        pass



In [119]:
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
# カスタムネットワークの定義
class CustomNetwork(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 256):
        # observation_space.shape[0]が入力の次元数
        super(CustomNetwork, self).__init__(observation_space, features_dim)

        self.net = nn.Sequential(
            nn.Linear(observation_space.shape[0], 128),
            nn.ReLU(),
            nn.Linear(128, 128),  # 特徴次元に出力
            nn.ReLU(),
            nn.Linear(128, 128),  # 特徴次元に出力
            nn.ReLU(),
            nn.Linear(128, features_dim),  # 特徴次元に出力
            nn.ReLU()
        )

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        return self.net(observations)

In [120]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# 学習で使う状態を渡すためのクラス
cd = createData(data, rdp, net, batch_date_size=6)
env = HorseRacingEnv(cd)
env.rdp = rdp
env = DummyVecEnv([lambda: env])

# PPOエージェントに自作ネットワークを統合する
policy_kwargs = dict(
    features_extractor_class=CustomNetwork,  # カスタムネットワークを指定
    features_extractor_kwargs=dict(features_dim=128)  # 特徴量次元を設定
)

model = PPO(
    "MlpPolicy",  # ここは変わらず、カスタムネットワークは内部で使用される
    env,
    verbose=0,
    n_steps=128,
    batch_size=128,
    n_epochs=10,
    learning_rate=8e-5,
    gamma=0.6,
    clip_range=0.2,
    policy_kwargs=policy_kwargs  # ここでカスタムネットワークを渡す
)


# モデルの学習
model.learn(total_timesteps=400000)


Step: 500,  Reward: -54.2,win_num:110.0
Step: 1000,  Reward: -108.9,win_num:83.0
Step: 1500,  Reward: -101.7,win_num:88.0
Step: 2000,  Reward: -152.0,win_num:96.0
action [ 7 31 31 10  3 18  9  5]
Step: 2500,  Reward: -84.7,win_num:96.0
Step: 3000,  Reward: 25.8,win_num:88.0
Step: 3500,  Reward: -207.1,win_num:92.0
Step: 4000,  Reward: -186.5,win_num:86.0
action [19 18 31 32 11 36 31 31]
Step: 4500,  Reward: -144.7,win_num:98.0
Step: 5000,  Reward: -132.3,win_num:94.0
Step: 5500,  Reward: -140.1,win_num:90.0
Step: 6000,  Reward: -167.8,win_num:96.0
action [24 32  5 28 10  4 33 29]
Step: 6500,  Reward: -143.5,win_num:77.0
Step: 7000,  Reward: -148.3,win_num:90.0
Step: 7500,  Reward: -180.3,win_num:89.0
Step: 8000,  Reward: -189.5,win_num:77.0
action [18 10  9 15 20 36 34 24]
Step: 8500,  Reward: -142.7,win_num:85.0
Step: 9000,  Reward: -144.2,win_num:97.0
Step: 9500,  Reward: -95.1,win_num:108.0
Step: 10000,  Reward: -121.5,win_num:80.0
action [11 32  4 31  8  3  1 14]
Step: 10500,  Rewa

KeyboardInterrupt: 

In [None]:
# 学習済みモデルの評価
obs = env.reset()
for _ in range(100):
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        obs = env.reset()

In [210]:
plt.plot(env.reward_log)

AttributeError: 'DummyVecEnv' object has no attribute 'reward_log'

In [None]:
import gymnasium as gym
from gymnasium.envs.registration import register

# 環境を登録
register(
    id='KeibaEnv-v0',  # 環境のID
    entry_point='__main__:KeibaEnv',  # 環境クラスへのパス
)

# 環境の作成
env = gym.make('KeibaEnv-v0')

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
import matplotlib.pyplot as plt

# 累積報酬を記録するためのコールバック
class RewardCallback(BaseCallback):
    def __init__(self, verbose=0):
        super(RewardCallback, self).__init__(verbose)
        self.episode_rewards = []

    def _on_step(self) -> bool:
        # エピソードが終了したときに報酬を記録
        if self.locals["dones"][0]:
            episode_reward = self.locals["infos"][0]["episode"]["r"]  # 報酬はfloatなので直接取得
            self.episode_rewards.append(episode_reward)
        return True



# PPOモデルを作成
model = PPO("MlpPolicy", env, verbose=1)

# コールバックを定義
reward_callback = RewardCallback()

# 学習中に報酬を記録しながら10,000ステップ学習
model.learn(total_timesteps=10000, callback=reward_callback)

# 学習後の報酬の推移をプロット
plt.plot(reward_callback.episode_rewards)
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.title('Training Progress')
plt.show()
