In [1]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn.functional as F
import random
import tqdm
from torch import nn
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)
np.set_printoptions(threshold=200)
torch.set_printoptions(edgeitems=200)

In [2]:
import pickle
with open("pickle/pickle_r_all", "rb") as f:
    r = pickle.load(f)
with open("pickle/pickle_p_all", "rb") as f:
    p = pickle.load(f)
# with open("pickle/pickle_st_all", "rb") as f:
#     st = pickle.load(f)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [3]:
# data = r.data_c[r.data_c['date']>'2024-07-01']
data = r.data_c

In [4]:
from Class.LearningDataProcessor import RacingDataProcessor
from Class.Model import HorseRaceModel
from Class.LearningModule import LearningModule
from ListWise_func import listnet_loss

In [5]:
def set_criterion(task_type):
    if task_type == 'regression':
        criterion = nn.MSELoss()
    elif task_type == 'binary':
        criterion = nn.CrossEntropyLoss()
    elif task_type == 'list_net':
        criterion = listnet_loss
    else:
        raise ValueError(f"Unsupported task type: {task_type}")
    
    return criterion

def log_func(df,task_type):
    if task_type == 'regression':
        df['rank'] = df.groupby('index')['pred'].rank(ascending=False)
    else:
        df['rank'] = df.groupby('index')['pred'].rank(ascending=True)
    df_rank1 = df[df['rank'] == 1]
    correct_data = df_rank1[df_rank1['着順'] == 1]
    accuracy = len(correct_data) / len(df_rank1) * 100 if len(df_rank1) > 0 else 0
    return_money = correct_data['単勝'].sum()/len(df_rank1)* 100 if len(df_rank1) > 0 else 0
    print(f'Accuracy: {accuracy:.1f}% | Return_money: {return_money:.1f}%')
    return accuracy,return_money



In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# デバイスの確認
print("Device: {}".format(device))

task_type = 'list_net'
h_size = 64
rdp = RacingDataProcessor(data,h_size,num_selections=14,task_type=task_type)

E_in = len(rdp.environment_columns)
E_out = 32
dh_in = len(rdp.horse_columns)
dh_out = 32
print("E_in: {} dh_in: {}".format(E_in,dh_in))

net = HorseRaceModel(task_type, device,rdp, h_size,E_in,E_out, dh_in,dh_out).to(device)
# 損失関数の定義
criterion = set_criterion(task_type)
optimizer = optim.Adam(net.parameters(), lr=0.001)

train_split = 0.7
lem = LearningModule(rdp,net,data,task_type,train_split,device,optimizer, criterion,log_func)


Device: cpu
E_in: 34 dh_in: 8


In [7]:
epoch = 120

for i in range(epoch):
    # エポックの進行状況を表示
    print('---------------------------------------------')
    print("Epoch: {}/{}".format(i+1, epoch))

    train_loss = lem.train(log_interval = 100)
    test_loss,_ = lem.evaluate(log_interval = 50)

    print("Train_Loss: {:.4f}".format(train_loss))
    print("Test_Loss: {:.4f} ".format(test_loss))
    
    if i%4 == 0:
        torch.save(lem.model, 'model2/model_{}.pth'.format(i))
        torch.save(lem.data_processor.horse_features, 'model2/horse_features _list_{}.pt'.format(i))
        torch.save(lem.data_processor.jockey_features, 'model2/jockey_features_list_{}.pt'.format(i))



---------------------------------------------
Epoch: 1/120


  0%|          | 1/562 [00:00<07:15,  1.29it/s]

Loss: 0.1247
Accuracy: 8.3% | Return_money: 82.1%


 18%|█▊        | 101/562 [01:17<06:33,  1.17it/s]

Loss: 0.1044
Accuracy: 0.0% | Return_money: 0.0%


 36%|███▌      | 201/562 [02:39<05:22,  1.12it/s]

Loss: 0.1009
Accuracy: 5.6% | Return_money: 27.8%


 54%|█████▎    | 301/562 [04:08<05:33,  1.28s/it]

Loss: 0.1026
Accuracy: 0.0% | Return_money: 0.0%


 71%|███████▏  | 401/562 [06:44<05:23,  2.01s/it]

Loss: 0.1002
Accuracy: 5.6% | Return_money: 23.9%


 89%|████████▉ | 501/562 [08:54<00:51,  1.18it/s]

Loss: 0.1002
Accuracy: 4.2% | Return_money: 46.7%


100%|██████████| 562/562 [09:47<00:00,  1.05s/it]
  0%|          | 1/241 [00:00<03:08,  1.27it/s]

Loss: 0.0813
Accuracy: 13.9% | Return_money: 67.5%


 21%|██        | 51/241 [00:35<02:21,  1.35it/s]

Loss: 0.0971
Accuracy: 13.9% | Return_money: 83.3%


 42%|████▏     | 101/241 [01:08<01:38,  1.42it/s]

Loss: 0.1033
Accuracy: 11.1% | Return_money: 38.3%


 63%|██████▎   | 151/241 [01:45<00:50,  1.77it/s]

Loss: 0.1008
Accuracy: 5.6% | Return_money: 13.9%


 83%|████████▎ | 201/241 [02:18<00:26,  1.51it/s]

Loss: 0.0966
Accuracy: 30.6% | Return_money: 156.4%


100%|██████████| 241/241 [02:56<00:00,  1.36it/s]


TypeError: log_func() missing 1 required positional argument: 'task_type'

In [None]:
plt.plot(lem.train_losses)
plt.plot(lem.eval_losses)

In [None]:
model_load = torch.load('model2/model_8.pth')