In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, random_split
import time
import pandas as pd
from pandas import Series, DataFrame

from horse_dataset import HorseDataset
from winning_set import WinningSetModule, send_next_layer

USE_GPU = True

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
    

In [2]:
def w_train(dataset, model, optimizer, n_iters, test): 
    if test :
        
        res_x = []
        res_y = []
        res_z = []

        model.to(device=device)
        model.eval()
        with torch.no_grad():
            for i, (x, y, z) in enumerate(dataset):
                x = x.to(device=device)
                
                output = model(x)
                if output.shape[0] != 32 :
                    break

                for j in range(len(output)): #batch num
                    out_x = []
                    out_y = []
                    out_z = []
                    
                    index = output[j].argsort()
                    dex = np.zeros(100)
#                     for k in range(len(index)): #lane
#                         if index[k] >= 7 : 
#                             dex[index[k]] = 0
#                         else :
#                             dex[index[k]] = 1
                    for k in range(len(index)): #lane
                         if index[k] < 7:
                            out_x.append(x[j][index[k]])
                            out_y.append(y[j][index[k]])
                            out_z.append(z[j][index[k]])
#                         if dex[k] == 1 : 
#                             out_x.append(x[j][k])
#                             out_y.append(y[j][k])
                            #out_z.append(z[j][k])
                        
                    res_x.append(out_x)
                    res_y.append(out_y)
                    res_z.append(out_z)
            
            return res_x, res_y, res_z
                

                    


        
    else :
        model.to(device=device)
        model.train()
        start = time.time()
        print_every = 10
        for e in range(n_iters):
            for i, (x, y, z) in enumerate(dataset):
                x = x.to(device=device)
                y = (len(y[0]) - y.type(torch.float32)) / (len(y[0]) - 1)
                y = (torch.exp(y) - 1) / 1.72
                y = y.to(device=device)
                model.zero_grad()
                output = model(x)
                loss = loss_fcn(output, y)
                loss.backward()
                optimizer.step()
            if e % print_every == 0:
                print('(%d %d%%) %.4f' % (e, e / n_iters * 100, loss))

In [3]:
loss_fcn = nn.MSELoss()
criterion = nn.MSELoss()

w_model = WinningSetModule(9, 8, 14)

n_iters = 30

test_data_saved = []
train_data_saved = []

train_x = []
train_y = []
test_x = []
test_y = []
test_z = []

for _size in range(7, 8):
    w_optimizer = torch.optim.Adam(w_model.parameters(), lr=0.001)
    dataset = HorseDataset(f'./preprocess/data/data_{_size:02}.pkl')
    _dataset_size = len(dataset)
    _test_data_size = _dataset_size // 10
    _train_data_size = _dataset_size - _test_data_size

    train_dataset, test_dataset = random_split(dataset, [_train_data_size, _test_data_size])
    
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    

    
    for i, (x,y,z) in enumerate(train_loader):

        if len(x) != 32 :
            break
        for j in range(32):
            train_x.append(x[j])
            train_y.append(y[j])
        
    for i, (x,y,z) in enumerate(test_loader):
        if len(x) != 32 :
            break
        for j in range(32):
            test_x.append(x[j])
            test_y.append(y[j])
            test_z.append(z[j])
            

    


for _size in range(8, 15):
    w_optimizer = torch.optim.Adam(w_model.parameters(), lr=0.001)
    dataset = HorseDataset(f'./preprocess/data/data_{_size:02}.pkl')
    _dataset_size = len(dataset)
    _test_data_size = _dataset_size // 10
    _train_data_size = _dataset_size - _test_data_size
    
    
    train_dataset, test_dataset = random_split(dataset, [_train_data_size, _test_data_size])
    
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    
    w_train(train_loader, w_model, w_optimizer, n_iters, 0) # training 합니다. IMPORTANT!!!
     
    train_data_saved.append(train_loader) # train를 추출합니다
    test_data_saved.append(test_loader) # test를 추출합니다


(0 0%) 0.1048
(10 33%) 0.0615
(20 66%) 0.0618
(0 0%) 0.1109
(10 33%) 0.0678
(20 66%) 0.0450
(0 0%) 0.1017
(10 33%) 0.0700
(20 66%) 0.0631
(0 0%) 0.1016
(10 33%) 0.0629
(20 66%) 0.0535
(0 0%) 0.0893
(10 33%) 0.0642
(20 66%) 0.0682
(0 0%) 0.1016
(10 33%) 0.0843
(20 66%) 0.0735
(0 0%) 0.0942
(10 33%) 0.0657
(20 66%) 0.0575


In [4]:
class Horse_1st(nn.Module):
    def __init__(self, lane):
        super(Horse_1st, self).__init__()
        self.fc1 = nn.Linear(9*lane,100)
        self.bn1 = nn.BatchNorm1d(100)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(100,100)
        self.bn2 = nn.BatchNorm1d(100)
        self.relu2 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc3 = nn.Linear(100,lane)        
        self.softmax = nn.Softmax(dim=1)
        self.lane = lane
        
    def forward(self, x):
      x = x.view(-1, 9*self.lane)
      x = self.fc1(x)
      x = self.bn1(x)
      x = self.relu1(x)
      x = self.fc2(x)
      x = self.bn2(x)
      x = self.relu2(x)
      x = self.dropout(x)
      x = self.fc3(x)
      out = self.softmax(x)

      return out

class Horse_2nd(nn.Module):
    def __init__(self, lane):
        super(Horse_2nd, self).__init__()
        self.fc1 = nn.Linear(9*lane,100)
        self.bn1 = nn.BatchNorm1d(100)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(100,100)
        self.bn2 = nn.BatchNorm1d(100)
        self.relu2 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc3 = nn.Linear(100,lane) 
        self.softmax = nn.Softmax(dim=1)
        self.lane = lane

    def forward(self, x):
      x = x.view(-1, 9*self.lane)
      x = self.fc1(x)
      x = self.bn1(x)
      x = self.relu1(x)
      x = self.fc2(x)
      x = self.bn2(x)
      x = self.relu2(x)
      x = self.dropout(x)
      x = self.fc3(x)
      out = self.softmax(x)

      return out
    
class Horse_3rd(nn.Module):
    def __init__(self, lane):
        super(Horse_3rd, self).__init__()
        self.fc1 = nn.Linear(9*lane,100)
        self.bn1 = nn.BatchNorm1d(100)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(100,100)
        self.bn2 = nn.BatchNorm1d(100)
        self.relu2 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc3 = nn.Linear(100,lane)
        self.softmax = nn.Softmax(dim=1)
        self.lane = lane

    def forward(self, x):
      x = x.view(-1, 9*self.lane)
      x = self.fc1(x)
      x = self.bn1(x)
      x = self.relu1(x)
      x = self.fc2(x)
      x = self.bn2(x)
      x = self.relu2(x)
      x = self.dropout(x)
      x = self.fc3(x)
      out = self.softmax(x)

      return out

In [5]:

def r_train(data_x, data_y, model, rank, lane, criterion, optimizer, num_epochs, test): 
    if test : 
        model.eval()
        acc = 0
        result = []
        with torch.no_grad():
            for i in range(int(len(data_x)/32)):
                x = data_x[i*32:(i+1)*32]
                y = data_y[i*32:(i+1)*32]
                
                
                for k in range(len(x)):
                    for l in range(len(x[k])):
                        if l == 0 :
                            X = x[k][l].view(1,-1)
                        else :
                            X = torch.cat((X,x[k][l].view(1,-1)), dim=0)
                    if k == 0:
                        Y = X.view(1,7,-1)
                    else :
                        Y = torch.cat((Y,X.view(1,7,-1)), dim=0)
                
                x = torch.FloatTensor(Y) #np.asarray(x, dtype=np.float64)
                x = x.to(device)

                optimizer.zero_grad()
                outputs = model(x)
                
                for k in range(len(outputs)):
                    result.append(outputs[k])

                rank_horse_list = []
                for batch_no in range(32):
                    rank_horse = []
                    for horse_no in range(lane):
                        if y[batch_no][horse_no] == rank:
                            rank_horse.append(1)
                        else:
                            rank_horse.append(0)

                    rank_horse_list.append(rank_horse)

                rank_horse_list = torch.FloatTensor(rank_horse_list)
                rank_horse_list = rank_horse_list.to(device)
                

                for j in range(len(y)):
                    if rank_horse_list.argmax(1)[j] == outputs.argmax(1)[j]:
                        acc += 1

            print("ACCURACY [%d/%d], %.4f"
                  %(acc, (int(len(data_x)/32)*32), acc/(int(len(data_x)/32)*32)))

            return result

    else : 
        model.train()
        for epoch in range(num_epochs):
            for i in range(int(len(data_x)/32)):
                x = data_x[i*32:(i+1)*32]
                y = data_y[i*32:(i+1)*32]
                
                
                for k in range(len(x)):
                    for l in range(len(x[k])):
                        if l == 0 :
                            X = x[k][l].view(1,-1)
                        else :
                            X = torch.cat((X,x[k][l].view(1,-1)), dim=0)
                    if k == 0:
                        Y = X.view(1,7,-1)
                    else :
                        Y = torch.cat((Y,X.view(1,7,-1)), dim=0)
                
                x = torch.FloatTensor(Y) #np.asarray(x, dtype=np.float64)
                x = x.to(device)
                
                optimizer.zero_grad()
                
                outputs = model(x)
                
                rank_horse_list = []
                for batch_no in range(32):
                    rank_horse = []
                    for horse_no in range(lane):
                        if y[batch_no][horse_no] == rank:
                            rank_horse.append(1)
                        else:
                            rank_horse.append(0)

                    rank_horse_list.append(rank_horse)

                rank_horse_list = torch.FloatTensor(rank_horse_list)
                rank_horse_list=rank_horse_list.to(device)

                loss = criterion(outputs, rank_horse_list)
                
                loss.backward()
                optimizer.step()
            print(loss)
                
#         if (epoch+1) % 10 == 0 :
#             print(epoch)
#                 print("loss : %.4f"
#                       %(loss))


        


In [6]:



for i in range(len(train_data_saved)):

    res_x, res_y, _ = w_train(train_data_saved[i], w_model, w_optimizer, 1, 1)
    
    for j in range(len(res_x)):
        train_x.append(res_x[j])
        train_y.append(res_y[j])
            

print(len(train_x))
            

            

26144


In [7]:

TX = []
TY = []
TZ = []
for i in range(len(test_data_saved)):
    res_x, res_y, res_z = w_train(test_data_saved[i], w_model, w_optimizer, 1, 1)
    tx = []
    ty = []
    tz = []
    for j in range(len(res_x)):
        tx.append(res_x[j])
        ty.append(res_y[j])
        tz.append(res_z[j])
    TX.append(tx)
    TY.append(ty)
    TZ.append(tz)
    
print(len(test_x))
            
                   


96


In [8]:
criterion = nn.MSELoss()
num_epochs = 10 # 100

model_1st = Horse_1st(7)
model_1st.to(device)

model_2nd = Horse_2nd(7)
model_2nd.to(device)

model_3rd = Horse_3rd(7)
model_3rd.to(device)

optimizer_1 = torch.optim.Adam(model_1st.parameters(), lr=0.001)
optimizer_2 = torch.optim.Adam(model_2nd.parameters(), lr=0.001)
optimizer_3 = torch.optim.Adam(model_3rd.parameters(), lr=0.001)

r_train(train_x, train_y, model_1st, 1, 7, criterion, optimizer_1, num_epochs, 0) 
r_train(train_x, train_y, model_2nd, 2, 7, criterion, optimizer_2, num_epochs, 0) 
r_train(train_x, train_y, model_3rd, 3, 7, criterion, optimizer_3, num_epochs, 0) 

tensor(0.0697, grad_fn=<MseLossBackward>)
tensor(0.0701, grad_fn=<MseLossBackward>)
tensor(0.0683, grad_fn=<MseLossBackward>)
tensor(0.0671, grad_fn=<MseLossBackward>)
tensor(0.0651, grad_fn=<MseLossBackward>)
tensor(0.0660, grad_fn=<MseLossBackward>)
tensor(0.0653, grad_fn=<MseLossBackward>)
tensor(0.0652, grad_fn=<MseLossBackward>)
tensor(0.0653, grad_fn=<MseLossBackward>)
tensor(0.0611, grad_fn=<MseLossBackward>)
tensor(0.0560, grad_fn=<MseLossBackward>)
tensor(0.0569, grad_fn=<MseLossBackward>)
tensor(0.0558, grad_fn=<MseLossBackward>)
tensor(0.0545, grad_fn=<MseLossBackward>)
tensor(0.0515, grad_fn=<MseLossBackward>)
tensor(0.0517, grad_fn=<MseLossBackward>)
tensor(0.0532, grad_fn=<MseLossBackward>)
tensor(0.0511, grad_fn=<MseLossBackward>)
tensor(0.0508, grad_fn=<MseLossBackward>)
tensor(0.0498, grad_fn=<MseLossBackward>)
tensor(0.0644, grad_fn=<MseLossBackward>)
tensor(0.0635, grad_fn=<MseLossBackward>)
tensor(0.0618, grad_fn=<MseLossBackward>)
tensor(0.0628, grad_fn=<MseLossBac

In [9]:
conf_1 = r_train(test_x, test_y, model_1st, 1, 7, criterion, optimizer_1, num_epochs, 1) 
conf_2 = r_train(test_x, test_y, model_2nd, 2, 7, criterion, optimizer_2, num_epochs, 1) 
conf_3 = r_train(test_x, test_y, model_3rd, 3, 7, criterion, optimizer_3, num_epochs, 1) 


net = 0
exp = 0

#단승 기대 수익률
for i in range(len(conf_1)):
    temp = []
    for j in range(len(conf_1[i])):
        temp.append(conf_1[i][j])

    temp = torch.Tensor(temp)
#     for j in range(len(temp)):
#         temp[j] = temp[j] #* test_z[i][j][0].item() 
    bet = temp.argmax(0).item()

    net += test_z[i][j][0].item() * ( test_y[i][bet] < 2 )  #실제 수익
    
    #test_x[i][bet][5].item() 이게 연승 배당률인데 정규화 되어있고 어떤 col인지 모름

exp = net / len(conf_1)

print(exp.item())

net = 0
exp = 0

#연승 기대 수익률
for i in range(len(conf_1)):

    temp = []
    for j in range(len(conf_1[i])):
        temp.append(1-(1-conf_1[i][j])*(1-conf_2[i][j])*(1-conf_2[i][j]))

    temp = torch.Tensor(temp)
#     for j in range(len(temp)):
#         temp[j] = temp[j] #* test_z[i][j][1].item() 
    bet = temp.argmax(0).item()

    net += test_z[i][j][1].item()  * ( test_y[i][bet] <= 3 )  #실제 수익
    
    #test_x[i][bet][5].item() 이게 연승 배당률인데 정규화 되어있고 어떤 col인지 모름

exp = net / len(conf_1)

print(exp.item())




    

ACCURACY [31/96], 0.3229
ACCURACY [30/96], 0.3125
ACCURACY [12/96], 0.1250
4.8979172706604
3.4552080631256104


In [10]:

for k in range(len(test_data_saved)):
    conf_1 = r_train(TX[k], TY[k], model_1st, 1, 7, criterion, optimizer_1, num_epochs, 1) 
    conf_2 = r_train(TX[k], TY[k], model_2nd, 2, 7, criterion, optimizer_2, num_epochs, 1) 
    conf_3 = r_train(TX[k], TY[k], model_3rd, 3, 7, criterion, optimizer_3, num_epochs, 1) 
    print(len(conf_1))
    print(len(conf_2))
    print(len(conf_3))

    net = 0
    exp = 0

    #단승 기대 수익률
    for i in range(len(conf_1)):
        temp = []
        for j in range(len(conf_1[i])):
            temp.append(conf_1[i][j])

        temp = torch.Tensor(temp)
        for j in range(len(temp)):
            temp[j] = temp[j] #* TZ[k][i][j][0].item() 
        bet = temp.argmax(0).item()

        net += TZ[k][i][j][0].item() * ( TY[k][i][bet] < 2 )  #실제 수익




    exp = net / len(conf_1)

    print(exp.item())

    net = 0
    exp = 0

    #연승 기대 수익률
    for i in range(len(conf_1)):

        temp = []
        for j in range(len(conf_1[i])):
            temp.append(1-(1-conf_1[i][j])*(1-conf_2[i][j])*(1-conf_2[i][j]))

        temp = torch.Tensor(temp)
        for j in range(len(temp)):
            temp[j] = temp[j] #* TZ[k][i][j][1].item() 
        bet = temp.argmax(0).item()


        net += TZ[k][i][j][1].item()  * ( TY[k][i][bet] <= 3 )  #실제 수익



    exp = net / len(conf_1)

    print(exp.item())

    


ACCURACY [124/224], 0.5536
ACCURACY [97/224], 0.4330
ACCURACY [70/224], 0.3125
224
224
224
1.3803573846817017
0.9875003099441528
ACCURACY [176/288], 0.6111
ACCURACY [127/288], 0.4410
ACCURACY [100/288], 0.3472
288
288
288
1.440278172492981
1.0225698947906494
ACCURACY [190/320], 0.5938
ACCURACY [164/320], 0.5125
ACCURACY [107/320], 0.3344
320
320
320
1.1684377193450928
0.9625002145767212
ACCURACY [241/416], 0.5793
ACCURACY [213/416], 0.5120
ACCURACY [148/416], 0.3558
416
416
416
1.0663461685180664
1.0079333782196045
ACCURACY [490/832], 0.5889
ACCURACY [445/832], 0.5349
ACCURACY [323/832], 0.3882
832
832
832
1.2743990421295166
1.006249189376831
ACCURACY [96/160], 0.6000
ACCURACY [79/160], 0.4938
ACCURACY [63/160], 0.3937
160
160
160
1.5418751239776611
1.100000023841858
ACCURACY [239/416], 0.5745
ACCURACY [213/416], 0.5120
ACCURACY [170/416], 0.4087
416
416
416
1.0002402067184448
1.0055292844772339
