In [1]:
import hashlib
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import dnn_model

# 関数、クラス　定義

In [2]:
# Render up to 40 columns when a DataFrame is displayed, so wide frames are not elided.
pd.options.display.max_columns = 40

In [3]:
# Input feature columns fed to the model.
# A wider candidate set, currently disabled and kept for reference:
#X_columns = ['locality', 'age', 'rank', 'leg', 'racing piont', \
#             'S', 'B', 'Nige', 'Maki', 'Sashi', 'Ma', \
#             '1st', '2nd', '3rd', 'Chakugai', 'win', '2ren', '3ren']
X_columns = [
    '1st', '2nd', '3rd', 'Chakugai',
    'win', '2ren', '3ren',
]


In [4]:
def zscore(x, axis = None):
    """Standardize ``x`` to zero mean and unit variance along ``axis``.

    Args:
        x: NumPy array to standardize.
        axis: Axis (or axes) over which the mean and standard deviation are
            computed; ``None`` uses the whole array.

    Returns:
        Tuple ``(z, xmean, xstd)``: the standardized array, the mean and the
        standard deviation (``np.std`` default, i.e. ddof=0). Mean and std are
        computed with ``keepdims=True`` so they broadcast against ``x`` and can
        be passed on to ``zscore_cor``. ``xstd`` is returned unmodified, so a
        constant slice still reports a standard deviation of 0.
    """
    xmean = x.mean(axis=axis, keepdims=True)
    xstd  = np.std(x, axis=axis, keepdims=True)
    # A constant slice has std == 0 and would produce NaN (0 / 0).
    # Divide by 1 there instead, so its standardized value is simply 0.
    safe_std = np.where(xstd == 0, 1.0, xstd)
    z = (x - xmean) / safe_std
    return z, xmean, xstd

In [5]:
def zscore_cor(y, xmean, xstd, axis = None):
    """Standardize ``y`` with a mean and standard deviation computed elsewhere.

    Intended for applying statistics fitted on one data set (e.g. the values
    returned by ``zscore``) to another data set.

    Args:
        y: Array to standardize.
        xmean: Mean to subtract; must broadcast against ``y``.
        xstd: Standard deviation to divide by; must broadcast against ``y``.
        axis: Unused; accepted only so the call mirrors ``zscore``.

    Returns:
        ``(y - xmean) / xstd``, where entries of ``xstd`` equal to 0 are
        treated as 1 so the result stays finite.
    """
    # A zero std (constant reference column) would yield inf/NaN; fall back to 1.
    safe_std = np.where(np.asarray(xstd) == 0, 1.0, xstd)
    return (y - xmean) / safe_std

In [6]:
def get_train_test_data(df_train, test_size=0.2, random_state=None, racers_per_race=9):
    """Build flattened per-race samples and split them into train / test sets.

    Rows are grouped by ``(date, place, race_num)``. For every race that has
    exactly ``racers_per_race`` rows, the ``X_columns`` values of all its rows
    are flattened into one feature vector and the ``target`` column becomes the
    matching label vector.

    Args:
        df_train: DataFrame containing ``date``, ``place``, ``race_num``,
            ``target`` and every column listed in the module-level ``X_columns``.
        test_size: Passed to ``train_test_split`` (default 0.2, as before).
        random_state: Passed to ``train_test_split``; set it to an int to make
            the split reproducible. ``None`` keeps the previous unseeded behavior.
        racers_per_race: Races with a different number of rows are skipped so
            that every sample has the same width (default 9).

    Returns:
        ``(X_train, X_test, d_train, d_test)`` as returned by ``train_test_split``.

    Raises:
        ValueError: If no race has exactly ``racers_per_race`` rows.
    """
    features = []
    targets = []

    # One group per race.
    grouped = df_train.groupby(['date', 'place', 'race_num'])
    for race_name, group in tqdm(grouped):
        # Skip races with a different field size so the model input width stays fixed.
        if group.shape[0] != racers_per_race:
            continue
        features.append(group[X_columns].values)
        targets.append(group['target'].values)

    if not features:
        # Without this guard the reshape below fails with an opaque IndexError.
        raise ValueError(
            "no race with exactly %d racers found in df_train" % racers_per_race
        )

    X = np.array(features, dtype='float')
    # (races, racers, features) -> (races, racers * features)
    X = X.reshape(X.shape[0], -1)
    d_ = np.array(targets)

    X_train, X_test, d_train, d_test = train_test_split(
        X, d_, test_size=test_size, random_state=random_state
    )

    return X_train, X_test, d_train, d_test


In [10]:
def get_predict_data(df_predict, racers_per_race=9):
    """Build flattened per-race feature vectors for prediction.

    Rows are grouped by ``(date, place, race_num)``. For every race that has
    exactly ``racers_per_race`` rows, the ``X_columns`` values of all its rows
    are flattened into one feature vector.

    Args:
        df_predict: DataFrame containing ``date``, ``place``, ``race_num`` and
            every column listed in the module-level ``X_columns``.
        racers_per_race: Races with a different number of rows are skipped so
            the input width matches what the model expects (default 9).

    Returns:
        ``(X, race_info)``: ``X`` is a float array of shape
        ``(n_races, racers_per_race * len(X_columns))`` and ``race_info`` is the
        list of group keys, aligned with the rows of ``X``. When no race
        qualifies, ``X`` has zero rows and ``race_info`` is empty.
    """
    features = []
    race_info = []

    # One group per race.
    grouped = df_predict.groupby(['date', 'place', 'race_num'])
    for race_name, group in tqdm(grouped):
        # Skip races with a different field size; they cannot be fed to the model.
        if group.shape[0] != racers_per_race:
            continue
        features.append(group[X_columns].values)
        race_info.append(race_name)

    if not features:
        # Nothing to predict: return an empty, correctly shaped matrix instead
        # of failing with an IndexError in the reshape below.
        return np.empty((0, racers_per_race * len(X_columns)), dtype='float'), race_info

    X = np.array(features, dtype='float')
    # (races, racers, features) -> (races, racers * features)
    X = X.reshape(X.shape[0], -1)

    return X, race_info

In [9]:
def plot(history, filename="output.png"):
    """Plot training loss and accuracy against the epoch index on twin y-axes.

    The figure is saved to ``filename`` and then shown.

    Args:
        history: Mapping-like object whose ``'loss'`` and ``'accuracy'`` entries
            are per-epoch sequences of numbers.
        filename: Path the figure is written to (default ``"output.png"``).
    """
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()  # second y-axis sharing the same x-axis

    ax1.plot(history['loss'], label='loss', color='orange')
    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss')
    # Loss is not bounded by 1 (the recorded run sits near 2.73), so a fixed
    # 0..1 range can push the whole curve off the chart. Keep 0..1 as the
    # minimum range and grow the top to fit the data with 5% headroom.
    finite_loss = [float(v) for v in history['loss'] if np.isfinite(v)]
    loss_top = max([1.0] + [v * 1.05 for v in finite_loss])
    ax1.set_ylim(0, loss_top)
    ax1.legend(loc='best', bbox_to_anchor=(1.01, 0.71, 0.322, .100), borderaxespad=0.,)

    ax2.plot(history['accuracy'], label='accuracy', color='dodgerblue')
    ax2.set_ylabel('accuracy')
    ax2.set_ylim(0, 1.0)
    ax2.legend(loc='best', bbox_to_anchor=(1.01, 0.8, 0.4, .100), borderaxespad=0.,)

    plt.savefig(filename, bbox_inches='tight')
    plt.show()


# 学習

### 学習データを読み込む

In [7]:
# Read the training set from the CSV in the working directory and render it.
df_train = pd.read_csv(filepath_or_buffer='df_train.csv')
display(df_train)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,date,place,race_num,predict,koukiai,evaluation,bracket,car_num,name,name_id,prefecture,locality,age,period,rank,leg,gear,racing piont,S,B,Nige,Maki,Sashi,Ma,1st,2nd,3rd,Chakugai,win,2ren,3ren,result,target
0,20120626,aomori,1,,,,1,1,高田 隼人,b4dda5f1,12,6,30,88,2,2,3.85,87.70,8,11,2,5,2,1,3,7,3,14,11.1,37.0,48.1,1,0.250000
1,20120626,aomori,1,,,,2,2,太田 智洋,043a002b,4,2,42,69,2,3,4.17,82.00,10,1,0,1,0,4,0,5,2,22,0.0,17.2,24.1,6,0.111111
2,20120626,aomori,1,,,,3,3,長谷部 純也,0736a99b,8,3,45,57,3,3,3.92,88.74,0,0,0,0,3,1,1,3,6,17,3.7,14.8,37.0,5,0.138889
3,20120626,aomori,1,,,,4,4,今井 英明,4bf02e00,14,6,43,71,2,3,3.71,80.78,2,0,0,0,0,1,0,1,0,13,0.0,7.1,7.1,7,0.083333
4,20120626,aomori,1,,,,4,5,植田 誠,24ff0845,22,6,44,61,2,3,3.64,86.81,2,0,0,0,2,1,1,2,0,9,8.3,25.0,25.0,3,0.194444
5,20120626,aomori,1,,,,5,6,柁原 翔太,2d0a486d,14,6,25,96,2,2,3.64,75.08,4,3,0,0,0,0,0,0,0,25,0.0,0.0,0.0,9,0.027778
6,20120626,aomori,1,,,,5,7,堀内 昇,e0c92596,8,3,24,95,2,2,4.08,85.28,5,14,1,2,2,0,4,1,5,22,12.5,15.6,31.2,4,0.166667
7,20120626,aomori,1,,,,6,8,濱野 修,cfb673d4,9,3,51,54,2,3,3.86,76.44,0,0,0,0,0,0,0,0,1,27,0.0,0.0,3.5,8,0.055556
8,20120626,aomori,1,,,,6,9,庄子 信弘,e2c00c6f,4,2,33,84,3,1,3.86,84.45,4,2,0,2,1,2,3,2,3,16,12.5,20.8,33.3,2,0.222222
9,20120626,aomori,2,,,,1,1,甲斐 康昭,e7b6e311,10,5,27,89,3,1,3.85,86.86,3,2,1,2,2,1,1,5,3,20,3.4,20.6,31.0,1,0.250000


In [8]:
# Turn the per-racer rows into one flattened sample per race, then split train / test.
print("Generating Training/Test Data")
(X_train, X_test, Y_train, Y_test) = get_train_test_data(df_train=df_train)

Generating Training/Test Data


100%|████████████████████████████████████████████████████████████████████████| 146536/146536 [00:57<00:00, 2568.87it/s]


### 列方向にデータを標準化:（数値 - 平均) / 標準偏差

In [14]:
# Fit the per-column mean / std on the training split only (axis=0), then
# apply those same statistics to the test split.
(X_train_z, xmean, xstd) = zscore(x=X_train, axis=0)
X_test_z = zscore_cor(y=X_test, xmean=xmean, xstd=xstd, axis=0)

In [15]:
# Inspect the standardized training matrix (NumPy abbreviates large arrays with "...").
X_train_z

array([[-0.46474362, -0.56510922, -1.03460687, ...,  0.30104819,
        -0.33791916, -0.0469656 ],
       [-1.13142946, -0.56510922,  0.64791257, ..., -0.12900248,
         0.59113808,  0.26861471],
       [ 1.20197098,  1.5155183 , -0.47376706, ..., -0.7192681 ,
        -0.82264467, -1.0200049 ],
       ...,
       [ 0.86862806,  0.47520454, -0.47376706, ..., -1.09872457,
        -0.95055835, -0.96740818],
       [-0.1314007 , -0.56510922,  0.08707276, ..., -0.7192681 ,
        -0.21000548, -1.0200049 ],
       [ 0.20194222,  0.99536142,  1.20875238, ..., -0.78672702,
        -0.99095214, -0.5663582 ]])

### モデルの定義

In [16]:
# `dnn_model` is imported in the notebook's top import cell.
# Input width follows the flattened feature matrix; n_out=9 matches the nine
# racers per sample. NOTE(review): n_hiddens=[256] presumably means a single
# hidden layer of 256 units - confirm against dnn_model.DNN.
model = dnn_model.DNN(n_in=X_train.shape[1], n_hiddens=[256], n_out=9)

### 学習

In [17]:
# Train on the standardized training split, then score the held-out test split.
# NOTE(review): in the recorded run the loss stays near 2.73 for 1000+ epochs,
# and the nine sample `target` values shown in the displayed frame sum to 1.25
# rather than 1. If dnn_model.DNN uses a softmax cross-entropy loss, confirm
# that the targets are meant to be unnormalized.
print("Training ...")
history = model.fit(
    X_train=X_train_z,
    Y_train=Y_train,
    nb_epoch=1200,
    batch_size=16,
    p_keep=0.5,
    restore=False,
)

accuracy = model.evaluate(X_test_z, Y_test)
print('accuracy: ', accuracy)

Training ...
stddev:  0.01113588507968435
stddev:  0.02946278254943948
epoch: 0  loss: 2.7298253  accuracy: 0.31010967
epoch: 1  loss: 2.7292862  accuracy: 0.3106414
epoch: 2  loss: 2.7292862  accuracy: 0.3078631
epoch: 3  loss: 2.7292857  accuracy: 0.31341973
epoch: 4  loss: 2.7293224  accuracy: 0.30705217
epoch: 5  loss: 2.7291489  accuracy: 0.31054837
epoch: 6  loss: 2.7294948  accuracy: 0.30301097
epoch: 7  loss: 2.7295268  accuracy: 0.2996876
epoch: 8  loss: 2.7290516  accuracy: 0.31359255
epoch: 9  loss: 2.729154  accuracy: 0.31139913
epoch: 10  loss: 2.7289233  accuracy: 0.3123164
epoch: 11  loss: 2.7290068  accuracy: 0.31173146
epoch: 12  loss: 2.7291121  accuracy: 0.3068528
epoch: 13  loss: 2.7289355  accuracy: 0.3070123
epoch: 14  loss: 2.7289896  accuracy: 0.310216
epoch: 15  loss: 2.7288387  accuracy: 0.31544036
epoch: 16  loss: 2.7288146  accuracy: 0.3147225
epoch: 17  loss: 2.728782  accuracy: 0.31808573
epoch: 18  loss: 2.7289124  accuracy: 0.31223664
epoch: 19  loss: 2.

epoch: 331  loss: 2.7262611  accuracy: 0.33011633
epoch: 332  loss: 2.7263038  accuracy: 0.33064806
epoch: 333  loss: 2.7262  accuracy: 0.33007643
epoch: 334  loss: 2.7262223  accuracy: 0.32922566
epoch: 335  loss: 2.7262135  accuracy: 0.3287338
epoch: 336  loss: 2.726223  accuracy: 0.33332005
epoch: 337  loss: 2.7263296  accuracy: 0.33234963
epoch: 338  loss: 2.7263212  accuracy: 0.33088735
epoch: 339  loss: 2.7262263  accuracy: 0.3325889
epoch: 340  loss: 2.726161  accuracy: 0.33082086
epoch: 341  loss: 2.7261982  accuracy: 0.33125955
epoch: 342  loss: 2.7262132  accuracy: 0.33435693
epoch: 343  loss: 2.7261832  accuracy: 0.33639082
epoch: 344  loss: 2.7262833  accuracy: 0.3315387
epoch: 345  loss: 2.7263563  accuracy: 0.32842806
epoch: 346  loss: 2.7261615  accuracy: 0.3338385
epoch: 347  loss: 2.7262816  accuracy: 0.327657
epoch: 348  loss: 2.7262294  accuracy: 0.3336125
epoch: 349  loss: 2.7261765  accuracy: 0.33249584
epoch: 350  loss: 2.7261653  accuracy: 0.33188435
epoch: 351  

epoch: 661  loss: 2.7258434  accuracy: 0.33597872
epoch: 662  loss: 2.725772  accuracy: 0.33290794
epoch: 663  loss: 2.7258852  accuracy: 0.33447656
epoch: 664  loss: 2.7258112  accuracy: 0.3372017
epoch: 665  loss: 2.72565  accuracy: 0.33878365
epoch: 666  loss: 2.725812  accuracy: 0.33479562
epoch: 667  loss: 2.7257981  accuracy: 0.33956796
epoch: 668  loss: 2.725921  accuracy: 0.3341974
epoch: 669  loss: 2.7257385  accuracy: 0.33774677
epoch: 670  loss: 2.725744  accuracy: 0.33571285
epoch: 671  loss: 2.7258985  accuracy: 0.33516783
epoch: 672  loss: 2.7257419  accuracy: 0.33048853
epoch: 673  loss: 2.7257059  accuracy: 0.33838484
epoch: 674  loss: 2.725764  accuracy: 0.3291592
epoch: 675  loss: 2.7257435  accuracy: 0.33508807
epoch: 676  loss: 2.7256846  accuracy: 0.33689597
epoch: 677  loss: 2.725862  accuracy: 0.33265537
epoch: 678  loss: 2.7257736  accuracy: 0.32991692
epoch: 679  loss: 2.725776  accuracy: 0.33302757
epoch: 680  loss: 2.7256904  accuracy: 0.3383051
epoch: 681  l

epoch: 991  loss: 2.7254999  accuracy: 0.33568627
epoch: 992  loss: 2.7255461  accuracy: 0.33450314
epoch: 993  loss: 2.7254834  accuracy: 0.33790627
epoch: 994  loss: 2.7256057  accuracy: 0.33330676
epoch: 995  loss: 2.7255015  accuracy: 0.3395148
epoch: 996  loss: 2.7255776  accuracy: 0.3372549
epoch: 997  loss: 2.7255194  accuracy: 0.33188435
epoch: 998  loss: 2.725572  accuracy: 0.3382918
epoch: 999  loss: 2.7254739  accuracy: 0.33774677
epoch: 1000  loss: 2.7256465  accuracy: 0.3317381
epoch: 1001  loss: 2.725602  accuracy: 0.339661
epoch: 1002  loss: 2.7255025  accuracy: 0.33659023
epoch: 1003  loss: 2.7255733  accuracy: 0.33411765
epoch: 1004  loss: 2.725571  accuracy: 0.3368295
epoch: 1005  loss: 2.7256157  accuracy: 0.33552676
epoch: 1006  loss: 2.7255855  accuracy: 0.34079096
epoch: 1007  loss: 2.7254999  accuracy: 0.33843803
epoch: 1008  loss: 2.7255883  accuracy: 0.33857095
epoch: 1009  loss: 2.7255921  accuracy: 0.33974078
epoch: 1010  loss: 2.7256255  accuracy: 0.3402991


In [None]:
# Draw (and save) the loss / accuracy curves recorded during training.
plot(history=history)

# 予測

### クローラーで取得したデータを読み込む

In [None]:
# Read the crawled race entries to predict from the CSV in the working directory and render them.
df_predict = pd.read_csv(filepath_or_buffer='df_predict.csv')
display(df_predict)

In [None]:
# Flatten each race into one feature vector; race_info keeps the matching race keys.
print("Generating Predict Data")
(X_pred, race_info) = get_predict_data(df_predict=df_predict)

In [None]:
# Standardize column-wise with the statistics fitted on the training data:
# (value - mean) / standard deviation
X_pred_z = zscore_cor(y=X_pred, xmean=xmean, xstd=xstd, axis=0)

### 学習したモデルを元にレース結果を予測

In [None]:
print("Predicting ...")
Y_pred = model.predict(X_pred_z)

for index in range(len(Y_pred)):
    print('レース', race_info[index])
    print('予想順位（車番左から１ -> 9位）', np.argsort(-Y_pred[index]) + 1)
    print('勝率予測', Y_pred[index])