In [31]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import hashlib
import math, os
from tqdm import tqdm

In [32]:
import dnn_model

In [33]:
pd.set_option('display.max_columns', 40)

In [34]:
# トレーニング用の入力データの選択
#X_columns = ['locality', 'age', 'rank', 'leg', 'racing piont', \
#             'S', 'B', 'Nige', 'Maki', 'Sashi', 'Ma', \
#             '1st', '2nd', '3rd', 'Chakugai', 'win', '2ren', '3ren']
X_columns = ['rank', 'racing piont', '1st', '2nd', '3rd', 'Chakugai', 'win', '2ren', '3ren']


In [35]:
def zscore(x, axis = None):
    xmean = x.mean(axis=axis, keepdims=True)
    xstd  = np.std(x, axis=axis, keepdims=True)
    zscore = (x-xmean)/xstd
    return zscore

In [36]:
# Webスクレイピングで取得した戦績データをファイルから読み取り、データフレームに変換＋データ前処理
def get_df_predict(filename):
    
    init_flag = True
    print('loading data for predict')
    df_predict = pd.read_csv(filename, encoding="SHIFT_JIS", header=0, nrows=None)

    display(df_predict)
    targets = []
    name_ids = []
    localities = []

    for index, row in df_predict.iterrows():

        # 名前をハッシュを使ってID化
        name = row['name']
        name_hash = hashlib.md5(name.encode()).hexdigest()
        name_id = name_hash[-8:]
        name_ids.append(name_id)

        # 　ランクの例外処理
        if row['rank'] == 'SS':
            df_predict.loc[index, 'rank'] = '0'
        elif row['rank'] == 'L1':
            df_predict.loc[index, 'rank'] = '6'

        # 出身地を地区毎にグループ化
        prefecture = row['prefecture']
        if prefecture in {'1', '2', '3', '5'}:
            locality = '1' #北東北
        elif prefecture in {'4', '6', '7'}:
            locality = '2' #南東北
        elif prefecture in {'8', '9'}:
            locality = '3' #茨栃
        elif prefecture in {'11', '13'}:
            locality = '4' #埼京
        elif prefecture in {'10', '15', '19', '20'}:
            locality = '5' #上信越
        elif prefecture in {'12', '14', '22'}:
            locality = '6' #南関東
        elif prefecture in {'16', '17', '21', '23', '24'}:
            locality = '7' #中部
        elif prefecture in {'18', '25', '26', '27', '28', '29', '30'}:
            locality = '8' #近畿
        elif prefecture in {'31', '32', '33', '34', '35'}:
            locality = '9' #中国
        elif prefecture in {'36', '37', '38', '39'}:
            locality = '10' #四国
        elif prefecture in {'40', '41', '42', '43', '44', '45', '46', '47'}:
            locality = '11' #九州
        else:
            locality = '12' #外国

        localities.append(locality)

    # 前処理したデータのデータフレームへの置き換え
    df_predict['name_id'] = name_ids
    df_predict['locality'] = localities

    # カラムの順番入れ替え（見やすさのため）
    columns = list(df_predict.columns)
    columns.remove('name_id')
    columns.insert(columns.index("name") + 1, "name_id")
    columns.remove('locality')
    columns.insert(columns.index("prefecture") + 1, "locality")

    df_predict = df_predict.loc[:,columns]

    return df_predict

In [37]:
def get_predict_data(df_predict):
    X = []
    race_info = []
        
    # 各レース毎に
    grouped = df_predict.groupby(['date', 'place', 'race_num'])
    for race_name, group in tqdm(grouped):
        #print(race_name)
        racer_count = group.shape[0]
        # もし、９輪ではないレースは、トレーニングの対象から外す（モデルを固めるため）
        if racer_count != 9:
            continue
        X.append(group[X_columns].values)
        race_info.append(race_name)

    X = np.array(X, dtype='float')
    X = X.reshape(X.shape[0], X.shape[1] * X.shape[2])

    return X, race_info


In [38]:
for filename in os.listdir('predict/'):
    df_predict = get_df_predict('predict/' + filename)

loading data for predict


Unnamed: 0,date,place,race_num,predict,koukiai,evaluation,bracket,car_num,name,prefecture,age,period,rank,leg,gear,racing piont,S,B,Nige,Maki,Sashi,Ma,1st,2nd,3rd,Chakugai,win,2ren,3ren,Unnamed: 29
0,20181004,iwakitaira,1,,,,1,1,小笠原 昭太,2,42,76,3,3,3.92,89.80,0,0,0,0,4,2,2,4,2,13,9.5,28.5,38.0,
1,20181004,iwakitaira,1,,,,2,2,西島 貢司,43,49,64,2,3,3.85,84.47,0,0,0,0,2,1,1,2,3,15,4.7,14.2,28.5,
2,20181004,iwakitaira,1,,,,3,3,山口 智弘,23,28,99,3,2,3.93,86.61,0,7,1,3,0,0,0,4,1,16,0.0,19.0,23.8,
3,20181004,iwakitaira,1,,,,4,4,渡辺 福太郎,14,33,103,2,2,3.83,81.29,1,7,3,1,0,0,0,4,0,13,0.0,23.5,23.5,
4,20181004,iwakitaira,1,,,,4,5,佐藤 政利,15,36,91,2,3,3.92,83.81,1,1,0,0,2,0,1,1,3,17,4.5,9.0,22.7,
5,20181004,iwakitaira,1,,,,5,6,富澤 洋祐,22,42,81,2,3,3.85,78.89,0,0,0,0,1,0,0,1,0,19,0.0,5.0,5.0,
6,20181004,iwakitaira,1,,,,5,7,本馬 陵,7,29,99,2,2,3.86,82.25,1,5,0,1,1,0,2,0,3,19,8.3,8.3,20.8,
7,20181004,iwakitaira,1,,,,6,8,北浦 弘幹,30,43,77,2,3,3.92,80.89,0,0,0,0,1,0,0,1,1,17,0.0,5.2,10.5,
8,20181004,iwakitaira,1,,,,6,9,徳吉 正治,40,35,89,2,1,3.86,82.33,2,4,1,4,0,0,4,1,4,12,19.0,23.8,42.8,
9,20181004,iwakitaira,2,,,,1,1,大沼 孝行,22,43,74,3,2,3.92,84.04,0,15,4,1,0,0,4,1,5,14,16.6,20.8,41.6,


In [39]:
print("Generating Predict Data")
X_pred, race_info = get_predict_data(df_predict)

Generating Predict Data


100%|████████████████████████████████████████████████████████████████████████████████| 51/51 [00:00<00:00, 2131.04it/s]


In [40]:
# 列方向にデータを標準化:（数値 - 平均) / 標準偏差
X_pred_z = zscore(X_pred, axis=0)

In [46]:
X_pred.shape[1]

81

In [47]:
model = dnn_model.DNN(n_in = X_pred.shape[1], n_hiddens=[1024, 2048, 2048], n_out=9)

In [48]:
#model = DNN()
print("Predicting ...")
Y_pred = model.predict(X_pred = X_pred_z)

Predicting ...
stddev:  0.004910463758239913
stddev:  0.0009765625
stddev:  0.0006905339660024879
stddev:  0.010416666666666666
INFO:tensorflow:Restoring parameters from model/training_model


InvalidArgumentError: Restoring from checkpoint failed. This is most likely due to a mismatch between the current graph and the graph from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Assign requires shapes of both tensors to match. lhs shape= [1024] rhs shape= [256]
	 [[{{node save_3/Assign_1}} = Assign[T=DT_FLOAT, _class=["loc:@Variable_1"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Variable_1, save_3/RestoreV2/_3)]]
	 [[{{node save_3/RestoreV2/_42}} = _Send[T=DT_FLOAT, client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_48_save_3/RestoreV2", _device="/job:localhost/replica:0/task:0/device:CPU:0"](save_3/RestoreV2:21)]]

Caused by op 'save_3/Assign_1', defined at:
  File "c:\program files\python36\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "c:\program files\python36\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "c:\program files\python36\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "c:\program files\python36\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "c:\program files\python36\lib\site-packages\ipykernel\kernelapp.py", line 499, in start
    self.io_loop.start()
  File "c:\program files\python36\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "c:\program files\python36\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "c:\program files\python36\lib\asyncio\base_events.py", line 1434, in _run_once
    handle._run()
  File "c:\program files\python36\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "c:\program files\python36\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
    ret = callback()
  File "c:\program files\python36\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "c:\program files\python36\lib\site-packages\tornado\gen.py", line 1233, in inner
    self.run()
  File "c:\program files\python36\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "c:\program files\python36\lib\site-packages\ipykernel\kernelbase.py", line 346, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "c:\program files\python36\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "c:\program files\python36\lib\site-packages\ipykernel\kernelbase.py", line 259, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "c:\program files\python36\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "c:\program files\python36\lib\site-packages\ipykernel\kernelbase.py", line 513, in execute_request
    user_expressions, allow_stdin,
  File "c:\program files\python36\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "c:\program files\python36\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "c:\program files\python36\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "c:\program files\python36\lib\site-packages\IPython\core\interactiveshell.py", line 2817, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "c:\program files\python36\lib\site-packages\IPython\core\interactiveshell.py", line 2843, in _run_cell
    return runner(coro)
  File "c:\program files\python36\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "c:\program files\python36\lib\site-packages\IPython\core\interactiveshell.py", line 3018, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "c:\program files\python36\lib\site-packages\IPython\core\interactiveshell.py", line 3183, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "c:\program files\python36\lib\site-packages\IPython\core\interactiveshell.py", line 3265, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-48-7eaed6ecff02>", line 3, in <module>
    Y_pred = model.predict(X_pred = X_pred_z)
  File "d:\Projects\keirin_ai\dnn_model.py", line 77, in predict
    saver = tf.train.Saver()
  File "c:\program files\python36\lib\site-packages\tensorflow\python\training\saver.py", line 1094, in __init__
    self.build()
  File "c:\program files\python36\lib\site-packages\tensorflow\python\training\saver.py", line 1106, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "c:\program files\python36\lib\site-packages\tensorflow\python\training\saver.py", line 1143, in _build
    build_save=build_save, build_restore=build_restore)
  File "c:\program files\python36\lib\site-packages\tensorflow\python\training\saver.py", line 787, in _build_internal
    restore_sequentially, reshape)
  File "c:\program files\python36\lib\site-packages\tensorflow\python\training\saver.py", line 428, in _AddRestoreOps
    assign_ops.append(saveable.restore(saveable_tensors, shapes))
  File "c:\program files\python36\lib\site-packages\tensorflow\python\training\saver.py", line 119, in restore
    self.op.get_shape().is_fully_defined())
  File "c:\program files\python36\lib\site-packages\tensorflow\python\ops\state_ops.py", line 221, in assign
    validate_shape=validate_shape)
  File "c:\program files\python36\lib\site-packages\tensorflow\python\ops\gen_state_ops.py", line 65, in assign
    use_locking=use_locking, name=name)
  File "c:\program files\python36\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "c:\program files\python36\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "c:\program files\python36\lib\site-packages\tensorflow\python\framework\ops.py", line 3272, in create_op
    op_def=op_def)
  File "c:\program files\python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1768, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a mismatch between the current graph and the graph from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Assign requires shapes of both tensors to match. lhs shape= [1024] rhs shape= [256]
	 [[{{node save_3/Assign_1}} = Assign[T=DT_FLOAT, _class=["loc:@Variable_1"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Variable_1, save_3/RestoreV2/_3)]]
	 [[{{node save_3/RestoreV2/_42}} = _Send[T=DT_FLOAT, client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_48_save_3/RestoreV2", _device="/job:localhost/replica:0/task:0/device:CPU:0"](save_3/RestoreV2:21)]]


In [44]:
for index in range(len(Y_pred)):
    print('レース', race_info[index])
    print('予想順位（車番左から１ -> 9位）', np.argsort(-Y_pred[index]) + 1)
    print('勝率予測', Y_pred[index])

TypeError: object of type 'function' has no len()