In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import hashlib
import math, os
from tqdm import tqdm

  from ._conv import register_converters as _register_converters


In [2]:
pd.set_option('display.max_columns', 40)

In [3]:
# トレーニング用の入力データの選択
X_columns = ['locality', 'age', 'rank', 'leg', 'racing piont', \
             'S', 'B', 'Nige', 'Maki', 'Sashi', 'Ma', \
             '1st', '2nd', '3rd', 'Chakugai', 'win', '2ren', '3ren']


In [4]:
# Webスクレイピングで取得した戦績データをファイルから読み取り、データフレームに変換＋データ前処理
def get_df_predict(filename):
    
    init_flag = True
    print('loading data for predict')
    df_predict = pd.read_csv(filename, encoding="SHIFT_JIS", header=0, nrows=None)

    display(df_predict)
    targets = []
    name_ids = []
    localities = []

    for index, row in df_predict.iterrows():

        # 名前をハッシュを使ってID化
        name = row['name']
        name_hash = hashlib.md5(name.encode()).hexdigest()
        name_id = name_hash[-8:]
        name_ids.append(name_id)

        # 　ランクの例外処理
        if row['rank'] == 'SS':
            df_predict.loc[index, 'rank'] = '0'
        elif row['rank'] == 'L1':
            df_predict.loc[index, 'rank'] = '6'

        # 出身地を地区毎にグループ化
        prefecture = row['prefecture']
        if prefecture in {'1', '2', '3', '5'}:
            locality = '1' #北東北
        elif prefecture in {'4', '6', '7'}:
            locality = '2' #南東北
        elif prefecture in {'8', '9'}:
            locality = '3' #茨栃
        elif prefecture in {'11', '13'}:
            locality = '4' #埼京
        elif prefecture in {'10', '15', '19', '20'}:
            locality = '5' #上信越
        elif prefecture in {'12', '14', '22'}:
            locality = '6' #南関東
        elif prefecture in {'16', '17', '21', '23', '24'}:
            locality = '7' #中部
        elif prefecture in {'18', '25', '26', '27', '28', '29', '30'}:
            locality = '8' #近畿
        elif prefecture in {'31', '32', '33', '34', '35'}:
            locality = '9' #中国
        elif prefecture in {'36', '37', '38', '39'}:
            locality = '10' #四国
        elif prefecture in {'40', '41', '42', '43', '44', '45', '46', '47'}:
            locality = '11' #九州
        else:
            locality = '12' #外国

        localities.append(locality)

    # 前処理したデータのデータフレームへの置き換え
    df_predict['name_id'] = name_ids
    df_predict['locality'] = localities

    # カラムの順番入れ替え（見やすさのため）
    columns = list(df_predict.columns)
    columns.remove('name_id')
    columns.insert(columns.index("name") + 1, "name_id")
    columns.remove('locality')
    columns.insert(columns.index("prefecture") + 1, "locality")

    df_predict = df_predict.loc[:,columns]

    return df_predict

In [5]:
def get_predict_data(df_predict):
    X = []
        
    # 各レース毎に
    grouped = df_predict.groupby(['date', 'place', 'race_num'])
    for race_name, group in tqdm(grouped):
        #print(race_name)
        racer_count = group.shape[0]
        # もし、９輪ではないレースは、トレーニングの対象から外す（モデルを固めるため）
        if racer_count != 9:
            continue
        X.append(group[X_columns].values)

    X = np.array(X)
    X = X.reshape(X.shape[0], X.shape[1] * X.shape[2])

    return X


In [6]:
for filename in os.listdir('predict/'):
    df_predict = get_df_predict('predict/' + filename)

loading data for predict


Unnamed: 0,date,place,race_num,predict,koukiai,evaluation,bracket,car_num,name,prefecture,age,period,rank,leg,gear,racing piont,S,B,Nige,Maki,Sashi,Ma,1st,2nd,3rd,Chakugai,win,2ren,3ren,Unnamed: 29
0,20180927,aomori,1,9.0,,15,1,1,黒田 淳,33,33,97,5,1,3.92,105.06,1,7,2,3,3,1,6,3,6,17,18.7,28.1,46.8,
1,20180927,aomori,1,,,20,2,2,三上 佳孝,12,36,88,4,1,3.85,99.03,2,2,0,2,4,3,2,7,2,22,6.0,27.2,33.3,
2,20180927,aomori,1,5.0,,18,3,3,前田 拓也,27,44,71,5,3,3.86,103.73,0,0,0,0,3,0,2,1,3,13,10.5,15.7,31.5,
3,20180927,aomori,1,4.0,,18,4,4,野口 正則,29,28,105,4,2,3.85,97.93,0,11,1,3,1,0,2,3,1,23,6.8,17.2,20.6,
4,20180927,aomori,1,8.0,,20,4,5,吉永 和生,34,42,80,4,3,3.92,100.37,0,1,0,2,3,2,4,3,2,16,16.0,28.0,36.0,
5,20180927,aomori,1,,,24,5,6,池田 智毅,30,48,68,4,3,3.92,94.72,0,0,0,0,1,1,0,2,2,19,0.0,8.6,17.3,
6,20180927,aomori,1,6.0,,20,5,7,吉田 裕全,11,33,90,4,1,3.92,99.78,3,0,0,0,5,3,5,3,3,21,15.6,25.0,34.3,
7,20180927,aomori,1,,,24,6,8,北浦 和人,14,35,105,4,1,3.92,95.25,0,0,0,0,1,0,0,1,1,10,0.0,8.3,16.6,
8,20180927,aomori,1,7.0,,20,6,9,兵藤 一也,10,40,82,4,3,3.85,100.33,1,0,0,0,2,3,1,4,3,13,4.7,23.8,38.0,
9,20180927,aomori,2,7.0,,19,1,1,松山 桂輔,23,34,88,4,1,3.93,99.19,3,4,1,2,3,2,4,4,4,19,12.9,25.8,38.7,


In [11]:
print("Generating Training/Test Data")
X = get_predict_data(df_predict)
print(X.shape)

#saver = tf.train.Saver()
sess = tf.Session()
saver = tf.train.import_meta_graph('model/training_model.meta')
saver.restore(sess, 'model/checkpoint')

print("Predicting ...")



Generating Training/Test Data


100%|████████████████████████████████████████████████████████████████████████████████| 56/56 [00:00<00:00, 2005.42it/s]


(42, 162)
INFO:tensorflow:Restoring parameters from model/checkpoint


DataLossError: Unable to open table file model\checkpoint: Data loss: not an sstable (bad magic number): perhaps your file is in a different file format and you need to use a different restore operator?
	 [[Node: save_9/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save_9/Const_0_0, save_9/RestoreV2/tensor_names, save_9/RestoreV2/shape_and_slices)]]

Caused by op 'save_9/RestoreV2', defined at:
  File "C:\Users\taker\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\taker\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\taker\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\taker\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\taker\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "C:\Users\taker\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\taker\Anaconda3\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "C:\Users\taker\Anaconda3\lib\asyncio\base_events.py", line 1432, in _run_once
    handle._run()
  File "C:\Users\taker\Anaconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\taker\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "C:\Users\taker\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\taker\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "C:\Users\taker\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\taker\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\taker\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\taker\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\taker\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\taker\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\taker\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\taker\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\taker\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\taker\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\taker\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\taker\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-0b6ca89b1f24>", line 7, in <module>
    saver = tf.train.import_meta_graph('model/training_model.meta')
  File "C:\Users\taker\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1939, in import_meta_graph
    **kwargs)
  File "C:\Users\taker\Anaconda3\lib\site-packages\tensorflow\python\framework\meta_graph.py", line 744, in import_scoped_meta_graph
    producer_op_list=producer_op_list)
  File "C:\Users\taker\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "C:\Users\taker\Anaconda3\lib\site-packages\tensorflow\python\framework\importer.py", line 442, in import_graph_def
    _ProcessNewOps(graph)
  File "C:\Users\taker\Anaconda3\lib\site-packages\tensorflow\python\framework\importer.py", line 234, in _ProcessNewOps
    for new_op in graph._add_new_tf_operations(compute_devices=False):  # pylint: disable=protected-access
  File "C:\Users\taker\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3289, in _add_new_tf_operations
    for c_op in c_api_util.new_tf_operations(self)
  File "C:\Users\taker\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3289, in <listcomp>
    for c_op in c_api_util.new_tf_operations(self)
  File "C:\Users\taker\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3180, in _create_op_from_tf_operation
    ret = Operation(c_op, self)
  File "C:\Users\taker\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

DataLossError (see above for traceback): Unable to open table file model\checkpoint: Data loss: not an sstable (bad magic number): perhaps your file is in a different file format and you need to use a different restore operator?
	 [[Node: save_9/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save_9/Const_0_0, save_9/RestoreV2/tensor_names, save_9/RestoreV2/shape_and_slices)]]
