In [None]:
# List every entry in the current working directory, hidden files included.
!ls -a

In [None]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split

import utils.read_data as rd
import utils.io_model as io_m
import utils.preprocessing as pp
import utils.prepare_data as prepare_data

In [None]:
import os
from os.path import join, dirname
from dotenv import load_dotenv
from pathlib import Path

In [None]:
# First pass: call load_dotenv without an explicit path (verbose mode on).
load_dotenv(verbose=True)

# Second pass: explicitly load the `.env` file located in the resolved
# current working directory. The path is kept as a plain string.
dotenv_path = str(Path().resolve() / '.env')
load_dotenv(dotenv_path)

In [None]:
# Base directory of the project data, read from the environment
# (populated by the load_dotenv calls in the previous cell).
GOOGLE_DRIVE_PATH = os.environ.get("GOOGLE_DRIVE_PATH")
if GOOGLE_DRIVE_PATH is None:
    # Fail here with an actionable message instead of the opaque TypeError
    # that `None + '/train_data'` would raise on the next line.
    raise RuntimeError(
        "Environment variable GOOGLE_DRIVE_PATH is not set; "
        "define it in the environment or in the .env file."
    )

# Location of the training data, passed to the CSV reader in a later cell.
DATA_PATH = GOOGLE_DRIVE_PATH + '/train_data'

In [None]:
# Display the base path that was read from the environment.
print(GOOGLE_DRIVE_PATH)

In [None]:
# Load the horse-race CSV data located under DATA_PATH via the project helper.
# NOTE(review): the result is used like a pandas DataFrame in the following
# cells (.info(), .head()) — confirm the helper's return type.
df = rd.read_horse_race_csv(DATA_PATH)

In [None]:
# Print the summary reported by .info() for the raw data.
df.info()

In [None]:
# Preview the first rows of the raw data (shown as the cell output).
df.head()

## 前処理

In [None]:
# Build the training frame from the raw data with the project helper; the flag
# asks it to use its default label-making routine.
# NOTE(review): the cells below rely on the result containing the columns
# "label_high", "label_middle" and "label_low" — confirm in utils.prepare_data.
df_for_learning = prepare_data.prepare_train_data(df, use_default_make_label=True)

In [None]:
# Names of the feature columns used for training: every column of the
# training frame except the three label columns.
columns_for_learning = df_for_learning.columns.values.tolist()
for label_column in ("label_high", "label_middle", "label_low"):
    # list.remove raises ValueError if the label column is absent.
    columns_for_learning.remove(label_column)

In [None]:
# Print the summary reported by .info() for the prepared training frame.
df_for_learning.info()

## 学習

In [None]:
# 学習に用いるデータセットの作成
x = np.array(df_for_learning[columns_for_learning])
y = np.array(df_for_learning[["label_high", "label_middle", "label_low"]])
#del df
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=11)
#del x, y
print(x_train.shape)
print(x_test.shape)

In [None]:
# データセットのシャッフルとバッチ化
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(1024)

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(1024)

In [None]:
# Imported under its own name so that `model` only ever refers to the network
# instance, never to the module that defines it.
import utils.horse_strong_model as horse_strong_model

# Create the model instance.
# The second argument is taken from the label array (three label columns)
# instead of a hard-coded 3, so it follows the labels if they change.
# NOTE(review): arguments are presumably (input size, output size) — confirm
# against HorseStrongModel's constructor.
model = horse_strong_model.HorseStrongModel(x_train.shape[1], y_train.shape[1])

In [None]:
# Metrics reported by fit() and evaluate().
metrics = [
    # Existing metrics, kept under the same names so earlier results stay
    # comparable. BinaryAccuracy thresholds each output independently, so by
    # itself it does not tell how often the predicted class is the right one.
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.AUC(name='auc'),
    # Argmax-based accuracy, consistent with the categorical_crossentropy loss
    # used below.
    # NOTE(review): assumes the three label columns form a one-hot target
    # (exactly one of them set per row) — confirm in the label-making code.
    tf.keras.metrics.CategoricalAccuracy(name='categorical_accuracy'),
]

# Configure training: Adam optimizer, categorical cross-entropy loss.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=metrics)

In [None]:
# Train on the batched training dataset for 10 epochs.
# NOTE(review): the trailing "#500)" remnant suggests 500 epochs was an
# alternative setting — confirm which value is intended.
model.fit(train_ds, epochs=10)#500)

In [None]:
# Evaluate the trained model on the held-out test batches.
model.evaluate(test_ds, verbose=2)

In [None]:
# Inspect the predicted values against the true labels for the test data
# (commented out because it takes a long time).
# NOTE: if re-enabled, this loop rebinds `x` and `y`, which later cells still
# index (e.g. x[0]); rename the loop variables before using it.
#for x, y in zip(x_test, y_test):
#    print(f"pred: {model.predict(x.reshape(1, -1))}, label: {y}")

In [None]:
# Save the trained model under the name "horse_strong_model" via the project
# I/O helper.
io_m.save_model(model, model_name="horse_strong_model")

In [None]:
# Check that the saved model has no problems: load it back by name and
# evaluate it on the same test batches as the in-memory model.
test_model = io_m.read_model("horse_strong_model")
test_model.evaluate(test_ds, verbose=2)

In [None]:
# Pass the first feature row, reshaped to a batch of one, through
# `test_model.d1` and then `test_model.latent_output_layer`; the final value
# is displayed as the cell output.
first_sample = x[0].reshape(1, -1)
d1_output = test_model.d1(first_sample)
test_model.latent_output_layer(d1_output)

In [None]:
# List the attribute names available on the reloaded model object.
dir(test_model)

In [None]:
# Shape of the first row of the feature array.
x[0].shape