In [35]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras

import unicodedata
import re

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

In [2]:
# Never truncate wide frames: show every column when a DataFrame is displayed.
pd.options.display.max_columns = None

In [3]:
# Helpers that normalise the merge-key columns (race date / race number).
def normalize_date(s):
    """Convert a Japanese-style date string to ``YYYY/MM/DD``.

    Full-width characters are folded to their ASCII forms (NFKC) and spaces
    are removed before parsing, so ``"２００９年 １月 ５日"`` becomes
    ``"2009/01/05"``. A string that already starts with ``YYYY/M/D`` is
    accepted too and re-emitted zero-padded, which makes the function
    idempotent: applying it to its own output is a no-op instead of
    producing ``None``.

    Args:
        s: The raw cell value. Anything that is not a ``str`` (``None``, or
            the float NaN pandas uses for an empty CSV cell) is treated as
            a missing date.

    Returns:
        The date as ``"YYYY/MM/DD"``, or ``None`` when ``s`` is not a string
        or does not start with a recognised date.
    """
    # unicodedata.normalize raises TypeError on non-strings, so bail out early.
    if not isinstance(s, str):
        return None

    # Full-width -> half-width conversion, then strip spaces.
    s = unicodedata.normalize('NFKC', s).replace(" ", "")

    # Extract year / month / day from the "Y年M月D日" layout.
    match = re.match(r"(\d{4})年(\d{1,2})月(\d{1,2})日", s)
    if match is None:
        # Fall back to the slash-separated layout this function itself emits.
        # The lookahead stops a longer digit run from being cut in half.
        match = re.match(r"(\d{4})/(\d{1,2})/(\d{1,2})(?!\d)", s)
    if match is None:
        return None  # format not recognised

    year, month, day = match.groups()
    return f"{year}/{int(month):02}/{int(day):02}"

def normalize_race_round(s):
    """Convert a race-number label to the zero-padded form ``"NNR"``.

    Full-width characters are folded to ASCII (NFKC), spaces are removed and
    the text is upper-cased before parsing, so ``"１Ｒ"``, ``"1r"`` and
    ``"01R"`` all become ``"01R"``.

    Args:
        s: The raw cell value. Anything that is not a ``str`` (``None``, or
            the float NaN pandas uses for an empty CSV cell) is treated as
            missing.

    Returns:
        The label as ``"NNR"`` (at least two digits), or ``None`` when ``s``
        is not a string or does not start with ``<digits>R``.
    """
    # unicodedata.normalize raises TypeError on non-strings, so bail out early.
    if not isinstance(s, str):
        return None

    # Full-width -> half-width conversion, then strip spaces.
    s = unicodedata.normalize('NFKC', s).replace(" ", "")

    # Pull out the leading number that precedes the "R" marker.
    match = re.match(r"(\d+)R", s.upper())
    if match is None:
        return None
    return f"{int(match.group(1)):02}R"

In [4]:
# Input CSVs, both Shift-JIS encoded. Going by the file names these are the race
# timetable and the per-race result details for 2009-01 .. 2009-07.
RACELIST_CSV = "./downloads/racelists/csv/timetable_200901-200907.csv"
DETAIL_CSV = "./downloads/results/details/details_200901-200907.csv"
CSV_ENCODING = "shift-jis"

racelist_df = pd.read_csv(RACELIST_CSV, encoding=CSV_ENCODING)
detail_df = pd.read_csv(DETAIL_CSV, encoding=CSV_ENCODING)

In [5]:
def _columns_containing(columns, keywords):
    """Return the names in ``columns`` that contain any of ``keywords`` as a substring."""
    return [name for name in columns if any(word in name for word in keywords)]


# Normalise the key columns on the race-list side before joining
# (presumably so they line up with detail_df's formatting -- verify against the CSVs).
for key_col, normalizer in (("レース日", normalize_date), ("レース回", normalize_race_round)):
    racelist_df[key_col] = racelist_df[key_col].apply(normalizer)

merge_keys = ["レース日", "レース場", "レース回"]
df = racelist_df.merge(detail_df, on=merge_keys)

# '日次' is present in both inputs, so the merge suffixes it (_x = race-list side).
# Keep only its digits as an integer column.
df['日次'] = df['日次_x'].str.extract(r'(\d+)').astype(int)

# Substrings identifying the columns kept as model inputs.
key_columns = ['日次', 'レース場','年齢','体重','級別','勝率','2連対率','今節成績','展示','進入コース','スタートタイミング','天候','風向き','風速','波の']

feature_columns = _columns_containing(df.columns, key_columns)
# The suffixed originals are superseded by the integer '日次' column.
df_clean = df[feature_columns].drop(columns=["日次_y", "日次_x"])

# Columns whose name contains one of these substrings are one-hot encoded.
onehot_columns = ['天候','レース場','級別']
matched_columns = _columns_containing(df.columns, onehot_columns)
df_onehot = pd.get_dummies(df_clean, columns=matched_columns)

In [6]:
def _blank_to_zero(value):
    """Map missing values and empty / whitespace-only strings to 0; pass everything else through."""
    if pd.isna(value):
        return 0
    if isinstance(value, str) and value.strip() == "":
        return 0
    return value


# Element-wise clean-up of every cell (DataFrame.map needs pandas >= 2.1).
df_onehot = df_onehot.map(_blank_to_zero)

In [8]:
# Force every column to a numeric dtype: values that cannot be parsed as numbers
# become NaN (errors='coerce') and are then replaced by 0.
df_onehot = df_onehot.apply(pd.to_numeric, errors='coerce').fillna(0)

In [12]:
# Standardise every feature column, then restore the column labels that
# fit_transform drops when it returns a bare array.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split further down, so test-set statistics leak into the training features.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df_onehot)
df_scaled = pd.DataFrame(scaled_values, columns=df_onehot.columns)

In [20]:
# Win-bet ("単勝") columns cast to float; going by the column names these are
# the boat number and the payout amount.
single_columns = ['単勝_艇番', '単勝_払戻金']
df_single = df.loc[:, single_columns].astype(float)

In [30]:
def creat_single_prediction_model(input_dim=None, num_classes=6):
    """Build and compile a small fully connected classifier.

    Architecture: input -> Dense(64, relu) -> Dense(32, relu) ->
    Dense(num_classes, softmax). The model is compiled with the Adam
    optimizer, sparse categorical cross-entropy loss (integer class labels
    ``0 .. num_classes - 1``) and the ``'acc'`` accuracy metric.

    Args:
        input_dim: Number of input features. Defaults to the number of
            columns of the module-level ``df_onehot`` frame, which is what
            the original zero-argument call used.
        num_classes: Number of output classes. Defaults to 6.

    Returns:
        The compiled ``Model``.
    """
    if input_dim is None:
        input_dim = len(df_onehot.columns)

    inputs = Input(shape=(input_dim,))
    hidden = Dense(64, activation='relu')(inputs)
    hidden = Dense(32, activation='relu')(hidden)

    # Exactly one class is correct per sample, so the outputs must form a
    # probability distribution. softmax provides that; independent sigmoids
    # (used previously) do not sum to 1 and are not what
    # sparse_categorical_crossentropy expects.
    outputs = Dense(num_classes, activation='softmax')(hidden)
    model = Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['acc']
    )
    return model


In [56]:
# Instantiate the compiled network returned by creat_single_prediction_model().
single_prediction_model = creat_single_prediction_model()

In [58]:
# Feature matrix: the standardised features as a plain NumPy array.
X = df_scaled.to_numpy()
# Target: boat number shifted down by one (presumably 1..6 -> class ids 0..5,
# matching the 6-unit output layer -- confirm against the data).
y = df_single['単勝_艇番'].sub(1)

In [59]:
# Hold out 10 % of the rows; the fixed seed keeps the split reproducible.
TEST_FRACTION = 0.1
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [60]:
# ---- Model training ----
# NOTE(review): the held-out test split is also used as the validation data here.
single_prediction_model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
)

Epoch 1/100


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - acc: 0.1685 - loss: 1.9069 - val_acc: 0.2917 - val_loss: 1.6701
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - acc: 0.3631 - loss: 1.6022 - val_acc: 0.3750 - val_loss: 1.5294
Epoch 3/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - acc: 0.5440 - loss: 1.3692 - val_acc: 0.4583 - val_loss: 1.4437
Epoch 4/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - acc: 0.5639 - loss: 1.2493 - val_acc: 0.5000 - val_loss: 1.3998
Epoch 5/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - acc: 0.5982 - loss: 1.1552 - val_acc: 0.5000 - val_loss: 1.3956
Epoch 6/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - acc: 0.6071 - loss: 1.0716 - val_acc: 0.5000 - val_loss: 1.3764
Epoch 7/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - acc: 0.5801 - loss: 1.0463 - val

<keras.src.callbacks.history.History at 0x2673edafd10>