In [20]:
#インポート文
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

import lightgbm as lgb
from lightgbm import LGBMRanker

from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold

import optuna

In [2]:
#データのインポート
race_df = pd.read_csv("../data/main/race_data.csv",parse_dates=['datetime'])
horse_df = pd.read_csv("../data/main/horse_data.csv")

In [3]:
#データのマージ
race_df_for_merge = race_df[['race_id',
                             "race_round",
                             "race_title",
                             "weather",
                             "ground_status",
                             "where_racecourse",
                             "total_horse_number",
                             "frame_number_first",
                             "horse_number_first",
                             "frame_number_second",
                             "horse_number_second",
                             "frame_number_third",
                             "horse_number_third",
                             "tansyo",
                             "hukusyo_first",
                             "hukusyo_second",
                             "hukusyo_third",
                             "wakuren",
                             "umaren",
                             "wide_1_2",
                             "wide_1_3",
                             "wide_2_3",
                             "umatan",
                             "renhuku3",
                             "rentan3",
                             "is_obstacle",
                             "ground_type",
                             "is_left_right_straight",
                             "distance",
                             "weather_rain",
                             "weather_snow",
                             "datetime"
                            ]]

merged_horse_df = pd.merge(horse_df, race_df_for_merge, on='race_id')

In [4]:
#datetimeをUnix時間に変換してint32に変換する
unix_time = merged_horse_df['datetime'].astype('int64')
unix_time_int32 = unix_time.astype('int32')

merged_horse_df['datetime'] = unix_time_int32

In [6]:
#エンコード
for column in ['race_title',
               'weather',
               'where_racecourse',
               'ground_type',
               'is_left_right_straight']:
    le = LabelEncoder()
    le.fit(merged_horse_df[column])
    merged_horse_df[column] = le.transform(merged_horse_df[column])

In [8]:
#使う特徴量の設定
id = {"race_id",
      "horse_id"
    }

feature = [
#horse_df
           "race_id",#後で抜く
           "horse_id",#後で抜く
#            "rank",#目的変数
    
    
           "frame_number",
           "horse_number",
           "sex_and_age",
           "burden_weight",
           "rider_id",
#            "goal_time",結果なので
#            "goal_time_dif",結果なので
#            "half_way_rank",結果なので
#            "last_time",結果なので
#            "odds",
#            "popular",
           "horse_weight",
           "tamer_id",
           "owner_id",
           "is_down",
           "is_senba",
           "is_mesu",
           "is_osu",
           "horse_weight_dif",
           "burden_weight_rate",
#            "avg_velocity",結果なので

#race_df           
             "race_round",
#              "race_title",タイトルはややこしいので
             "weather",
             "ground_status",
             "where_racecourse",
             "total_horse_number",
#              "frame_number_first",結果なので
#              "horse_number_first",
#              "frame_number_second",
#              "horse_number_second",
#              "frame_number_third",
#              "horse_number_third",
#              "tansyo",
#              "hukusyo_first",
#              "hukusyo_second",
#              "hukusyo_third",
#              "wakuren",
#              "umaren",
#              "wide_1_2",
#              "wide_1_3",
#              "wide_2_3",
#              "umatan",
#              "renhuku3",
#              "rentan3",
             "is_obstacle",
             "ground_type",
             "is_left_right_straight",
             "distance",
             "weather_rain",
             "weather_snow",
             "datetime"
            ]

target = ['rank']

X_id = merged_horse_df[feature]
y = merged_horse_df[target]

In [43]:
#スコア結果を入れるリスト
reports = []
auc_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
support_scores = []

#予測結果を入れるリスト
pred_df_list = []

def objective(trial):
    
# Optuna
    # ハイパーパラメータの探索範囲
    params = {
        'boosting_type': 'gbdt',
        'num_leaves': trial.suggest_int('num_leaves', 10, 100),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.1, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.1, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0),
        'verbose': -1
    }
    
    #LGBMRankerの設定
    ranker = LGBMRanker(**params)

# K-fold
    #kflodの設定
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)

    #kflodで学習用と検証用に分ける
    for fold, (train_idx, val_idx) in enumerate(kfold.split(X_id, y)):
        #id入りの学習データ
        X_train_id, X_val_id = X_id.iloc[train_idx], X_id.iloc[val_idx]
        
        #id抜きの学習データ
        X = X_id.drop(id, axis =1)
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        
        #検証データ
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

# Fit
        # train_basketsを計算
        train_baskets = X_train_id.groupby(["race_id"])["horse_id"].count().values

        # モデルの学習
        model = ranker.fit(X_train,
                           y_train,
                           group=train_baskets)

# Predict
        # モデルの予測
        y_pred = model.predict(X_val)

        #結果を入れるデータフレームを用意
        pred_df_fold = pd.DataFrame({
           "race_id": X_val_id['race_id'],
           "horse_id": X_val_id['horse_id'],
           "rank":y_val['rank'],
           "pred": y_pred
           })

        #そのデータフレームをリストに保存
        pred_df_list.append(pred_df_fold)

# Rank
    # 各foldの予測結果を結合してpred_dfを作成
    pred_df = pd.concat(pred_df_list, axis=0).reset_index(drop=True)

    #予測した結果をrace_idごとでグループ化し順位をつける
    pred_df['pred_rank'] = pred_df.groupby('race_id')['pred'].rank(method='min', ascending=False)

    #3位以内ならprizeを1にする
    pred_df['rank_prize'] = pred_df['rank'].apply(lambda x: 1 if x <= 3 else 0)
    pred_df['pred_rank_prize'] = pred_df['pred_rank'].apply(lambda x: 1 if x <= 3 else 0)

#Score
    #正確度を計算
    auc_score = metrics.accuracy_score(pred_df['rank_prize'], pred_df['pred_rank_prize'])
        
    #auc_scoresのリストに保存
    auc_scores.append(auc_score)
    
    #f1を計算            
    f1 = f1_score(pred_df['rank_prize'], pred_df['pred_rank_prize'])
    
    #reportを計算
    report = metrics.classification_report(pred_df['rank_prize'], pred_df['pred_rank_prize'])
    report_num = metrics.precision_recall_fscore_support(pred_df['rank_prize'], pred_df['pred_rank_prize'])
    reports.append(report)

    #reportから情報をとる
    precision_scores.append(report_num[0])
    recall_scores.append(report_num[1])
    f1_scores.append(report_num[2])
    support_scores.append(report_num[3])
    
    return f1

In [None]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

[32m[I 2023-03-13 17:28:43,463][0m A new study created in memory with name: no-name-811063f4-0f8d-4b00-a49e-9a4784a58ac5[0m




[32m[I 2023-03-13 17:28:44,548][0m Trial 0 finished with value: 0.16055555555555556 and parameters: {'num_leaves': 92, 'learning_rate': 0.07039620133986378, 'feature_fraction': 0.7043254308648812, 'bagging_fraction': 0.6757217275906591, 'bagging_freq': 6, 'lambda_l1': 5.228068389122596, 'lambda_l2': 6.80937069570529}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:45,180][0m Trial 1 finished with value: 0.10740740740740741 and parameters: {'num_leaves': 99, 'learning_rate': 0.04606248179866788, 'feature_fraction': 0.4675249130039866, 'bagging_fraction': 0.3218874399985643, 'bagging_freq': 10, 'lambda_l1': 6.719888700457195, 'lambda_l2': 8.618922364339044}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:45,880][0m Trial 2 finished with value: 0.07694444444444444 and parameters: {'num_leaves': 19, 'learning_rate': 0.06030149660231686, 'feature_fraction': 0.991782211263343, 'bagging_fraction': 0.5744358515715108, 'bagging_freq': 4, 'lambda_l1': 6.637171407047795, 'lambda_l2': 0.3141793897065155}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:46,611][0m Trial 3 finished with value: 0.05888888888888889 and parameters: {'num_leaves': 81, 'learning_rate': 0.08181896921255943, 'feature_fraction': 0.9415945324631977, 'bagging_fraction': 0.3445316973862834, 'bagging_freq': 4, 'lambda_l1': 9.531217844806864, 'lambda_l2': 8.753312900103236}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:47,378][0m Trial 4 finished with value: 0.050370370370370364 and parameters: {'num_leaves': 56, 'learning_rate': 0.01889319813070894, 'feature_fraction': 0.35499455578710915, 'bagging_fraction': 0.36363568765976484, 'bagging_freq': 10, 'lambda_l1': 9.954756149449153, 'lambda_l2': 9.74263509163345}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:48,664][0m Trial 5 finished with value: 0.04380952380952381 and parameters: {'num_leaves': 87, 'learning_rate': 0.014248826390472966, 'feature_fraction': 0.5990694582484096, 'bagging_fraction': 0.5985616886053742, 'bagging_freq': 4, 'lambda_l1': 3.111184529936996, 'lambda_l2': 8.63575440501804}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:50,114][0m Trial 6 finished with value: 0.03722222222222222 and parameters: {'num_leaves': 93, 'learning_rate': 0.004961825705177959, 'feature_fraction': 0.6871532801393894, 'bagging_fraction': 0.5180442655259928, 'bagging_freq': 4, 'lambda_l1': 0.4521705573942416, 'lambda_l2': 4.252090443781202}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:51,092][0m Trial 7 finished with value: 0.03222222222222222 and parameters: {'num_leaves': 35, 'learning_rate': 0.024014931953323337, 'feature_fraction': 0.4693593616770828, 'bagging_fraction': 0.9465966512637679, 'bagging_freq': 2, 'lambda_l1': 3.629963517747032, 'lambda_l2': 5.065225443400262}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:51,937][0m Trial 8 finished with value: 0.02855555555555555 and parameters: {'num_leaves': 93, 'learning_rate': 0.0621320577505747, 'feature_fraction': 0.7051445525445197, 'bagging_fraction': 0.2946298729961914, 'bagging_freq': 8, 'lambda_l1': 6.032215491246884, 'lambda_l2': 7.52233202098455}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:53,055][0m Trial 9 finished with value: 0.025858585858585862 and parameters: {'num_leaves': 66, 'learning_rate': 0.03455546620398547, 'feature_fraction': 0.686717496501801, 'bagging_fraction': 0.3611193043766956, 'bagging_freq': 5, 'lambda_l1': 2.8056411996984187, 'lambda_l2': 6.706522769080298}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:53,876][0m Trial 10 finished with value: 0.02351851851851852 and parameters: {'num_leaves': 69, 'learning_rate': 0.09923924216119231, 'feature_fraction': 0.10998920921723498, 'bagging_fraction': 0.821911985545911, 'bagging_freq': 7, 'lambda_l1': 7.61376569634557, 'lambda_l2': 5.492246952045168}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:54,605][0m Trial 11 finished with value: 0.020854700854700852 and parameters: {'num_leaves': 80, 'learning_rate': 0.046536185916944, 'feature_fraction': 0.43038830055522675, 'bagging_fraction': 0.1641740101428896, 'bagging_freq': 10, 'lambda_l1': 5.22739132843059, 'lambda_l2': 7.402815985019312}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:56,075][0m Trial 12 finished with value: 0.019206349206349203 and parameters: {'num_leaves': 98, 'learning_rate': 0.04308283769541807, 'feature_fraction': 0.8300345962664639, 'bagging_fraction': 0.72585045465804, 'bagging_freq': 8, 'lambda_l1': 7.807303937806677, 'lambda_l2': 9.070214461822154}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:56,906][0m Trial 13 finished with value: 0.01762962962962963 and parameters: {'num_leaves': 100, 'learning_rate': 0.06507867660184755, 'feature_fraction': 0.5342794144194033, 'bagging_fraction': 0.1564542839775493, 'bagging_freq': 7, 'lambda_l1': 4.911066652675495, 'lambda_l2': 9.972386657312379}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:58,066][0m Trial 14 finished with value: 0.016458333333333335 and parameters: {'num_leaves': 41, 'learning_rate': 0.03341295950082796, 'feature_fraction': 0.3485930089595683, 'bagging_fraction': 0.49362373409079296, 'bagging_freq': 1, 'lambda_l1': 7.5131944741024626, 'lambda_l2': 6.533237085257513}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:28:59,679][0m Trial 15 finished with value: 0.01607843137254902 and parameters: {'num_leaves': 73, 'learning_rate': 0.0719528041573175, 'feature_fraction': 0.8208581409426468, 'bagging_fraction': 0.7046783967276107, 'bagging_freq': 9, 'lambda_l1': 4.643470926071748, 'lambda_l2': 7.583980324052052}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:29:01,101][0m Trial 16 finished with value: 0.014999999999999998 and parameters: {'num_leaves': 50, 'learning_rate': 0.050676117119628335, 'feature_fraction': 0.5825357556740075, 'bagging_fraction': 0.9939562650283733, 'bagging_freq': 6, 'lambda_l1': 5.9839411466436445, 'lambda_l2': 5.939340706447167}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:29:02,039][0m Trial 17 finished with value: 0.013742690058479532 and parameters: {'num_leaves': 80, 'learning_rate': 0.07843619977159902, 'feature_fraction': 0.2597872556343864, 'bagging_fraction': 0.41564611003147023, 'bagging_freq': 6, 'lambda_l1': 8.588904951330981, 'lambda_l2': 4.08005630035445}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:29:02,943][0m Trial 18 finished with value: 0.013055555555555555 and parameters: {'num_leaves': 10, 'learning_rate': 0.05140682175013117, 'feature_fraction': 0.4900596330982708, 'bagging_fraction': 0.26878848880193396, 'bagging_freq': 2, 'lambda_l1': 6.694763496941361, 'lambda_l2': 8.082189615786714}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:29:04,163][0m Trial 19 finished with value: 0.01253968253968254 and parameters: {'num_leaves': 60, 'learning_rate': 0.0367778811579036, 'feature_fraction': 0.5985252785289987, 'bagging_fraction': 0.4549641990512372, 'bagging_freq': 9, 'lambda_l1': 8.654849037850983, 'lambda_l2': 8.378385403472116}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:29:05,851][0m Trial 20 finished with value: 0.01196969696969697 and parameters: {'num_leaves': 85, 'learning_rate': 0.056996992050812864, 'feature_fraction': 0.7772770564857281, 'bagging_fraction': 0.6185235700080992, 'bagging_freq': 8, 'lambda_l1': 4.109321161922685, 'lambda_l2': 6.762058837709336}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:29:06,941][0m Trial 21 finished with value: 0.011352657004830919 and parameters: {'num_leaves': 18, 'learning_rate': 0.05833480106751096, 'feature_fraction': 0.9816414481482668, 'bagging_fraction': 0.5528999221004297, 'bagging_freq': 3, 'lambda_l1': 6.716252047100299, 'lambda_l2': 1.0543071761370073}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:29:08,165][0m Trial 22 finished with value: 0.010925925925925926 and parameters: {'num_leaves': 28, 'learning_rate': 0.06534708858050499, 'feature_fraction': 0.8990918804326286, 'bagging_fraction': 0.6181991365241841, 'bagging_freq': 5, 'lambda_l1': 5.601007382277169, 'lambda_l2': 0.39719817095592536}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:29:09,288][0m Trial 23 finished with value: 0.010577777777777776 and parameters: {'num_leaves': 19, 'learning_rate': 0.05396395646820017, 'feature_fraction': 0.8918500887582161, 'bagging_fraction': 0.43157739477489376, 'bagging_freq': 3, 'lambda_l1': 6.520790353124092, 'lambda_l2': 2.2184519977742765}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:29:10,802][0m Trial 24 finished with value: 0.010085470085470087 and parameters: {'num_leaves': 44, 'learning_rate': 0.043501560039108225, 'feature_fraction': 0.9288994608398673, 'bagging_fraction': 0.5128183022721868, 'bagging_freq': 6, 'lambda_l1': 5.310143033358065, 'lambda_l2': 3.992600142243764}. Best is trial 0 with value: 0.16055555555555556.[0m




[32m[I 2023-03-13 17:29:12,393][0m Trial 25 finished with value: 0.009794238683127572 and parameters: {'num_leaves': 92, 'learning_rate': 0.07186961200234197, 'feature_fraction': 0.9979828320469294, 'bagging_fraction': 0.7071631786503675, 'bagging_freq': 7, 'lambda_l1': 7.2073417439239496, 'lambda_l2': 1.8793110935835653}. Best is trial 0 with value: 0.16055555555555556.[0m




In [None]:
study.trials_dataframe()

In [None]:
#各ランダムステートの正確度平均と標準偏差を出力
avg_auc_score = np.mean(auc_scores, axis=0)
std_score = np.std(auc_scores, axis=0)

# 平均と重み付き平均を計算する
precision_avg = np.mean(precision_scores, axis=0)
recall_avg = np.mean(recall_scores, axis=0)
f1_avg = np.mean(f1_scores, axis=0)
support_sum = np.sum(support_scores, axis=0)

# precision_weighted = np.average(precision_avg, weights=support_sum)
# recall_weighted = np.average(recall_avg, weights=support_sum)
# f1_weighted = np.average(f1_avg, weights=support_sum)

In [None]:
print(f"Std: {std_score:.4f}")
print(f"Average AUC score: {avg_auc_score:.4f}")
print(f"Average PRE score 0: {precision_avg[0]:.4f}"+f"   1: {precision_avg[1]:.4f}")
print(f"Average REC score 0: {recall_avg[0]:.4f}"+f"   1: {recall_avg[1]:.4f}")
print(f"Average F1  score 0: {f1_avg[0]:.4f}"+f"   1: {f1_avg[1]:.4f}")

In [None]:
print(reports[0])

In [None]:
print(reports[1])

In [None]:
print(reports[2])

In [None]:
print(reports[3])

In [None]:
print(reports[4])

In [29]:
#Optunaによる、ベストパラメータの取得
best_params = study.best_params

In [30]:
#結果を入れるリスト
reports = []
auc_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
support_scores = []

#ランダムステートを変更し、kfoldを繰り返す
for random_state in range(5):
    
    #予測結果を入れるリスト
    pred_df_list = []
    
    #kflodの設定
    kfold = KFold(n_splits=5, shuffle=True, random_state=random_state)
    
    #kflodで学習用と検証用に分ける
    for fold, (train_idx, val_idx) in enumerate(kfold.split(X_id, y)):
        X_train_id, X_val_id = X_id.iloc[train_idx], X_id.iloc[val_idx]
        
        X = X_id.drop(id, axis =1)
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        # ここでtrain_basketsを計算する
        train_baskets = X_train_id.groupby(["race_id"])["horse_id"].count().values
        
        best_ranker = LGBMRanker(**best_params)

        # モデルの学習
        model = best_ranker.fit(X_train,
                           y_train,
                           group=train_baskets)

        # モデルの予測
        y_pred = model.predict(X_val)

        #結果を入れるデータフレームを用意
        pred_df_fold = pd.DataFrame({
           "race_id": X_val_id['race_id'],
           "horse_id": X_val_id['horse_id'],
           "rank":y_val['rank'],
           "pred": y_pred
           })
        
        #そのデータフレームをリストに保存
        pred_df_list.append(pred_df_fold)

    # 各foldの予測結果を結合してpred_dfを作成
    pred_df = pd.concat(pred_df_list, axis=0).reset_index(drop=True)
    
    #予測した結果をrace_idごとでグループ化し順位をつける
    pred_df['pred_rank'] = pred_df.groupby('race_id')['pred'].rank(method='min', ascending=False)
    
    #3位以内ならprizeを1にする
    pred_df['rank_prize'] = pred_df['rank'].apply(lambda x: 1 if x <= 3 else 0)
    pred_df['pred_rank_prize'] = pred_df['pred_rank'].apply(lambda x: 1 if x <= 3 else 0)
    
    #正確度を計算
    auc_score = metrics.accuracy_score(pred_df['rank_prize'], pred_df['pred_rank_prize'])
    report = metrics.classification_report(pred_df['rank_prize'], pred_df['pred_rank_prize'])
    report_num = metrics.precision_recall_fscore_support(pred_df['rank_prize'], pred_df['pred_rank_prize'])
    #auc_scoresのリストに保存
    auc_scores.append(auc_score)
    
    reports.append(report)
    
    precision_scores.append(report_num[0])
    recall_scores.append(report_num[1])
    f1_scores.append(report_num[2])
    support_scores.append(report_num[3])





In [31]:
#各ランダムステートの正確度平均と標準偏差を出力
avg_auc_score = np.mean(auc_scores, axis=0)
std_score = np.std(auc_scores, axis=0)

# 平均と重み付き平均を計算する
precision_avg = np.mean(precision_scores, axis=0)
recall_avg = np.mean(recall_scores, axis=0)
f1_avg = np.mean(f1_scores, axis=0)
support_sum = np.sum(support_scores, axis=0)

In [32]:
print(f"Std: {std_score:.4f}")
print(f"Average AUC score: {avg_auc_score:.4f}")
print(f"Average PRE score 0: {precision_avg[0]:.4f}"+f"   1: {precision_avg[1]:.4f}")
print(f"Average REC score 0: {recall_avg[0]:.4f}"+f"   1: {recall_avg[1]:.4f}")
print(f"Average F1  score 0: {f1_avg[0]:.4f}"+f"   1: {f1_avg[1]:.4f}")

Std: 0.0034
Average AUC score: 0.6517
Average PRE score 0: 0.7786   1: 0.1838
Average REC score 0: 0.7786   1: 0.1838
Average F1  score 0: 0.7786   1: 0.1838


In [33]:
print(reports[0])

              precision    recall  f1-score   support

           0       0.78      0.78      0.78      6637
           1       0.19      0.19      0.19      1800

    accuracy                           0.65      8437
   macro avg       0.48      0.48      0.48      8437
weighted avg       0.65      0.65      0.65      8437



In [34]:
#特徴量の重要度
for i in model.feature_importances_.argsort()[::-1]:
    print(feature[i], model.feature_importances_[i]/model.feature_importances_.sum())

horse_weight 0.125
rider_id 0.11529126213592233
weather_rain 0.10436893203883495
sex_and_age 0.10194174757281553
burden_weight 0.07524271844660194
is_osu 0.07160194174757281
horse_id 0.06553398058252427
is_mesu 0.06553398058252427
ground_status 0.06067961165048544
horse_weight_dif 0.03519417475728155
race_id 0.032766990291262135
horse_number 0.03033980582524272
ground_type 0.025485436893203883
burden_weight_rate 0.02063106796116505
frame_number 0.01820388349514563
weather 0.01820388349514563
is_down 0.00849514563106796
total_horse_number 0.007281553398058253
is_senba 0.007281553398058253
race_round 0.006067961165048544
is_obstacle 0.0036407766990291263
owner_id 0.0012135922330097086
where_racecourse 0.0
distance 0.0
is_left_right_straight 0.0
tamer_id 0.0
