### ノック６１：フォルダ生成をして機械学習用データを読み込もう

In [1]:
# Create the input/output folders used throughout the notebook
import os
data_dir = 'data'
input_dir, output_dir = (os.path.join(data_dir, sub_dir) for sub_dir in ('0_input', '1_output'))
for folder in (input_dir, output_dir):
    # exist_ok=True keeps the cell re-runnable when the folders already exist
    os.makedirs(folder, exist_ok=True)

In [2]:
# Load the base dataset for machine learning from the input folder
import pandas as pd
ml_data_file = 'ml_base_data.csv'
ml_data_path = os.path.join(input_dir, ml_data_file)
ml_data = pd.read_csv(ml_data_path)
# Preview the first three rows
ml_data.head(3)

Unnamed: 0,store_name,y_weekday,y_weekend,order,order_fin,order_cancel,order_delivery,order_takeout,order_weekday,order_weekend,...,order_time_14,order_time_15,order_time_16,order_time_17,order_time_18,order_time_19,order_time_20,order_time_21,delta_avg,year_month
0,あきる野店,1.0,0.0,1147,945,202,841,306,844,303,...,101,95,107,106,100,108,109,96,34.110053,201904
1,さいたま南店,1.0,1.0,1504,1217,287,1105,399,1104,400,...,143,142,137,130,113,140,132,155,35.337716,201904
2,さいたま緑店,1.0,1.0,1028,847,181,756,272,756,272,...,95,102,82,90,93,95,95,84,34.291617,201904


### ノック６２：カテゴリカル変数の対応をしよう

In [3]:
# One-hot encode store_name into "store_<name>" indicator columns
# ('_' is the default separator between the prefix and the category value)
category_data = pd.get_dummies(ml_data['store_name'], prefix='store')
display(category_data.head(3))

Unnamed: 0,store_あきる野店,store_さいたま南店,store_さいたま緑店,store_さいたま西店,store_つくば店,store_三浦店,store_三鷹店,store_上尾店,store_上野店,store_世田谷店,...,store_駒込店,store_高円寺店,store_高島平店,store_高崎店,store_高座店,store_高津店,store_高田馬場店,store_鴻巣店,store_鶴見店,store_麻生店
0,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [4]:
# Attach the one-hot columns to the dataset.
# One dummy column is conventionally dropped because it carries no extra information
# and avoids multicollinearity; store_麻生店 is the one dropped here as an example.
category_data = category_data.drop(columns=['store_麻生店'])
# year_month is not needed, and store_name is replaced by its one-hot columns
ml_data = pd.concat(
    [ml_data.drop(columns=['year_month', 'store_name']), category_data],
    axis=1,
)
ml_data.columns

Index(['y_weekday', 'y_weekend', 'order', 'order_fin', 'order_cancel',
       'order_delivery', 'order_takeout', 'order_weekday', 'order_weekend',
       'order_time_11',
       ...
       'store_駒沢店', 'store_駒込店', 'store_高円寺店', 'store_高島平店', 'store_高崎店',
       'store_高座店', 'store_高津店', 'store_高田馬場店', 'store_鴻巣店', 'store_鶴見店'],
      dtype='object', length=215)

In [5]:
# Preview the merged dataset (head() shows the first five rows by default)
ml_data.head()

Unnamed: 0,y_weekday,y_weekend,order,order_fin,order_cancel,order_delivery,order_takeout,order_weekday,order_weekend,order_time_11,...,store_駒沢店,store_駒込店,store_高円寺店,store_高島平店,store_高崎店,store_高座店,store_高津店,store_高田馬場店,store_鴻巣店,store_鶴見店
0,1.0,0.0,1147,945,202,841,306,844,303,91,...,False,False,False,False,False,False,False,False,False,False
1,1.0,1.0,1504,1217,287,1105,399,1104,400,130,...,False,False,False,False,False,False,False,False,False,False
2,1.0,1.0,1028,847,181,756,272,756,272,95,...,False,False,False,False,False,False,False,False,False,False
3,1.0,0.0,1184,980,204,852,332,870,314,122,...,False,False,False,False,False,False,False,False,False,False
4,1.0,1.0,1267,1058,209,928,339,936,331,122,...,False,False,False,False,False,False,False,False,False,False


### ノック６３：学習データとテストデータを分割しよう

In [6]:
# Split the dataset into training (70%) and test (30%) subsets with a fixed seed
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(ml_data, test_size=0.3, random_state=0)

print(f'Train：{len(train_data)}件/ Test:{len(test_data)}')

# Report how many rows of each class (0/1) ended up in each subset, per target column
for target_label, target_col in (('Weekday', 'y_weekday'), ('Weekend', 'y_weekend')):
    for subset_label, subset in (('Train', train_data), ('Test', test_data)):
        for class_value in (0, 1):
            n_rows = len(subset.loc[subset[target_col] == class_value])
            print(f'{target_label} {subset_label}{class_value}：{n_rows}件')

Train：1501件/ Test:644
Weekday Train0：685件
Weekday Train1：816件
Weekday Test0：290件
Weekday Test1：354件
Weekend Train0：708件
Weekend Train1：793件
Weekend Test0：295件
Weekend Test1：349件


### ノック６４：１つのモデルを構築しよう（Weekday モデル）

In [7]:
# Build the feature matrix (X) and the target vector (y) for the weekday model
target_y = 'y_weekday'

# Every column except the two target columns is used as a feature
X_cols = list(train_data.columns)
for label_col in ('y_weekday', 'y_weekend'):
    X_cols.remove(label_col)

X_train, y_train = train_data[X_cols], train_data[target_y]
X_test, y_test = test_data[X_cols], test_data[target_y]
display(y_train.head(3))
display(X_train.head(3))

1137    1.0
971     0.0
1983    1.0
Name: y_weekday, dtype: float64

Unnamed: 0,order,order_fin,order_cancel,order_delivery,order_takeout,order_weekday,order_weekend,order_time_11,order_time_12,order_time_13,...,store_駒沢店,store_駒込店,store_高円寺店,store_高島平店,store_高崎店,store_高座店,store_高津店,store_高田馬場店,store_鴻巣店,store_鶴見店
1137,977,809,168,724,253,685,292,102,88,84,...,False,False,False,False,False,False,False,False,False,False
971,1099,904,195,816,283,779,320,99,102,101,...,False,False,False,False,False,False,False,True,False,False
1983,966,794,172,724,242,671,295,80,95,87,...,False,False,False,False,False,False,False,False,False,False


In [8]:
# Build a decision tree classifier and fit it on the training data
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(random_state=0)  # fixed seed so that re-runs build the same tree
model.fit(X_train, y_train)

### ノック６５：評価を実施してみよう

In [9]:
# Predict with the fitted model on both the training and the test features
# Per the author's note, a prediction of "1" means the order count is expected to increase
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)
y_pred_test

array([0., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
       0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 1.,
       1., 0., 1., 0., 0., 1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 0., 0.,
       0., 0., 1., 1., 1., 0., 1., 1., 0., 0., 1., 1., 0., 0., 1., 1., 1.,
       1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1.,
       0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 0.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0.,
       1., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 1., 1., 1.,
       0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 0., 0.,
       0., 1., 0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 1., 1.,
       0., 1., 1., 1., 0., 1., 0., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0.,
       1., 0., 1., 0., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1., 0.,
       1., 0., 1., 1., 1., 0., 1., 0., 0., 0., 1., 1., 1., 0., 1., 1., 0.,
       0., 1., 1., 0., 0.

In [10]:
# Compute accuracy, F1, recall and precision for both the training and the test predictions
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)
f1_train = f1_score(y_train, y_pred_train)
f1_test = f1_score(y_test, y_pred_test)
recall_train = recall_score(y_train, y_pred_train)
recall_test = recall_score(y_test, y_pred_test)
precision_train = precision_score(y_train, y_pred_train)
precision_test = precision_score(y_test, y_pred_test)

# Print each metric rounded to two decimals, training vs. test side by side
print(f'【正解率】Train：{round(acc_train,2)} Test：{round(acc_test, 2)}')
print(f'【F値】Train：{round(f1_train,2)} Test：{round(f1_test, 2)}')
print(f'【再現率】Train：{round(recall_train,2)} Test：{round(recall_test, 2)}')
print(f'【適合率】Train：{round(precision_train,2)} Test：{round(precision_test, 2)}')

【正解率】Train：1.0 Test：0.82
【F値】Train：1.0 Test：0.84
【再現率】Train：1.0 Test：0.82
【適合率】Train：1.0 Test：0.86


In [11]:
# Print the confusion matrices (laid out as [[TN, FP], [FN, TP]]) for training and test data
print(confusion_matrix(y_train, y_pred_train))
print(confusion_matrix(y_test, y_pred_test))

[[685   0]
 [  0 816]]
[[241  49]
 [ 64 290]]


In [12]:
# Store the confusion matrix cells in separate variables for later use (flattened 1-D order: TN, FP, FN, TP)
tn_train, fp_train, fn_train, tp_train = confusion_matrix(y_train, y_pred_train).ravel()  # .ravel() flattens the matrix into a 1-D array
tn_test, fp_test, fn_test, tp_test = confusion_matrix(y_test, y_pred_test).ravel()
print(f'【混同行列】Train：{tn_train}, {fp_train}, {fn_train}, {tp_train}')
print(f'【混同行列】Test：{tn_test}, {fp_test}, {fn_test}, {tp_test}')

【混同行列】Train：685, 0, 0, 816
【混同行列】Test：241, 49, 64, 290


In [13]:
# Collect the evaluation metrics into one DataFrame: one row for train, one for test
train_metrics = {
    'DataCategory': ['train'],
    'acc': [acc_train],
    'f1': [f1_train],
    'recall': [recall_train],
    'precision': [precision_train],
    'tp': [tp_train],
    'fn': [fn_train],
    'fp': [fp_train],
    'tn': [tn_train],
}
test_metrics = {
    'DataCategory': ['test'],
    'acc': [acc_test],
    'f1': [f1_test],
    'recall': [recall_test],
    'precision': [precision_test],
    'tp': [tp_test],
    'fn': [fn_test],
    'fp': [fp_test],
    'tn': [tn_test],
}
score_train = pd.DataFrame(train_metrics)
score_test = pd.DataFrame(test_metrics)

# Stack both rows and renumber the index from 0
score = pd.concat([score_train, score_test], ignore_index=True)
score

Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,1.0,1.0,1.0,1.0,816,0,0,685
1,test,0.824534,0.836941,0.819209,0.855457,290,64,49,241


### ノック６６：モデルの重要度を確認してみよう

In [14]:
# Rank the features by their contribution to the fitted model and show the top 10.
# feature_importances_ is read from the fitted estimator, one value per training column.
importance = (
    pd.DataFrame({'cols': X_train.columns, 'importance': model.feature_importances_})
    .sort_values('importance', ascending=False)
)
importance.head(10)

Unnamed: 0,cols,importance
5,order_weekday,0.369241
6,order_weekend,0.346013
18,delta_avg,0.02743
2,order_cancel,0.026031
12,order_time_16,0.024161
8,order_time_12,0.023256
3,order_delivery,0.020037
17,order_time_21,0.018936
10,order_time_14,0.017932
11,order_time_15,0.017163


### ノック６７：モデル構築から評価までを関数化しよう

In [15]:
# Wrap model fitting and evaluation into reusable functions.
# The estimator is passed in as an argument so that it can be configured outside the function.
def _score_row(data_category, y_true, y_pred):
    """Return a one-row DataFrame of classification metrics for one data split.

    Args:
        data_category: Label stored in the 'DataCategory' column (e.g. 'train').
        y_true: True target values.
        y_pred: Predicted target values.

    Returns:
        DataFrame with columns DataCategory, acc, f1, recall, precision, tp, fn, fp, tn.

    Note:
        The confusion matrix is unpacked into exactly four values, so this fails
        unless the matrix is 2x2 (binary target with both classes present).
    """
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return pd.DataFrame({'DataCategory': [data_category],
                         'acc': [accuracy_score(y_true, y_pred)],
                         'f1': [f1_score(y_true, y_pred)],
                         'recall': [recall_score(y_true, y_pred)],
                         'precision': [precision_score(y_true, y_pred)],
                         'tp': [tp], 'fn': [fn], 'fp': [fp], 'tn': [tn]})


def make_model_and_eval(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training data and evaluate it on both splits.

    Args:
        model: Unfitted estimator providing ``fit``, ``predict`` and, once fitted,
            a ``feature_importances_`` attribute.
        X_train: Training features (a DataFrame; its ``columns`` are read).
        X_test: Test features.
        y_train: Training target values.
        y_test: Test target values.

    Returns:
        Tuple ``(score, importance, model, cols)``:
            score: two-row DataFrame of metrics (row 0 = train, row 1 = test).
            importance: DataFrame of feature importances, sorted in descending order.
            model: the fitted estimator (the same object that was passed in).
            cols: DataFrame with a single 'X_cols' column listing the feature names.

    Side effects:
        Fits ``model`` in place and displays the score table via ``display``.
    """
    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    score = pd.concat([_score_row('train', y_train, y_pred_train),
                       _score_row('test', y_test, y_pred_test)],
                      ignore_index=True)

    importance = pd.DataFrame({'cols': X_train.columns,
                               'importance': model.feature_importances_})
    importance = importance.sort_values('importance', ascending=False)

    # Record the column names of X_train, since those are the features the model was fitted on
    cols = pd.DataFrame({'X_cols': X_train.columns})
    display(score)

    return score, importance, model, cols

In [16]:
# Build and evaluate the decision tree through the function above (yields the same scores as knock 65)
model = DecisionTreeClassifier(random_state=0)
score, importance, model, cols = make_model_and_eval(model, X_train, X_test, y_train, y_test)

Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,1.0,1.0,1.0,1.0,816,0,0,685
1,test,0.824534,0.836941,0.819209,0.855457,290,64,49,241


### ノック６８：モデルファイルや評価結果を出力しよう

In [17]:
# Create a dedicated, timestamped output folder per run so that repeated trials
# do not overwrite earlier results.
import datetime
# Use microsecond resolution (%f): with a second-resolution stamp, two cells executed
# within the same second get the same folder name, and because exist_ok=True accepts
# an existing folder silently, the later cell would overwrite the earlier files.
now = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
target_output_dir_name = 'results_' + now
target_output_dir = os.path.join(output_dir, target_output_dir_name)
os.makedirs(target_output_dir, exist_ok=True)
print(target_output_dir)

data\1_output\results_20240305203115


In [18]:
# Save the evaluation tables as CSV files and the fitted model as a pickle file
import pickle

score_name, importance_name, cols_name, model_name = (
    'score.csv', 'importance.csv', 'X_cols.csv', 'model.pickle')

# Every artifact goes into the run-specific output folder
score_path, importance_path, cols_path, model_path = (
    os.path.join(target_output_dir, file_name)
    for file_name in (score_name, importance_name, cols_name, model_name))

for frame, csv_path in ((score, score_path), (importance, importance_path), (cols, cols_path)):
    frame.to_csv(csv_path, index=False)

with open(model_path, mode='wb') as f:
    pickle.dump(model, f, protocol=2)

### ノック６９：アルゴリズムを拡張して多角的な評価を実施しよう

In [19]:
# Train and evaluate several tree-based classifiers under identical conditions:
# a single decision tree plus two ensemble methods (random forest, gradient boosting).
# Each model is fitted and scored separately so that the results can be compared.
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

models = {'DecisionTree': DecisionTreeClassifier(random_state=0),
          'RandomForest':RandomForestClassifier(random_state=0),
          'GradientBoostingClassifier':GradientBoostingClassifier(random_state=0)}

# Use microsecond resolution (%f): with a second-resolution stamp, a folder created
# within the same second gets the same name, and because exist_ok=True accepts an
# existing folder silently, score.csv / importance.csv / X_cols.csv would be overwritten.
now = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
target_output_dir_name = 'results_' + now
target_output_dir = os.path.join(output_dir, target_output_dir_name)
os.makedirs(target_output_dir, exist_ok=True)
print(target_output_dir)

score_all = []
importance_all = []

for model_name, model in models.items():
    print(model_name)
    score, importance, model, cols = make_model_and_eval(model, X_train, X_test, y_train, y_test)
    # Tag every row with the model it belongs to so the tables can be concatenated
    score['model_name'] = model_name
    importance['model_name'] = model_name

    # Keep the file name in its own variable instead of overwriting the loop variable
    model_file_name = f'model_{model_name}.pickle'
    model_path = os.path.join(target_output_dir, model_file_name)

    with open(model_path, mode='wb') as f:
        pickle.dump(model, f, protocol=2)
    score_all.append(score)
    importance_all.append(importance)

# Combine the per-model tables into single DataFrames
score_all = pd.concat(score_all, ignore_index=True)
importance_all = pd.concat(importance_all, ignore_index=True)
cols = pd.DataFrame({'X_cols':X_train.columns})

score_name = 'score.csv'
importance_name = 'importance.csv'
cols_name = 'X_cols.csv'

score_path = os.path.join(target_output_dir, score_name)
importance_path = os.path.join(target_output_dir, importance_name)
cols_path = os.path.join(target_output_dir, cols_name)

score_all.to_csv(score_path, index=False)
importance_all.to_csv(importance_path, index=False)
cols.to_csv(cols_path, index=False)

data\1_output\results_20240305203115
DecisionTree


Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,1.0,1.0,1.0,1.0,816,0,0,685
1,test,0.824534,0.836941,0.819209,0.855457,290,64,49,241


RandomForest


Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,1.0,1.0,1.0,1.0,816,0,0,685
1,test,0.807453,0.823864,0.819209,0.828571,290,64,60,230


GradientBoostingClassifier


Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,0.875416,0.887009,0.89951,0.874851,734,82,105,580
1,test,0.824534,0.839716,0.836158,0.843305,296,58,55,235


過学習の兆候：  
トレーニングセットでの高パフォーマンス: トレーニングデータに対する精度や再現率が非常に高い。  
テストセットでの低パフォーマンス: テストデータに対する精度や再現率がトレーニングデータに比べて顕著に低い。  
パフォーマンスの差異: トレーニングセットとテストセットのパフォーマンス指標の差異が大きい。  

=> 今回のノック６９では、テストの正解率は決定木と同じ（0.825）だが、Train と Test のスコア差が最も小さく（acc: 0.875 → 0.825）、Test の F値も最も高い（0.840）ことから、勾配ブースティングが一番汎化性能が高いと思われる。

### ノック７０：平日/休日モデルを一度に回せるようにする

In [20]:
# Same procedure as knock 69, repeated for both targets (weekday and weekend)

X_cols = list(train_data.columns)
X_cols.remove('y_weekday')
X_cols.remove('y_weekend')
targets_y = ['y_weekday', 'y_weekend']

# Use microsecond resolution (%f): with a second-resolution stamp, a folder created
# within the same second gets the same name, and because exist_ok=True accepts an
# existing folder silently, earlier result files in it would be overwritten.
now = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
target_output_dir_name = 'results_' + now
target_output_dir = os.path.join(output_dir, target_output_dir_name)
os.makedirs(target_output_dir,exist_ok=True)
print(target_output_dir)

score_all = []
importance_all = []

# The feature matrices do not depend on the target, so build them once outside the loop
X_train = train_data[X_cols]
X_test = test_data[X_cols]

for target_y in targets_y:
    y_train = train_data[target_y]
    y_test = test_data[target_y]

    # Fresh, unfitted estimators for every target
    models = {'DecisionTree': DecisionTreeClassifier(random_state=0),
              'RandomForest':RandomForestClassifier(random_state=0),
              'GradientBoosting':GradientBoostingClassifier(random_state=0)}

    for model_name, model in models.items():

        print(model_name)
        score, importance, model, cols = make_model_and_eval(model, X_train, X_test, y_train, y_test)

        # Tag every row with its model and target so the tables can be concatenated and filtered
        score['model_name'] = model_name
        importance['model_name'] = model_name
        score['model_target'] = target_y
        importance['model_target'] = target_y

        # Keep the file name in its own variable instead of overwriting the loop variable
        model_file_name = f'model_{target_y}_{model_name}.pickle'
        model_path = os.path.join(target_output_dir, model_file_name)

        with open(model_path, mode='wb') as f:
            pickle.dump(model, f, protocol=2)

        score_all.append(score)
        importance_all.append(importance)

# Combine the per-model, per-target tables into single DataFrames
score_all = pd.concat(score_all, ignore_index=True)
importance_all = pd.concat(importance_all, ignore_index=True)
cols = pd.DataFrame({'X_cols':X_train.columns})

score_name = 'score.csv'
importance_name = 'importance.csv'
cols_name = 'X_cols.csv'

score_path = os.path.join(target_output_dir, score_name)
importance_path = os.path.join(target_output_dir, importance_name)
cols_path = os.path.join(target_output_dir, cols_name)

score_all.to_csv(score_path, index=False)
importance_all.to_csv(importance_path, index=False)
cols.to_csv(cols_path, index=False)

data\1_output\results_20240305203116
DecisionTree


Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,1.0,1.0,1.0,1.0,816,0,0,685
1,test,0.824534,0.836941,0.819209,0.855457,290,64,49,241


RandomForest


Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,1.0,1.0,1.0,1.0,816,0,0,685
1,test,0.807453,0.823864,0.819209,0.828571,290,64,60,230


GradientBoosting


Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,0.875416,0.887009,0.89951,0.874851,734,82,105,580
1,test,0.824534,0.839716,0.836158,0.843305,296,58,55,235


DecisionTree


Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,1.0,1.0,1.0,1.0,793,0,0,708
1,test,0.718944,0.738061,0.730659,0.745614,255,94,87,208


RandomForest


Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,1.0,1.0,1.0,1.0,793,0,0,708
1,test,0.793478,0.804699,0.7851,0.825301,274,75,58,237


GradientBoosting


Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn
0,train,0.854763,0.855629,0.814628,0.900976,646,147,71,637
1,test,0.810559,0.817365,0.782235,0.855799,273,76,46,249


=> ノック６９と同様に、平日・休日のどちらのモデルでも Train と Test のスコア差が最も小さい勾配ブースティングが、一番汎化性能が高いと思われる。

In [21]:
# Side-by-side comparison of all models for the weekday target
score_all[score_all['model_target'].eq('y_weekday')]

Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn,model_name,model_target
0,train,1.0,1.0,1.0,1.0,816,0,0,685,DecisionTree,y_weekday
1,test,0.824534,0.836941,0.819209,0.855457,290,64,49,241,DecisionTree,y_weekday
2,train,1.0,1.0,1.0,1.0,816,0,0,685,RandomForest,y_weekday
3,test,0.807453,0.823864,0.819209,0.828571,290,64,60,230,RandomForest,y_weekday
4,train,0.875416,0.887009,0.89951,0.874851,734,82,105,580,GradientBoosting,y_weekday
5,test,0.824534,0.839716,0.836158,0.843305,296,58,55,235,GradientBoosting,y_weekday


In [22]:
# Side-by-side comparison of all models for the weekend target
score_all[score_all['model_target'].eq('y_weekend')]

Unnamed: 0,DataCategory,acc,f1,recall,precision,tp,fn,fp,tn,model_name,model_target
6,train,1.0,1.0,1.0,1.0,793,0,0,708,DecisionTree,y_weekend
7,test,0.718944,0.738061,0.730659,0.745614,255,94,87,208,DecisionTree,y_weekend
8,train,1.0,1.0,1.0,1.0,793,0,0,708,RandomForest,y_weekend
9,test,0.793478,0.804699,0.7851,0.825301,274,75,58,237,RandomForest,y_weekend
10,train,0.854763,0.855629,0.814628,0.900976,646,147,71,637,GradientBoosting,y_weekend
11,test,0.810559,0.817365,0.782235,0.855799,273,76,46,249,GradientBoosting,y_weekend


In [23]:
# Show the 10 most important features of the gradient boosting model for the weekday target.
# Each model's rows were sorted by importance (descending) before concatenation,
# so head(10) of the filtered rows yields the top 10.
is_weekday_target = importance_all['model_target'] == 'y_weekday'
is_gradient_boosting = importance_all['model_name'] == 'GradientBoosting'
importance_all.loc[is_weekday_target & is_gradient_boosting].head(10)

Unnamed: 0,cols,importance,model_name,model_target
426,order_weekend,0.484104,GradientBoosting,y_weekday
427,order_weekday,0.356882,GradientBoosting,y_weekday
428,order,0.024794,GradientBoosting,y_weekday
429,delta_avg,0.014385,GradientBoosting,y_weekday
430,order_cancel,0.010827,GradientBoosting,y_weekday
431,order_time_21,0.009972,GradientBoosting,y_weekday
432,order_time_14,0.009645,GradientBoosting,y_weekday
433,order_fin,0.009442,GradientBoosting,y_weekday
434,order_takeout,0.00942,GradientBoosting,y_weekday
435,order_time_12,0.008361,GradientBoosting,y_weekday
