In [3]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import os, sys, gc, time, warnings, pickle, psutil, random
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix
from multiprocessing import Pool

# Silence every warning for the rest of the session (this also hides
# deprecation warnings raised by the libraries imported above).
warnings.filterwarnings('ignore')

## 必要なファイル

In [2]:
# Input files used by this notebook.
SAMPLE_CSV   = '../data/sample_submission.csv'
PRICE_CSV    = '../data/sell_prices.csv'
EVALUATION   = '../20200602_追加データの確認/sales_train_evaluation.csv'

# Path to the folder holding the per-store master DataFrames
GRID_PATH = '../20200619_28個モデル作る/'

# Path to the folder holding the per-day, per-store predictions
PREDICT_PATH = './'

# Paths to the pickles used for the WRMSSE calculation
# (the previous version is set on purpose so scores line up with the public LB)
SW_DF = "../20200421_ハイスコアカーネルその１/sw_df.pkl"
ROLL_MAT = "../20200421_ハイスコアカーネルその１/roll_mat_df.pkl"

In [4]:
# List of store ids, in order of first appearance in the evaluation sales file.
# Only the store_id column is needed here, so restrict parsing to it instead of
# loading every column of the CSV.
STORES_IDS = pd.read_csv(EVALUATION, usecols=['store_id'])['store_id']
STORES_IDS = list(STORES_IDS.unique())

In [5]:
# Base frame for assembling results: the sample submission file.
# Later cells slice its "id" column at row 30490.
sample_df = pd.read_csv(SAMPLE_CSV)

## スコア確認用の関数を定義

In [6]:
# Table of per-series constants consumed by wrmsse_metric; its "s", "w" and
# "sw" columns are exposed as plain arrays.
sw_df = pd.read_pickle(SW_DF)
S, W, SW = (sw_df[column].values for column in ("s", "w", "sw"))

# Roll-up matrix: kept both as a DataFrame index (row labels) and as a sparse
# CSR matrix for the multiplication done in rollup().
roll_mat_df = pd.read_pickle(ROLL_MAT)
roll_index = roll_mat_df.index
roll_mat_csr = csr_matrix(roll_mat_df.values)

In [7]:
def rollup(v):
    '''
    Multiply the module-level sparse matrix roll_mat_csr by v.

    v - np.array with one row per column of roll_mat_csr and n day columns
        (the original note gives 30490 rows)
    returns - array with one row per row of roll_mat_csr and n columns
    '''
    rolled_up = roll_mat_csr.dot(v)
    return rolled_up

# Function to calculate WRMSSE:
def wrmsse_metric(preds, y_true, score_only=False, s = S, w = W, sw=SW, level=None):
    '''
    Score predictions against true values after rolling both up with rollup().

    preds - predictions: pd.DataFrame, one row per series, one column per day
    y_true - true values: pd.DataFrame with the same layout as preds
    score_only - True: return only the score; False: return (score, score_matrix)
    s - per-rolled-up-series divisor, used only when score_only is False
    w - per-rolled-up-series weight (squared), used only when score_only is False
    sw - per-rolled-up-series multiplier of the RMSE, used only when score_only is True
    level - used only when score_only is True: index into the 12 hard-coded
            row spans below; None sums over all rows

    Every returned score is divided by 12 (the number of row spans).
    '''
    # Squared error of the rolled-up series; shared by both branches.
    rolled_sq_err = np.square(rollup(preds.values - y_true.values))

    if not score_only:
        score_matrix = (rolled_sq_err * np.square(w)[:, None]) / s[:, None]
        score = np.sum(np.sqrt(np.mean(score_matrix, axis=1))) / 12
        return score, score_matrix

    scores = np.sqrt(np.mean(rolled_sq_err, axis=1)) * sw
    if level is None:
        return np.sum(scores) / 12

    # Half-open [first, last) row ranges of the rolled-up matrix, one per level.
    score_spans = [
        (0, 1), (1, 4), (4, 14), (14, 17), (17, 24), (24, 33),
        (33, 54), (54, 84), (84, 154), (154, 3203), (3203, 12350), (12350, 42840)
    ]
    first, last = score_spans[level]
    return np.sum(scores[first:last], axis=0) / 12

## 日毎のsell_priceの値を格納するdictを準備する

In [8]:
# price_dict[store][d] -> DataFrame with the columns id, d, sell_price holding
# that store's rows for day d, for every d in range(1858, 1970).
price_dict = {}

for store in STORES_IDS:

    # Load this store's master frame and keep only the columns needed for prices
    tmp = pd.read_pickle(GRID_PATH+f"grid_df_master_{store}.pkl")
    tmp = tmp[["id", "d", "sell_price"]]

    # Narrow to the requested day window once, so the per-day filters below
    # scan only those rows instead of the whole master frame each time.
    # The rows selected per day (and their index) are unchanged.
    tmp = tmp[tmp["d"].isin(list(range(1858, 1970)))]

    store_dict = {}
    for i in range(1858, 1970):
        store_dict[i] = tmp[tmp["d"]==i]

    price_dict[store] = store_dict

In [9]:
# Usage example: first price rows of store CA_1 on day 1969
price_dict["CA_1"][1969].head()

Unnamed: 0,id,d,sell_price
4937910,HOBBIES_1_001_CA_1_evaluation,1969,8.382812
4937911,HOBBIES_1_002_CA_1_evaluation,1969,3.970703
4937912,HOBBIES_1_003_CA_1_evaluation,1969,2.970703
4937913,HOBBIES_1_004_CA_1_evaluation,1969,4.640625
4937914,HOBBIES_1_005_CA_1_evaluation,1969,2.880859


## CVスコアを確認する

In [29]:
%%time

# The ground truth comes from the same file for every fold, so read it once.
sales = pd.read_csv(EVALUATION)

for START_DAY in [1858-1, 1886-1, 1914-1]:

    # Frame that collects the 28 daily forecasts (F1..F28) of this fold
    validation_base = pd.DataFrame({"id":sample_df[:30490]["id"]})

    # Each store has a single prediction file per fold: load it once here
    # rather than once per forecast day.
    store_preds = {
        store: pd.read_pickle(PREDICT_PATH + f"result_{START_DAY+1}/save_{store}_day28.pkl")
        for store in STORES_IDS
    }

    for day in range(1, 28+1):

        #### Gather every store's forecast for this day ####
        day_parts = []

        for store in STORES_IDS:

            # Prices of this store on the target day
            price_df = price_dict[store][START_DAY+day]

            # Predictions of this store on the target day
            tmp = store_preds[store]
            tmp = tmp[tmp["d"]==(START_DAY+day)]

            # Convert the prediction from "sell_price*sales" back to "sales"
            tmp = tmp.merge(price_df, on=["id", "d"], how="left")
            tmp["pred"] = tmp["pred"]/tmp["sell_price"]
            day_parts.append(tmp)

        day_predict = pd.concat(day_parts)

        # Positional assignment: relies on day_predict being in the same id
        # order as validation_base.
        validation_base["F"+str(day)] = day_predict["pred"].values

    dayCols = ["d_{}".format(i) for i in range(START_DAY+1, START_DAY+28+1)]
    y_true = sales[dayCols]

    print(f"-------------------------------------------------")
    print(f"BASE_MODEL_SCORE : {START_DAY+1} - {START_DAY+29}")
    print(f"-------------------------------------------------")
    score = wrmsse_metric(validation_base.drop("id", axis=1), y_true, score_only=True)
    print(score)

    # Keep this fold's forecasts on disk
    validation_base.to_csv(f"20200627_base_model_predict_rmse_{START_DAY+1}.csv", index=False)

-------------------------------------------------
BASE_MODEL_SCORE : 1858 - 1886
-------------------------------------------------
0.613980701348
-------------------------------------------------
BASE_MODEL_SCORE : 1886 - 1914
-------------------------------------------------
0.492560483918
-------------------------------------------------
BASE_MODEL_SCORE : 1914 - 1942
-------------------------------------------------
0.542075919022
CPU times: user 1min, sys: 3.34 s, total: 1min 3s
Wall time: 1min 1s


## ちゃんと予測できてるかを確認する

In [18]:
# Load one store's prediction file for a visual sanity check.
# The path is built from PREDICT_PATH so it stays in sync with the cells that
# consume these files.
tmp = pd.read_pickle(PREDICT_PATH + "result_1942/submit_CA_1_day28.pkl")

In [19]:
# Display the loaded prediction frame
tmp

Unnamed: 0,d,id,sales_lag_1,pred
4770215,1914,HOBBIES_1_001_CA_1_evaluation,1.0,6.614844
4770216,1914,HOBBIES_1_002_CA_1_evaluation,1.0,1.283952
4770217,1914,HOBBIES_1_003_CA_1_evaluation,0.0,1.384524
4770218,1914,HOBBIES_1_004_CA_1_evaluation,0.0,7.383232
4770219,1914,HOBBIES_1_005_CA_1_evaluation,1.0,3.294492
...,...,...,...,...
4940954,1969,FOODS_3_823_CA_1_evaluation,2.0,5.025110
4940955,1969,FOODS_3_824_CA_1_evaluation,0.0,2.742777
4940956,1969,FOODS_3_825_CA_1_evaluation,1.0,4.805370
4940957,1969,FOODS_3_826_CA_1_evaluation,1.0,1.628781


## 提出用ファイルを作成する

In [21]:
%%time

for START_DAY in [1914-1]:

    # Frame that collects the 28 daily forecasts (F1..F28) for the first
    # 30490 ids of the sample submission
    validation_base = pd.DataFrame({"id":sample_df[:30490]["id"]})

    # Each store has a single prediction file: load it once here rather than
    # once per forecast day.
    store_preds = {
        store: pd.read_pickle(PREDICT_PATH + f"result_1942/submit_{store}_day28.pkl")
        for store in STORES_IDS
    }

    for day in range(1, 28+1):

        #### Gather every store's forecast for this day ####
        day_parts = []

        for store in STORES_IDS:

            # Prices of this store on the target day
            price_df = price_dict[store][START_DAY+day]

            # Predictions of this store on the target day
            tmp = store_preds[store]
            tmp = tmp[tmp["d"]==(START_DAY+day)]

            # Convert the prediction from "sell_price*sales" back to "sales"
            tmp = tmp.merge(price_df, on=["id", "d"], how="left")
            tmp["pred"] = tmp["pred"]/tmp["sell_price"]
            day_parts.append(tmp)

        day_predict = pd.concat(day_parts)

        # Positional assignment: relies on day_predict being in the same id
        # order as validation_base.
        validation_base["F"+str(day)] = day_predict["pred"].values

    # Ground truth for the same 28 days
    sales = pd.read_csv(EVALUATION)
    dayCols = ["d_{}".format(i) for i in range(START_DAY+1, START_DAY+28+1)]
    y_true = sales[dayCols]

    print(f"-------------------------------------------------")
    print("BASE_MODEL_SCORE : 1914 - 1942")
    print(f"-------------------------------------------------")
    score = wrmsse_metric(validation_base.drop("id", axis=1), y_true, score_only=True)
    print(score)

-------------------------------------------------
BASE_MODEL_SCORE : 1914 - 1942
-------------------------------------------------
0.428533482545
CPU times: user 10.4 s, sys: 666 ms, total: 11 s
Wall time: 11 s


In [24]:
for START_DAY in [1942-1]:

    # Frame that collects the 28 daily forecasts (F1..F28) for the ids from
    # row 30490 onward of the sample submission
    evaluation_base = pd.DataFrame({"id":sample_df[30490:]["id"]})

    # Each store has a single prediction file: load it once here rather than
    # once per forecast day.
    store_preds = {
        store: pd.read_pickle(PREDICT_PATH + f"result_1942/submit_{store}_day28.pkl")
        for store in STORES_IDS
    }

    for day in range(1, 28+1):

        #### Gather every store's forecast for this day ####
        day_parts = []

        for store in STORES_IDS:

            # Prices of this store on the target day
            price_df = price_dict[store][START_DAY+day]

            # Predictions of this store on the target day
            tmp = store_preds[store]
            tmp = tmp[tmp["d"]==(START_DAY+day)]

            # Convert the prediction from "sell_price*sales" back to "sales"
            tmp = tmp.merge(price_df, on=["id", "d"], how="left")
            tmp["pred"] = tmp["pred"]/tmp["sell_price"]
            day_parts.append(tmp)

        day_predict = pd.concat(day_parts)

        # Join on id instead of relying on row order
        day_predict["F"+str(day)] = day_predict["pred"]
        evaluation_base = evaluation_base.merge(day_predict[["id","F"+str(day)]], on="id", how="left")

In [25]:
# Display the assembled evaluation-period forecasts (id plus F1..F28)
evaluation_base

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_evaluation,0.749844,0.691505,0.709751,0.748991,0.819571,1.020328,1.056195,0.950223,0.752632,...,1.024478,1.287194,1.180235,0.879563,0.828045,0.808681,0.871101,1.024339,1.137751,1.142378
1,HOBBIES_1_002_CA_1_evaluation,0.251049,0.233350,0.237094,0.218450,0.258881,0.313010,0.305572,0.249545,0.170138,...,0.210723,0.309448,0.369183,0.270761,0.250107,0.242359,0.228892,0.277226,0.361409,0.349340
2,HOBBIES_1_003_CA_1_evaluation,0.509158,0.430901,0.451213,0.441287,0.543765,0.738067,0.676897,0.623651,0.488829,...,0.627875,0.747556,0.781885,0.509812,0.465765,0.446835,0.493180,0.673430,0.804518,0.766809
3,HOBBIES_1_004_CA_1_evaluation,1.689654,1.264372,1.259381,1.293696,1.805726,2.416164,2.724446,2.193506,1.361894,...,1.760945,2.517178,2.542210,1.837701,1.402041,1.410806,1.441587,1.707604,2.368963,2.740457
4,HOBBIES_1_005_CA_1_evaluation,1.026942,0.910759,0.904766,1.000714,1.098078,1.340866,1.450220,1.294430,1.069696,...,1.365140,1.541681,1.558873,1.231548,1.181581,1.094805,1.040320,1.187033,1.569443,1.386291
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30485,FOODS_3_823_WI_3_evaluation,0.472858,0.468764,0.361801,0.371391,0.484152,0.481016,0.445929,0.399283,0.420987,...,0.495057,0.620627,0.725444,0.547078,0.620384,0.615473,0.497104,0.541869,0.615579,0.675886
30486,FOODS_3_824_WI_3_evaluation,0.274502,0.286647,0.334226,0.352071,0.363768,0.415947,0.345596,0.298116,0.312707,...,0.318439,0.375520,0.366393,0.281221,0.284385,0.301528,0.338915,0.311273,0.390095,0.361472
30487,FOODS_3_825_WI_3_evaluation,0.627415,0.527214,0.534817,0.568063,0.631113,0.818809,0.682502,0.721813,0.538353,...,0.765911,0.970312,1.031501,0.835226,0.834750,0.791228,0.696158,0.759505,0.790970,0.852268
30488,FOODS_3_826_WI_3_evaluation,0.933302,0.999347,0.855584,0.925638,1.102328,1.195947,0.992824,1.028457,0.881844,...,1.023473,1.266399,1.234555,1.032747,1.135887,0.995107,1.036538,1.087171,1.252967,1.117115


In [26]:
# Stack the validation rows on top of the evaluation rows
result = pd.concat([validation_base, evaluation_base])

In [27]:
# Write the stacked forecasts to CSV without the index column
result.to_csv("20200627_base_model_predict_rmse_submit.csv", index=False)

In [10]:
%%time

START_DAY = 1942-1

# Frame that collects the 28 daily forecasts (F1..F28) for the ids from
# row 30490 onward of the sample submission
evaluation_base = pd.DataFrame({"id":sample_df[30490:]["id"]})

for day in range(1, 28+1):

    #### Gather every store's forecast for this day, taken from the ####
    #### prediction file of the same day number                     ####
    # Collect the per-store pieces in a list and concatenate once, instead of
    # growing a DataFrame inside the loop.
    day_parts = []

    for store in STORES_IDS:

        # Prices of this store on the target day
        price_df = price_dict[store][START_DAY+day]

        # Predictions of this store on the target day
        tmp = pd.read_pickle(PREDICT_PATH + f"submit_{store}_day{day}.pkl")
        tmp = tmp[tmp["d"]==(START_DAY+day)]

        # Convert the prediction from "sell_price*sales" back to "sales"
        tmp = tmp.merge(price_df, on=["id", "d"], how="left")
        tmp["pred"] = tmp["pred"]/tmp["sell_price"]
        day_parts.append(tmp)

    day_predict = pd.concat(day_parts)

    # To be safe, join on id instead of relying on row order
    day_predict["F"+str(day)] = day_predict["pred"]
    evaluation_base = evaluation_base.merge(day_predict[["id","F"+str(day)]], on="id", how="left")

CPU times: user 5.03 s, sys: 285 ms, total: 5.31 s
Wall time: 5.31 s


In [11]:
# Stack the validation rows on top of the evaluation rows.
# NOTE(review): the cell just above rebuilds only evaluation_base, so
# validation_base here is whatever an earlier cell left in the kernel --
# confirm it holds the matching day-by-day validation forecasts before
# trusting the file written from this result.
result = pd.concat([validation_base, evaluation_base])

In [137]:
# Display the stacked forecasts
result

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,0.793877,0.747731,0.783910,0.817366,0.975276,1.282243,1.164445,0.977888,0.884113,...,0.915187,1.088486,1.008392,0.923145,0.729736,0.775895,0.796517,0.923264,1.003750,0.978838
1,HOBBIES_1_002_CA_1_validation,0.293176,0.275680,0.249223,0.240530,0.261211,0.305128,0.323074,0.208948,0.232060,...,0.286546,0.370702,0.375089,0.267513,0.195921,0.236790,0.234878,0.276099,0.345153,0.357989
2,HOBBIES_1_003_CA_1_validation,0.475781,0.412732,0.414113,0.432541,0.572805,0.774625,0.747049,0.437051,0.470890,...,0.561403,0.724256,0.738335,0.537874,0.492867,0.467662,0.484097,0.545662,0.711785,0.741483
3,HOBBIES_1_004_CA_1_validation,1.631193,1.340382,1.376494,1.368842,1.907489,2.636012,3.299511,1.823796,1.474890,...,1.658987,2.681150,3.363057,1.886732,1.488391,1.449341,1.385049,1.814707,2.936432,3.072670
4,HOBBIES_1_005_CA_1_validation,1.064908,0.916209,0.988002,1.026198,1.227318,1.496321,1.572897,1.318595,1.218297,...,1.225196,1.429625,1.710407,1.173649,1.053207,1.060555,1.017695,1.152914,1.391641,1.516309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60975,FOODS_3_823_WI_3_evaluation,0.488158,0.444063,0.417143,0.371955,0.457751,0.504620,0.483063,0.442396,0.472410,...,0.528832,0.828136,0.883842,0.622964,0.672882,0.663403,0.523271,0.576451,0.709631,0.675886
60976,FOODS_3_824_WI_3_evaluation,0.243199,0.245389,0.248641,0.232435,0.278130,0.295340,0.269154,0.212915,0.286041,...,0.320179,0.393787,0.383770,0.312859,0.301751,0.319762,0.278482,0.315465,0.370460,0.361472
60977,FOODS_3_825_WI_3_evaluation,0.650081,0.546143,0.542788,0.525009,0.606502,0.722630,0.633302,0.681958,0.543443,...,0.766700,0.962446,1.198797,0.838200,0.892876,0.845790,0.727365,0.701763,0.798813,0.852268
60978,FOODS_3_826_WI_3_evaluation,1.058909,1.029093,0.983772,0.966315,1.156468,1.217530,1.175546,1.062811,0.987725,...,1.128510,1.364993,1.291035,1.018023,1.096480,0.960386,0.918764,1.162546,1.261039,1.117115


In [13]:
# Write the stacked forecasts to CSV without the index column
result.to_csv("20200625_day_by_day_28model_rmse.csv", index=False)

###  publicLBについて精度を確認

In [14]:
# Ground-truth sales for the columns d_1914 through d_1941
sales = pd.read_csv(EVALUATION)
dayCols = [f"d_{i}" for i in range(1914, 1942)]
y_true = sales[dayCols]

In [15]:
# Score the current validation_base (all F columns, id dropped) against y_true.
# NOTE(review): uses whichever validation_base is currently in the kernel.
score = wrmsse_metric(validation_base.drop("id", axis=1), y_true, score_only=True)
print(score)

0.422765705738


## 日毎のpklの出力をマージして最終出力を作成する②
### N日目を予測する際にN, N+1, ..., 28日後予測モデルを使う場合

In [16]:
%%time

START_DAY = 1914-1

# Frame that collects the forecasts (F1..F28) for the first 30490 ids of the
# sample submission
validation_base = pd.DataFrame({"id":sample_df[:30490]["id"]})

for day in range(1, 28+1):

    # Output files of model number `day`, one per store. Load each once here:
    # they are reused for every target day handled below. All reads go
    # through PREDICT_PATH.
    store_preds = {
        store: pd.read_pickle(PREDICT_PATH + f"submit_{store}_day{day}.pkl")
        for store in STORES_IDS
    }

    # This model contributes to its own day and to every earlier day.
    for target_day in range(1, day+1):

        target_parts = []

        for store in STORES_IDS:

            # Prices of this store on the target day
            price_df = price_dict[store][START_DAY+target_day]

            # Predictions of this store on the target day
            tmp = store_preds[store]
            tmp = tmp[tmp["d"]==(START_DAY+target_day)]

            # Convert the prediction from "sell_price*sales" back to "sales"
            tmp = tmp.merge(price_df, on=["id", "d"], how="left")
            tmp["pred"] = tmp["pred"]/tmp["sell_price"]
            target_parts.append(tmp)

        target_predict = pd.concat(target_parts)

        # Positional assignment: relies on target_predict being in the same
        # id order as validation_base.
        column = "F"+str(target_day)
        if target_day == day:
            # First contribution to this day's column
            validation_base[column] = target_predict["pred"].values
        else:
            # Earlier days already have a column: accumulate into it
            validation_base[column] = validation_base[column] + target_predict["pred"].values

# Column F1 is the sum of 28 model outputs, F2 of 27, ... F28 of 1:
# divide each by its number of contributions to get the mean.
for i in range(1, 28+1):
    validation_base["F"+str(i)] = validation_base["F"+str(i)]/(28+1-i)

CPU times: user 1min 2s, sys: 4.27 s, total: 1min 7s
Wall time: 1min 7s


In [17]:
%%time

START_DAY = 1942-1

# Frame that collects the forecasts (F1..F28) for the ids from row 30490
# onward of the sample submission
evaluation_base = pd.DataFrame({"id":sample_df[30490:]["id"]})

for day in range(1, 28+1):

    # Output files of model number `day`, one per store. Load each once here:
    # they are reused for every target day handled below. All reads go
    # through PREDICT_PATH.
    store_preds = {
        store: pd.read_pickle(PREDICT_PATH + f"submit_{store}_day{day}.pkl")
        for store in STORES_IDS
    }

    # This model contributes to its own day and to every earlier day.
    for target_day in range(1, day+1):

        target_parts = []

        for store in STORES_IDS:

            # Prices of this store on the target day
            price_df = price_dict[store][START_DAY+target_day]

            # Predictions of this store on the target day
            tmp = store_preds[store]
            tmp = tmp[tmp["d"]==(START_DAY+target_day)]

            # Convert the prediction from "sell_price*sales" back to "sales"
            tmp = tmp.merge(price_df, on=["id", "d"], how="left")
            tmp["pred"] = tmp["pred"]/tmp["sell_price"]
            target_parts.append(tmp)

        target_predict = pd.concat(target_parts)

        # To be safe, join on id instead of relying on row order
        column = "F"+str(target_day)
        if target_day == day:
            # First contribution to this day's column: add the column
            target_predict[column] = target_predict["pred"]
            evaluation_base = evaluation_base.merge(target_predict[["id", column]], on="id", how="left")
        else:
            # Earlier days already have a column: accumulate into it
            evaluation_base = evaluation_base.merge(target_predict[["id", "pred"]], on="id", how="left")
            evaluation_base[column] = evaluation_base[column] + evaluation_base["pred"]
            evaluation_base = evaluation_base.drop("pred", axis=1)

# Column F1 is the sum of 28 model outputs, F2 of 27, ... F28 of 1:
# divide each by its number of contributions to get the mean.
for i in range(1, 28+1):
    evaluation_base["F"+str(i)] = evaluation_base["F"+str(i)]/(28+1-i)

CPU times: user 19min 18s, sys: 3min 25s, total: 22min 44s
Wall time: 1min 24s


In [18]:
# Stack the validation rows on top of the evaluation rows
result = pd.concat([validation_base, evaluation_base])

In [19]:
# Write the stacked forecasts to CSV without the index column
result.to_csv("20200625_day_by_day_ensemble_28model_rmse.csv", index=False)

In [20]:
# Display the stacked forecasts
result

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,0.810273,0.729916,0.733052,0.766523,0.903870,1.096432,1.200532,0.948471,0.907227,...,0.894345,1.087929,1.016510,0.851923,0.773971,0.778284,0.845441,0.892108,1.027392,0.978838
1,HOBBIES_1_002_CA_1_validation,0.273704,0.247727,0.239025,0.226434,0.270131,0.324853,0.347951,0.234752,0.232599,...,0.270883,0.361372,0.371185,0.251448,0.221814,0.230082,0.252259,0.289213,0.350138,0.357989
2,HOBBIES_1_003_CA_1_validation,0.438019,0.378222,0.384912,0.392702,0.491298,0.648309,0.650302,0.454567,0.418799,...,0.542045,0.689520,0.708672,0.515351,0.453129,0.453700,0.453965,0.523773,0.717882,0.741483
3,HOBBIES_1_004_CA_1_validation,1.684387,1.339116,1.312237,1.344107,1.758297,2.450033,3.107584,1.841743,1.503902,...,1.648356,2.513749,3.144231,1.878564,1.461915,1.408493,1.356839,1.841009,2.939607,3.072670
4,HOBBIES_1_005_CA_1_validation,1.139230,0.975208,0.988828,1.002735,1.141363,1.453240,1.584292,1.224111,1.172162,...,1.153361,1.449580,1.532477,1.149679,0.995559,0.997047,1.028514,1.155727,1.406795,1.516309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30485,FOODS_3_823_WI_3_evaluation,0.464280,0.442483,0.381169,0.370923,0.429614,0.477181,0.465612,0.418552,0.405899,...,0.561295,0.761148,0.837550,0.603694,0.621959,0.624922,0.514295,0.571547,0.662605,0.675886
30486,FOODS_3_824_WI_3_evaluation,0.271765,0.269403,0.273841,0.279456,0.302425,0.351877,0.315071,0.275922,0.282237,...,0.312923,0.375538,0.378516,0.291955,0.293621,0.312159,0.303509,0.314157,0.380278,0.361472
30487,FOODS_3_825_WI_3_evaluation,0.630830,0.561167,0.512218,0.524996,0.618699,0.716889,0.653850,0.665493,0.550568,...,0.765048,0.969664,1.088333,0.818788,0.862339,0.816363,0.671438,0.718376,0.794892,0.852268
30488,FOODS_3_826_WI_3_evaluation,1.025929,1.002855,0.908893,0.924698,1.114164,1.206309,1.071646,1.031044,0.934454,...,1.138388,1.350531,1.374363,1.096396,1.141900,1.030730,0.996233,1.128471,1.257003,1.117115


### publicLBについて精度を確認

In [21]:
# Ground-truth sales for the columns d_1914 through d_1941
sales = pd.read_csv(EVALUATION)
dayCols = [f"d_{i}" for i in range(1914, 1942)]
y_true = sales[dayCols]

In [22]:
# Score the current validation_base (all F columns, id dropped) against y_true
score = wrmsse_metric(validation_base.drop("id", axis=1), y_true, score_only=True)
print(score)

0.425021998133
