In [1]:
from  datetime import datetime, timedelta
import gc
import numpy as np, pandas as pd
import lightgbm as lgb

> This notebook aims to push the public LB under 0.50. Certainly, the competition is not yet at its peak and there clearly remains room for improvement.

# Credits

* [First R notebook](https://www.kaggle.com/kailex/m5-forecaster-v2)
* [Python translation](https://www.kaggle.com/kneroma/m5-forecast-v2-python)

# Changes
* v5 : try to optimise the LGBM params (see the LGBM params section below for the changes)
* v4 : delete df and X_train before the training step --> allows a larger training sample without memory issues

In [2]:
# dtypes handed to pd.read_csv for the calendar file: event/weekday labels are
# read as categoricals, calendar keys as int16 and the SNAP flags as float32.
CAL_DTYPES = {
    **dict.fromkeys(
        [f"event_{kind}_{slot}" for kind in ("name", "type") for slot in (1, 2, 3)] + ["weekday"],
        "category",
    ),
    **dict.fromkeys(["wm_yr_wk", "wday", "month", "year"], "int16"),
    **dict.fromkeys(["snap_CA", "snap_TX", "snap_WI"], "float32"),
}

# dtypes handed to pd.read_csv for the sell-price file.
PRICE_DTYPES = {
    "store_id": "category",
    "item_id": "category",
    "wm_yr_wk": "int16",
    "sell_price": "float32",
}

In [3]:
pd.options.display.max_columns = 50

In [15]:
# Forecast horizon, in days.
h = 28
# Days of history kept in front of a forecast day so that create_fea can build
# its lag / rolling-mean features (largest lag 28 + largest window 28).
max_lags = 57
# Index of the last day column with known sales (d_1941 = 2016-05-22).
tr_last = 1941
# First calendar day to forecast, i.e. the day after tr_last.
fday = datetime(year=2016, month=5, day=23)
fday

datetime.datetime(2016, 5, 23, 0, 0)

In [16]:
def _encode_categories(frame, columns):
    """Replace the pandas categorical ``columns`` of ``frame`` by int16 codes, in place.

    Codes are shifted so the smallest code of each column becomes 0 (pandas
    uses code -1 for a missing value, so a column containing one maps it to 0
    and moves every real category up by one).
    """
    for col in columns:
        frame[col] = frame[col].cat.codes.astype("int16")
        frame[col] -= frame[col].min()


def create_dt(is_train = True, nrows = None, first_day = 1200):
    """Load sales, calendar and prices and return one long-format frame.

    Parameters
    ----------
    is_train : bool
        True  -> load day columns ``d_{first_day}`` .. ``d_{tr_last}``.
        False -> load at most the last ``max_lags`` known days (never earlier
        than ``first_day``) and append ``h`` future day columns filled with
        NaN, to be filled in by the forecasting loop.
    nrows : int or None
        Forwarded to ``pd.read_csv`` for the sales file (None reads every row).
    first_day : int
        Earliest day index to load.

    Returns
    -------
    pandas.DataFrame
        One row per (id, day) with the ``sales`` value and the calendar and
        price columns joined in. Both joins use ``merge``'s default inner
        join, so (id, day) pairs without a matching price row are dropped.

    Notes
    -----
    Relies on the notebook-level names ``PRICE_DTYPES``, ``CAL_DTYPES``,
    ``tr_last``, ``max_lags`` and ``h``.

    NOTE(review): ``store_id`` / ``item_id`` are integer-encoded separately
    for the price file and the sales file. The codes presumably only agree
    when both files contain the same set of ids, so ``nrows`` values that
    truncate the sales file may break the price merge -- confirm before use.
    """
    prices = pd.read_csv("./sell_prices.csv", dtype = PRICE_DTYPES)
    _encode_categories(
        prices, [col for col, col_dtype in PRICE_DTYPES.items() if col_dtype == "category"]
    )

    cal = pd.read_csv("./exdata/calendar2.csv", dtype = CAL_DTYPES)
    cal["date"] = pd.to_datetime(cal["date"])
    _encode_categories(
        cal, [col for col, col_dtype in CAL_DTYPES.items() if col_dtype == "category"]
    )

    # Training keeps everything from first_day on; prediction only needs the
    # trailing window used for lag features.
    start_day = max(1 if is_train else tr_last - max_lags, first_day)
    numcols = [f"d_{day}" for day in range(start_day, tr_last + 1)]
    catcols = ['id', 'item_id', 'dept_id','store_id', 'cat_id', 'state_id']
    dtype = {numcol: "float32" for numcol in numcols}
    dtype.update({col: "category" for col in catcols if col != "id"})
    # The evaluation file is used here (an earlier version read
    # ./sales_train_validation.csv instead).
    dt = pd.read_csv("./sales_train_evaluation.csv",
                     nrows = nrows, usecols = catcols + numcols, dtype = dtype)

    # "id" stays a plain string: it is the grouping key for the lag features.
    _encode_categories(dt, [col for col in catcols if col != "id"])

    if not is_train:
        # Placeholder columns for the h days to forecast.
        for day in range(tr_last + 1, tr_last + h + 1):
            dt[f"d_{day}"] = np.nan

    # Wide (one column per day) -> long (one row per id and day).
    dt = pd.melt(dt,
                 id_vars = catcols,
                 value_vars = [col for col in dt.columns if col.startswith("d_")],
                 var_name = "d",
                 value_name = "sales")

    dt = dt.merge(cal, on = "d", copy = False)
    dt = dt.merge(prices, on = ["store_id", "item_id", "wm_yr_wk"], copy = False)

    return dt

In [17]:
def _date_attribute(dates, attr):
    """Return the ``.dt`` attribute ``attr`` of a datetime Series.

    ``weekofyear`` was deprecated in pandas 1.1 and removed in 2.0, so the ISO
    week is taken from ``dt.isocalendar().week`` whenever that accessor
    exists; older pandas versions fall back to the original attribute.
    """
    if attr == "weekofyear" and hasattr(dates.dt, "isocalendar"):
        return dates.dt.isocalendar().week
    return getattr(dates.dt, attr)


def create_fea(dt):
    """Add lag, rolling-mean and calendar features to ``dt`` in place.

    New / rewritten columns:
      * ``lag_7``, ``lag_28``: ``sales`` shifted by 7 / 28 rows within each ``id``.
      * ``rmean_{lag}_{win}``: rolling mean over ``win`` rows of ``lag_{lag}``
        within each ``id`` (NaN until a full window is available).
      * ``wday``, ``week``, ``month``, ``quarter``, ``year``, ``mday`` as int16.
        A column that already exists is only cast; a missing one is derived
        from the ``date`` column.

    Parameters
    ----------
    dt : pandas.DataFrame
        Needs the columns ``id``, ``sales`` and ``date`` (datetime).
        Shifts and windows follow row order, so rows are assumed to be in
        chronological order within each ``id`` -- verify if the frame was
        reordered upstream.

    Returns
    -------
    None
        ``dt`` is modified in place.
    """
    lags = [7, 28]
    lag_cols = [f"lag_{lag}" for lag in lags]
    for lag, lag_col in zip(lags, lag_cols):
        dt[lag_col] = dt.groupby("id")["sales"].shift(lag)

    wins = [7, 28]
    # Window is the outer loop to keep the original column order.
    for win in wins:
        for lag, lag_col in zip(lags, lag_cols):
            dt[f"rmean_{lag}_{win}"] = dt.groupby("id")[lag_col].transform(
                lambda x: x.rolling(win).mean()
            )

    # feature column name -> attribute of the pandas ``.dt`` accessor
    date_features = {
        "wday": "weekday",
        "week": "weekofyear",
        "month": "month",
        "quarter": "quarter",
        "year": "year",
        "mday": "day",
    }

    for date_feat_name, date_feat_func in date_features.items():
        if date_feat_name in dt.columns:
            dt[date_feat_name] = dt[date_feat_name].astype("int16")
        else:
            dt[date_feat_name] = _date_attribute(dt["date"], date_feat_func).astype("int16")

In [18]:
# First day index passed to create_dt as first_day for the training frame.
FIRST_DAY = 1 # 1 loads the whole history --> high risk of running out of memory!

In [19]:
%%time

df = create_dt(is_train=True, first_day= FIRST_DAY)
df.shape

CPU times: user 17.6 s, sys: 2.45 s, total: 20.1 s
Wall time: 20.1 s


(46881677, 25)

In [20]:
df.drop("Unnamed: 0", axis=1, inplace=True)

In [21]:
df.head()

Unnamed: 0,id,item_id,dept_id,store_id,cat_id,state_id,d,sales,date,wm_yr_wk,weekday,wday,month,year,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI,event_name_3,event_type_3,sell_price
0,HOBBIES_1_008_CA_1_evaluation,7,0,0,0,0,d_1,12.0,2011-01-29,11101,2,1,1,2011,0,0,0,0,0.0,0.0,0.0,0,0,0.46
1,HOBBIES_1_008_CA_1_evaluation,7,0,0,0,0,d_2,15.0,2011-01-30,11101,3,2,1,2011,0,0,0,0,0.0,0.0,0.0,0,0,0.46
2,HOBBIES_1_008_CA_1_evaluation,7,0,0,0,0,d_3,0.0,2011-01-31,11101,1,3,1,2011,0,0,0,0,0.0,0.0,0.0,0,0,0.46
3,HOBBIES_1_008_CA_1_evaluation,7,0,0,0,0,d_4,0.0,2011-02-01,11101,5,4,2,2011,0,0,0,0,1.0,1.0,0.0,0,0,0.46
4,HOBBIES_1_008_CA_1_evaluation,7,0,0,0,0,d_5,0.0,2011-02-02,11101,6,5,2,2011,0,0,0,0,1.0,0.0,1.0,0,0,0.46


In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 46881677 entries, 0 to 46881676
Data columns (total 24 columns):
 #   Column        Dtype         
---  ------        -----         
 0   id            object        
 1   item_id       int16         
 2   dept_id       int16         
 3   store_id      int16         
 4   cat_id        int16         
 5   state_id      int16         
 6   d             object        
 7   sales         float32       
 8   date          datetime64[ns]
 9   wm_yr_wk      int16         
 10  weekday       int16         
 11  wday          int16         
 12  month         int16         
 13  year          int16         
 14  event_name_1  int16         
 15  event_type_1  int16         
 16  event_name_2  int16         
 17  event_type_2  int16         
 18  snap_CA       float32       
 19  snap_TX       float32       
 20  snap_WI       float32       
 21  event_name_3  int16         
 22  event_type_3  int16         
 23  sell_price    float32       
d

In [23]:
%%time

create_fea(df)
df.shape

CPU times: user 1min 38s, sys: 4.88 s, total: 1min 43s
Wall time: 1min 43s


(46881677, 33)

In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 46881677 entries, 0 to 46881676
Data columns (total 33 columns):
 #   Column        Dtype         
---  ------        -----         
 0   id            object        
 1   item_id       int16         
 2   dept_id       int16         
 3   store_id      int16         
 4   cat_id        int16         
 5   state_id      int16         
 6   d             object        
 7   sales         float32       
 8   date          datetime64[ns]
 9   wm_yr_wk      int16         
 10  weekday       int16         
 11  wday          int16         
 12  month         int16         
 13  year          int16         
 14  event_name_1  int16         
 15  event_type_1  int16         
 16  event_name_2  int16         
 17  event_type_2  int16         
 18  snap_CA       float32       
 19  snap_TX       float32       
 20  snap_WI       float32       
 21  event_name_3  int16         
 22  event_type_3  int16         
 23  sell_price    float32       
 

In [25]:
df.head()

Unnamed: 0,id,item_id,dept_id,store_id,cat_id,state_id,d,sales,date,wm_yr_wk,weekday,wday,month,year,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI,event_name_3,event_type_3,sell_price,lag_7,lag_28,rmean_7_7,rmean_28_7,rmean_7_28,rmean_28_28,week,quarter,mday
0,HOBBIES_1_008_CA_1_evaluation,7,0,0,0,0,d_1,12.0,2011-01-29,11101,2,1,1,2011,0,0,0,0,0.0,0.0,0.0,0,0,0.46,,,,,,,4,1,29
1,HOBBIES_1_008_CA_1_evaluation,7,0,0,0,0,d_2,15.0,2011-01-30,11101,3,2,1,2011,0,0,0,0,0.0,0.0,0.0,0,0,0.46,,,,,,,4,1,30
2,HOBBIES_1_008_CA_1_evaluation,7,0,0,0,0,d_3,0.0,2011-01-31,11101,1,3,1,2011,0,0,0,0,0.0,0.0,0.0,0,0,0.46,,,,,,,5,1,31
3,HOBBIES_1_008_CA_1_evaluation,7,0,0,0,0,d_4,0.0,2011-02-01,11101,5,4,2,2011,0,0,0,0,1.0,1.0,0.0,0,0,0.46,,,,,,,5,1,1
4,HOBBIES_1_008_CA_1_evaluation,7,0,0,0,0,d_5,0.0,2011-02-02,11101,6,5,2,2011,0,0,0,0,1.0,0.0,1.0,0,0,0.46,,,,,,,5,1,2


In [26]:
df.dropna(inplace = True)
df.shape

(45204727, 33)

In [27]:
# Columns LightGBM is told to treat as categorical: the id hierarchy codes
# followed by the calendar event codes.
cat_feats = ['item_id', 'dept_id', 'store_id', 'cat_id', 'state_id']
cat_feats += [f"event_{kind}_{slot}" for kind in ("name", "type") for slot in (1, 2, 3)]

# Columns excluded from the model inputs (includes the target, "sales").
useless_cols = ["id", "date", "sales", "d", "wm_yr_wk", "weekday"]

# Every remaining column, in frame order, is a model input.
is_feature = ~df.columns.isin(useless_cols)
train_cols = df.columns[is_feature]
X_train, y_train = df[train_cols], df["sales"]

In [28]:
# train_data = lgb.Dataset(X_train, label = y_train, categorical_feature=cat_feats, free_raw_data=False)
# fake_valid_inds = np.random.choice(len(X_train), 1000000, replace = False)
# fake_valid_data = lgb.Dataset(X_train.iloc[fake_valid_inds], label = y_train.iloc[fake_valid_inds],categorical_feature=cat_feats,
#                              free_raw_data=False)   # This is just a subsample of the training set, not a real validation set !

In [29]:
%%time

# Fix NumPy's global seed so the hold-out sample is reproducible.
np.random.seed(777)


def _as_lgb_dataset(row_labels):
    """Wrap the rows of X_train / y_train selected by index label in an lgb.Dataset."""
    return lgb.Dataset(
        X_train.loc[row_labels],
        label=y_train.loc[row_labels],
        categorical_feature=cat_feats,
        free_raw_data=False,
    )


# This is a random sample, we're not gonna apply any time series
# train-test-split tricks here!
fake_valid_inds = np.random.choice(X_train.index.values, 2_000_000, replace = False)
# Everything that was not drawn for the hold-out is used for fitting.
train_inds = np.setdiff1d(X_train.index.values, fake_valid_inds)

train_data = _as_lgb_dataset(train_inds)
fake_valid_data = _as_lgb_dataset(fake_valid_inds)

CPU times: user 8.47 s, sys: 491 ms, total: 8.97 s
Wall time: 8.96 s


In [30]:
del df, X_train, y_train, fake_valid_inds,train_inds ; gc.collect()

22

In [31]:
# LightGBM training parameters.
# The original literal listed "metric" twice ("rmse" and ["rmse"]); Python
# keeps only the last value of a repeated key, so a single entry with that
# effective value is kept here.
params = {
    "objective" : "poisson",
    "metric" : ["rmse"],
    "force_row_wise" : True,
    "learning_rate" : 0.075,
#     "sub_feature" : 0.8,
    "sub_row" : 0.75,          # LightGBM alias of bagging_fraction
    "bagging_freq" : 1,        # bagging is redone at every iteration
    "lambda_l2" : 0.1,
#     "nthread" : 4
    'verbosity': 1,
    'num_iterations' : 1200,
    'num_leaves': 128,
    "min_data_in_leaf": 100,
}

In [32]:
%%time

# Train and report the validation metric every 20 iterations.
# LightGBM 4.0 removed the `verbose_eval` argument of lgb.train in favour of
# the log_evaluation callback; use the callback when this LightGBM provides
# it and keep the original keyword for older releases.
if hasattr(lgb, "log_evaluation"):
    m_lgb = lgb.train(params, train_data, valid_sets = [fake_valid_data],
                      callbacks = [lgb.log_evaluation(period = 20)])
else:
    m_lgb = lgb.train(params, train_data, valid_sets = [fake_valid_data], verbose_eval=20)



[20]	valid_0's rmse: 3.07785
[40]	valid_0's rmse: 2.58861
[60]	valid_0's rmse: 2.46008
[80]	valid_0's rmse: 2.42257
[100]	valid_0's rmse: 2.41082
[120]	valid_0's rmse: 2.40258
[140]	valid_0's rmse: 2.39433
[160]	valid_0's rmse: 2.38466
[180]	valid_0's rmse: 2.37742
[200]	valid_0's rmse: 2.37055
[220]	valid_0's rmse: 2.3655
[240]	valid_0's rmse: 2.36006
[260]	valid_0's rmse: 2.35503
[280]	valid_0's rmse: 2.35137
[300]	valid_0's rmse: 2.34684
[320]	valid_0's rmse: 2.34306
[340]	valid_0's rmse: 2.33882
[360]	valid_0's rmse: 2.33557
[380]	valid_0's rmse: 2.33255
[400]	valid_0's rmse: 2.33015
[420]	valid_0's rmse: 2.32709
[440]	valid_0's rmse: 2.32424
[460]	valid_0's rmse: 2.32179
[480]	valid_0's rmse: 2.32011
[500]	valid_0's rmse: 2.31691
[520]	valid_0's rmse: 2.31474
[540]	valid_0's rmse: 2.31204
[560]	valid_0's rmse: 2.30988
[580]	valid_0's rmse: 2.30675
[600]	valid_0's rmse: 2.30484
[620]	valid_0's rmse: 2.30343
[640]	valid_0's rmse: 2.30155
[660]	valid_0's rmse: 2.29898
[680]	valid_0's

In [33]:
m_lgb.save_model("model.lgb")

<lightgbm.basic.Booster at 0x7f2040a67ed0>

In [54]:
fday

datetime.datetime(2016, 5, 23, 0, 0)

In [55]:
# Scratch check: the 56 consecutive days visited by the prediction loop,
# from 28 days before fday up to 27 days after it.
res = [fday + timedelta(days=offset) for offset in range(-28, 28)]

In [56]:
len(res)

56

In [63]:
print(len(res[:28]))
res[:28]

28


[datetime.datetime(2016, 4, 25, 0, 0),
 datetime.datetime(2016, 4, 26, 0, 0),
 datetime.datetime(2016, 4, 27, 0, 0),
 datetime.datetime(2016, 4, 28, 0, 0),
 datetime.datetime(2016, 4, 29, 0, 0),
 datetime.datetime(2016, 4, 30, 0, 0),
 datetime.datetime(2016, 5, 1, 0, 0),
 datetime.datetime(2016, 5, 2, 0, 0),
 datetime.datetime(2016, 5, 3, 0, 0),
 datetime.datetime(2016, 5, 4, 0, 0),
 datetime.datetime(2016, 5, 5, 0, 0),
 datetime.datetime(2016, 5, 6, 0, 0),
 datetime.datetime(2016, 5, 7, 0, 0),
 datetime.datetime(2016, 5, 8, 0, 0),
 datetime.datetime(2016, 5, 9, 0, 0),
 datetime.datetime(2016, 5, 10, 0, 0),
 datetime.datetime(2016, 5, 11, 0, 0),
 datetime.datetime(2016, 5, 12, 0, 0),
 datetime.datetime(2016, 5, 13, 0, 0),
 datetime.datetime(2016, 5, 14, 0, 0),
 datetime.datetime(2016, 5, 15, 0, 0),
 datetime.datetime(2016, 5, 16, 0, 0),
 datetime.datetime(2016, 5, 17, 0, 0),
 datetime.datetime(2016, 5, 18, 0, 0),
 datetime.datetime(2016, 5, 19, 0, 0),
 datetime.datetime(2016, 5, 20, 0,

In [61]:
print(len(res[28:]))
res[28:]

28


[datetime.datetime(2016, 5, 23, 0, 0),
 datetime.datetime(2016, 5, 24, 0, 0),
 datetime.datetime(2016, 5, 25, 0, 0),
 datetime.datetime(2016, 5, 26, 0, 0),
 datetime.datetime(2016, 5, 27, 0, 0),
 datetime.datetime(2016, 5, 28, 0, 0),
 datetime.datetime(2016, 5, 29, 0, 0),
 datetime.datetime(2016, 5, 30, 0, 0),
 datetime.datetime(2016, 5, 31, 0, 0),
 datetime.datetime(2016, 6, 1, 0, 0),
 datetime.datetime(2016, 6, 2, 0, 0),
 datetime.datetime(2016, 6, 3, 0, 0),
 datetime.datetime(2016, 6, 4, 0, 0),
 datetime.datetime(2016, 6, 5, 0, 0),
 datetime.datetime(2016, 6, 6, 0, 0),
 datetime.datetime(2016, 6, 7, 0, 0),
 datetime.datetime(2016, 6, 8, 0, 0),
 datetime.datetime(2016, 6, 9, 0, 0),
 datetime.datetime(2016, 6, 10, 0, 0),
 datetime.datetime(2016, 6, 11, 0, 0),
 datetime.datetime(2016, 6, 12, 0, 0),
 datetime.datetime(2016, 6, 13, 0, 0),
 datetime.datetime(2016, 6, 14, 0, 0),
 datetime.datetime(2016, 6, 15, 0, 0),
 datetime.datetime(2016, 6, 16, 0, 0),
 datetime.datetime(2016, 6, 17, 0,

In [65]:
%%time

# Recursive day-by-day forecast, repeated once per multiplier and blended.
#
# For every alpha the loop walks over the h days before fday (written out
# under "..._validation" ids) and the h days from fday on ("..._evaluation"
# ids), predicts one day at a time and writes the prediction back into the
# "sales" column so that later days can use it as a lag feature.
#
# NOTE(review): create_dt(False) starts at day tr_last - max_lags (d_1884)
# while the first predicted day is d_1914 and create_fea's longest feature
# (rmean_28_28) looks back 55 days, so the rolling features are NaN for the
# earliest predicted days. The known sales of d_1914..d_1941 are also
# overwritten by predictions before the evaluation days are forecast.
# Fixing this needs more history from create_dt -- confirm the intent.
alphas = [1.028, 1.023, 1.018]
weights = [1/len(alphas)]*len(alphas)
sub = 0.

# Loop-invariant work: read the CSV files once and name the F1..F{h} columns once.
te_base = create_dt(False)
cols = [f"F{i}" for i in range(1, h + 1)]

for icount, (alpha, weight) in enumerate(zip(alphas, weights)):

    # Fresh copy per alpha: the loop below overwrites the "sales" column.
    te = te_base.copy()

    for tdelta in range(-h, h):
        day = fday + timedelta(days=tdelta)
        print(tdelta, day)
        # Only the trailing max_lags days are needed to build the features of `day`.
        tst = te[(te.date >= day - timedelta(days=max_lags)) & (te.date <= day)].copy()
        create_fea(tst)
        tst = tst.loc[tst.date == day , train_cols]
        te.loc[te.date == day, "sales"] = alpha*m_lgb.predict(tst) # magic multiplier by kyakovlev

    te_sub = te.loc[te.date >= (fday - timedelta(days=h)), ["id", "sales"]].copy()
    # Rows before fday go out under the "validation" id. regex=True is passed
    # explicitly because pandas >= 2.0 treats the pattern as a literal string
    # by default, which would leave every id unchanged.
    in_validation = te.loc[te_sub.index, "date"] < fday
    te_sub.loc[in_validation, "id"] = te_sub.loc[in_validation, "id"].str.replace(
        "evaluation$", "validation", regex=True
    )
    # Number the days of each id F1..F{h} in row order, then pivot to one row per id.
    te_sub["F"] = [f"F{rank}" for rank in te_sub.groupby("id")["id"].cumcount()+1]
    te_sub = te_sub.set_index(["id", "F" ]).unstack()["sales"][cols].reset_index()
    te_sub.fillna(0., inplace = True)
    te_sub.sort_values("id", inplace = True)
    te_sub.reset_index(drop=True, inplace = True)
    te_sub.to_csv(f"submission_eval_{icount}.csv",index=False)
    # Weighted blend; rows line up because every te_sub is sorted by id and re-indexed.
    if icount == 0 :
        sub = te_sub
        sub[cols] *= weight
    else:
        sub[cols] += te_sub[cols]*weight
    print(icount, alpha, weight)


sub.to_csv("submission_eval.csv",index=False)

-28 2016-04-25 00:00:00
-27 2016-04-26 00:00:00
-26 2016-04-27 00:00:00
-25 2016-04-28 00:00:00
-24 2016-04-29 00:00:00
-23 2016-04-30 00:00:00
-22 2016-05-01 00:00:00
-21 2016-05-02 00:00:00
-20 2016-05-03 00:00:00
-19 2016-05-04 00:00:00
-18 2016-05-05 00:00:00
-17 2016-05-06 00:00:00
-16 2016-05-07 00:00:00
-15 2016-05-08 00:00:00
-14 2016-05-09 00:00:00
-13 2016-05-10 00:00:00
-12 2016-05-11 00:00:00
-11 2016-05-12 00:00:00
-10 2016-05-13 00:00:00
-9 2016-05-14 00:00:00
-8 2016-05-15 00:00:00
-7 2016-05-16 00:00:00
-6 2016-05-17 00:00:00
-5 2016-05-18 00:00:00
-4 2016-05-19 00:00:00
-3 2016-05-20 00:00:00
-2 2016-05-21 00:00:00
-1 2016-05-22 00:00:00
0 2016-05-23 00:00:00
1 2016-05-24 00:00:00
2 2016-05-25 00:00:00
3 2016-05-26 00:00:00
4 2016-05-27 00:00:00
5 2016-05-28 00:00:00
6 2016-05-29 00:00:00
7 2016-05-30 00:00:00
8 2016-05-31 00:00:00
9 2016-06-01 00:00:00
10 2016-06-02 00:00:00
11 2016-06-03 00:00:00
12 2016-06-04 00:00:00
13 2016-06-05 00:00:00
14 2016-06-06 00:00:00
15

In [66]:
sub.head(10)

F,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_evaluation,0.849391,0.802276,0.797418,0.810796,0.925747,1.168519,1.173836,0.895955,0.786679,0.858627,0.905828,1.06431,1.222032,1.250776,0.874021,0.856935,0.890149,0.844106,1.029998,1.198555,1.170897,0.889134,0.802814,0.811161,0.83809,0.935525,1.135489,1.08101
1,FOODS_1_001_CA_1_validation,0.296361,0.301865,0.305283,0.28364,1.159695,1.387745,1.322584,0.906634,0.877028,0.980511,0.958181,0.904757,1.154496,1.138203,0.880199,0.808664,0.830866,0.860497,0.954275,1.267233,1.248287,0.836722,0.776258,0.757427,0.753051,0.99927,1.228965,1.218133
2,FOODS_1_001_CA_2_evaluation,0.954964,0.93474,0.940518,0.944434,1.128886,1.595882,1.404253,1.24464,0.943474,1.059485,1.211929,1.309447,1.676127,1.667219,1.117059,1.171212,1.181751,1.123649,1.461129,1.747048,1.477357,1.192713,0.999053,1.027282,1.108065,1.151996,1.62224,1.4326
3,FOODS_1_001_CA_2_validation,0.155728,0.16464,0.180784,0.148843,1.36228,2.052081,1.836961,1.076957,1.011764,0.981135,0.914122,1.089346,1.634057,1.310689,1.069209,1.039047,1.053834,1.08085,1.267667,1.845881,1.845687,1.335054,1.246478,1.232175,1.249718,1.171779,1.619299,1.445666
4,FOODS_1_001_CA_3_evaluation,0.963903,0.970878,0.909762,0.925891,1.016772,1.337821,1.378838,1.19058,1.02689,1.055695,1.10651,1.228066,1.367578,1.458863,1.113777,1.108658,1.108019,1.044785,1.194407,1.491182,1.371888,1.134207,1.112496,1.045538,1.013706,1.052553,1.344706,1.386429
5,FOODS_1_001_CA_3_validation,0.274088,0.237685,0.226995,0.250718,1.642332,1.656946,1.289368,1.073668,1.134041,0.975214,0.97064,1.067113,1.351383,1.351479,1.102096,1.116161,1.072684,1.127687,1.170088,1.695738,1.975253,1.38217,1.322116,1.253308,1.285954,1.062884,1.350867,1.284929
6,FOODS_1_001_CA_4_evaluation,0.363313,0.353694,0.371253,0.372887,0.407163,0.43848,0.428743,0.365829,0.354617,0.404239,0.42317,0.454506,0.457912,0.466849,0.402175,0.395593,0.424445,0.421241,0.457959,0.500333,0.505451,0.432586,0.409521,0.437787,0.434528,0.440982,0.481165,0.459024
7,FOODS_1_001_CA_4_validation,0.258269,0.191065,0.196169,0.224075,0.416322,0.499058,0.561725,0.416382,0.419759,0.43334,0.436201,0.430548,0.460485,0.389126,0.395363,0.395304,0.422695,0.421304,0.464876,0.532018,0.551986,0.436329,0.409338,0.412632,0.415399,0.393772,0.454874,0.475019
8,FOODS_1_001_TX_1_evaluation,0.3466,0.362861,0.364729,0.373782,0.413925,0.483351,0.488945,0.388062,0.370989,0.483704,0.503783,0.550314,0.543624,0.604596,0.463639,0.452138,0.463752,0.465932,0.488623,0.55474,0.551702,0.47122,0.432128,0.46935,0.471779,0.460249,0.519582,0.518465
9,FOODS_1_001_TX_1_validation,0.138662,0.131846,0.134269,0.151908,0.198639,0.200995,0.252417,0.683515,0.634941,0.609896,0.656544,0.690374,0.654847,0.614883,0.555311,0.585438,0.753162,0.618202,0.781839,0.794965,0.854359,0.536689,0.460318,0.48569,0.436082,0.381532,0.457537,0.478181


In [69]:
sub.id.nunique(), sub["id"].str.contains("validation$").sum(), sub["id"].str.contains("evaluation$").sum()

(60980, 30490, 30490)

In [68]:
sub.shape

(60980, 29)

In [None]:
#!kaggle competitions submit -c m5-forecasting-accuracy -f submission_eval.csv -m "20200628(1) Evaluation version of 20200627(1)"; 1.29806