## Imports

In [None]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import dask.dataframe as dd
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import lightgbm as lgb
import optuna.integration.lightgbm as lgb_optuna
#import dask_xgboost as xgb
#import dask.dataframe as dd6
from sklearn import preprocessing, metrics
from sklearn.preprocessing import LabelEncoder
import gc
import os
from tqdm import tqdm_notebook as tqdm
from scipy.sparse import csr_matrix
import pickle

# Print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [None]:
def reduce_mem_usage(df, verbose=True):
    """Downcast numeric columns of ``df`` to the smallest dtype that holds their range.

    Integer columns are cast to the first of int8/int16/int32/int64 whose
    limits contain the column's min and max (limits inclusive).  Float columns
    are cast to float16 or float32 when the range fits, otherwise to float64.
    Columns whose dtype is not one of the listed numeric dtypes are left
    untouched.

    Args:
        df: DataFrame to shrink; its columns are replaced in place.
        verbose: When true, print the resulting memory usage and the reduction.

    Returns:
        The same DataFrame object, with downcast columns.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        is_int = str(col_type)[:3] == 'int'
        if is_int:
            candidates, limits_of = (np.int8, np.int16, np.int32, np.int64), np.iinfo
        else:
            # NOTE: the choice is driven by range only; float16 keeps far
            # fewer significant digits than float32/float64.
            candidates, limits_of = (np.float16, np.float32), np.finfo
        for candidate in candidates:
            limits = limits_of(candidate)
            # Inclusive bounds: a column spanning exactly [-128, 127] fits int8.
            if limits.min <= c_min and c_max <= limits.max:
                df[col] = df[col].astype(candidate)
                break
        else:
            # Nothing fitted (e.g. NaN or infinite min/max): floats fall back
            # to float64, integer columns keep their current dtype.
            if not is_int:
                df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard against a zero starting size to avoid a division by zero.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

## Input Files

In [None]:
# Per-store base data frames.
PATHS = {}
for store_id in ('CA_1', 'CA_2', 'CA_3', 'CA_4', 'TX_1', 'TX_2', 'TX_3', 'WI_1', 'WI_2', 'WI_3'):
    PATHS[store_id] = '/kaggle/input/m5-all-data/df_{}.pkl'.format(store_id)

# Per-store pickles named binary_pred_<store>.pkl, spread over several datasets.
PATHS2 = {
    'CA_1': '/kaggle/input/binary-challenge-evaluation-ca-1-2/binary_pred_CA_1.pkl',
    'CA_2': '/kaggle/input/binary-challenge-evaluation-ca-1-2/binary_pred_CA_2.pkl',
    'CA_3': '/kaggle/input/binary-challenge-evaluation-ca-3-4/binary_pred_CA_3.pkl',
    'CA_4': '/kaggle/input/binary-challenge-evaluation-ca-3-4/binary_pred_CA_4.pkl',
    'TX_1': '/kaggle/input/binary-challenge-evaluation-tx-1-2-3/binary_pred_TX_1.pkl',
    'TX_2': '/kaggle/input/binary-challenge-evaluation-tx-1-2-3/binary_pred_TX_2.pkl',
    'TX_3': '/kaggle/input/binary-challenge-evaluation-tx-1-2-3/binary_pred_TX_3.pkl',
    'WI_1': '/kaggle/input/binary-challenge-evaluation-wi-1-2-3/binary_pred_WI_1.pkl',
    'WI_2': '/kaggle/input/binary-challenge-evaluation-wi-1-2-3/binary_pred_WI_2.pkl',
    'WI_3': '/kaggle/input/binary-challenge-evaluation-wi-1-2-3/binary_pred_WI_3.pkl',
}

# Per-store pickles named lags_df_1to35_<store>.pkl.
PATHS3 = {
    'CA_1': '/kaggle/input/m5-lags-features-1to35-ca/lags_df_1to35_CA_1.pkl',
    'CA_2': '/kaggle/input/m5-lags-features-1to35-ca/lags_df_1to35_CA_2.pkl',
    'CA_3': '/kaggle/input/m5-lags-features-1to35-ca/lags_df_1to35_CA_3.pkl',
    'CA_4': '/kaggle/input/m5-lags-features-1to35-ca/lags_df_1to35_CA_4.pkl',
    'TX_1': '/kaggle/input/fork-of-m5-lags-features-1to35-tx-and-wi/lags_df_1to35_TX_1.pkl',
    'TX_2': '/kaggle/input/fork-of-m5-lags-features-1to35-tx-and-wi/lags_df_1to35_TX_2.pkl',
    'TX_3': '/kaggle/input/fork-of-m5-lags-features-1to35-tx-and-wi/lags_df_1to35_TX_3.pkl',
    'WI_1': '/kaggle/input/fork-of-m5-lags-features-1to35-tx-and-wi/lags_df_1to35_WI_1.pkl',
    'WI_2': '/kaggle/input/fork-of-m5-lags-features-1to35-tx-and-wi/lags_df_1to35_WI_2.pkl',
    'WI_3': '/kaggle/input/fork-of-m5-lags-features-1to35-tx-and-wi/lags_df_1to35_WI_3.pkl',
}

In [None]:
# Shared feature tables, loaded once and left-merged into every store frame
# on ('id', 'd') by load_data().
# te2: read from the m5-target-encoding2 dataset (te_60.pkl).
te2 = pd.read_pickle('../input/m5-target-encoding2/te_60.pkl')
# event_lag: the 'sales', 'event_name_1' and 'event_lag_0' columns are dropped right after loading.
event_lag = pd.read_pickle('../input/kernel1f1484cf46/event_lag_df.pkl').drop(columns=['sales', 'event_name_1','event_lag_0'])

def load_data(store_id):
    """Load one store's base frame and left-join every extra feature table.

    The base frame (PATHS) is joined on ('id', 'd') with, in order: the
    PATHS2 table, the PATHS3 table (without 'store_id'/'sales', first 29
    remaining columns only), the module-level ``te2`` and ``event_lag``.
    The 'id' column of the result is converted to a categorical.
    """
    join_keys = ['id', 'd']
    frame = pd.read_pickle(PATHS[store_id])
    binary_pred = pd.read_pickle(PATHS2[store_id])
    lags = pd.read_pickle(PATHS3[store_id])
    lags = lags.drop(columns=['store_id', 'sales']).iloc[:, :29]

    for extra in (binary_pred, lags, te2, event_lag):
        frame = frame.merge(extra, on=join_keys, how='left')

    # Drop the local references to the per-store tables before collecting.
    del binary_pred, lags, extra
    frame['id'] = frame['id'].astype('category')
    gc.collect()

    return frame

In [None]:
# Run a full garbage-collection pass before the next cells.
gc.collect()

## Define Features

In [None]:
# Name of the column the models are trained to predict.
TARGET = 'sales'

# Identifier, price, calendar and event columns.
basic_features = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id',
        'release', 'sell_price', 'price_max', 'price_min', 'price_std',
       'price_mean', 'price_norm', 'price_nunique', 'item_nunique',
       'price_momentum', 'price_momentum_m', 'price_momentum_y',
       'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2',
       'snap_CA', 'snap_TX', 'snap_WI', 'tm_d', 'tm_w', 'tm_m', 'tm_y',
       'tm_wm', 'tm_dw', 'tm_w_end']

# 'te_*' columns with suffix _28 and _60.
encoding_features = ['te_id_28', 'te_item_id_28', 'te_dept_id_28', 'te_cat_id_28',
       'te_store_id_28', 'te_state_id_28', 'te_id_tm_dw_28',
       'te_item_id_tm_dw_28', 'te_dept_id_tm_dw_28', 'te_cat_id_tm_dw_28',
       'te_store_id_tm_dw_28', 'te_state_id_tm_dw_28'] + [
       'te_id_60', 'te_item_id_60',
       'te_dept_id_60', 'te_cat_id_60', 'te_store_id_60', 'te_state_id_60',
       'te_id_tm_dw_60', 'te_item_id_tm_dw_60', 'te_dept_id_tm_dw_60',
       'te_cat_id_tm_dw_60', 'te_store_id_tm_dw_60', 'te_state_id_tm_dw_60']

# Sales lags 28..42 plus rolling mean/std columns.
lag_features = [
        'sales_lag_28','sales_lag_29', 'sales_lag_30', 'sales_lag_31', 'sales_lag_32',
        'sales_lag_33', 'sales_lag_34', 'sales_lag_35', 'sales_lag_36','sales_lag_37',
        'sales_lag_38', 'sales_lag_39', 'sales_lag_40','sales_lag_41', 'sales_lag_42',
        'rolling_mean_7', 'rolling_std_7','rolling_mean_14', 'rolling_std_14',
        'rolling_mean_30','rolling_std_30', 'rolling_mean_60', 'rolling_std_60',
        'rolling_mean_180', 'rolling_std_180',]

# Sales lags 1..27; the per-day models drop the lags shorter than their horizon.
day_by_day = ['sales_lag_1', 'sales_lag_2', 'sales_lag_3', 'sales_lag_4',
       'sales_lag_5', 'sales_lag_6', 'sales_lag_7', 'sales_lag_8',
       'sales_lag_9', 'sales_lag_10', 'sales_lag_11', 'sales_lag_12',
       'sales_lag_13', 'sales_lag_14', 'sales_lag_15', 'sales_lag_16',
       'sales_lag_17', 'sales_lag_18', 'sales_lag_19', 'sales_lag_20',
       'sales_lag_21', 'sales_lag_22', 'sales_lag_23', 'sales_lag_24',
       'sales_lag_25', 'sales_lag_26', 'sales_lag_27']

recursive_features =['rolling_mean_tmp_1_7','rolling_mean_tmp_1_14',
    'rolling_mean_tmp_1_30','rolling_mean_tmp_1_60',
    'rolling_mean_tmp_7_7','rolling_mean_tmp_7_14',
    'rolling_mean_tmp_7_30','rolling_mean_tmp_7_60',
    'rolling_mean_tmp_14_7','rolling_mean_tmp_14_14',
    'rolling_mean_tmp_14_30','rolling_mean_tmp_14_60']

additional_features = ['binary_pred'] + ['event_lag_1','event_lag_2','event_lag_3','event_lag_4','event_lag_5','event_lag_6',
     'event_lag_-7','event_lag_-6','event_lag_-5','event_lag_-4','event_lag_-3','event_lag_-2','event_lag_-1'] + day_by_day

# Columns excluded from the final feature list.
remove_features = ['store_id', 'state_id',
                   'te_store_id_28', 'te_state_id_28', 'te_store_id_tm_dw_28', 'te_state_id_tm_dw_28',
                   'te_store_id_60', 'te_state_id_60', 'te_store_id_tm_dw_60', 'te_state_id_tm_dw_60',
                   'snap_CA', 'snap_TX', 'snap_WI']

# Switches selecting which feature groups are appended to the basic ones.
use_enc_feat = True
use_lag_feat = True
use_rec_feat = False
use_add_feat = True

# Start from a copy: `feature = basic_features` would alias the list, so each
# `+=` below would also grow `basic_features`.
feature = list(basic_features)
if use_enc_feat:
    feature += encoding_features
if use_lag_feat:
    feature += lag_features
if use_rec_feat:
    feature += recursive_features
if use_add_feat:
    feature += additional_features

feature = [i for i in feature if i not in remove_features]

len(feature)

In [None]:
feature

## Model

In [None]:
# LightGBM training with early stopping, evaluated on train/validation/test sets.
def run_lgb(train, val,test, features, custom_loss, custom_eval, optuna_params=None, use_custom=True):
    """Train a LightGBM booster for up to 1500 rounds with early stopping.

    Args:
        train, val, test: DataFrames containing ``features`` and the TARGET column.
        features: Column names used as model inputs.
        custom_loss: Objective callable ``(preds, dataset) -> (grad, hess)``;
            only used when ``use_custom`` is true.
        custom_eval: Evaluation callable passed as ``feval``.
        optuna_params: Optional dict of parameter overrides applied on top of
            the defaults of the selected branch.
        use_custom: True trains with ``custom_loss``; False trains with the
            built-in tweedie objective.

    Returns:
        The trained booster returned by ``lgb.train``.
    """
    def _as_dataset(frame, data_type):
        # The 'data_type' tag is stored in the Dataset params so that the
        # evaluation function can tell the three sets apart.
        return lgb.Dataset(frame[features], frame[TARGET].values,
                           free_raw_data=False, params={'data_type': data_type})

    train_set = _as_dataset(train, 'train')
    del train
    gc.collect()

    val_set = _as_dataset(val, 'validation')
    del val
    gc.collect()

    test_set = _as_dataset(test, 'test')
    del test
    gc.collect()

    if use_custom:
        params = {
            'boosting_type': 'gbdt',
            'first_metric_only': True,
            'objective': 'custom',
            'metric': 'custom',
        }
        objective_kwargs = {'fobj': custom_loss}
    else:
        params = {
            'boosting_type': 'gbdt',
            'objective': 'tweedie',
            'first_metric_only': True,
            'tweedie_variance_power': 1.1,
            'metric': 'custom',
            'n_jobs': -1,
            'seed': 42,
            'learning_rate': 0.1,
            'bagging_fraction': 0.75,
            'bagging_freq': 10,
            'colsample_bytree': 0.75,
        }
        objective_kwargs = {}
    # None instead of a mutable `{}` default; overrides win over the defaults.
    params.update(optuna_params or {})

    model = lgb.train(params, train_set, num_boost_round = 1500, early_stopping_rounds = 100,
                      valid_sets = [train_set, val_set, test_set], valid_names=['Train','Val','Test'],
                      verbose_eval = 10, feval = custom_eval, **objective_kwargs)
    return model

# LightGBM training for a fixed number of rounds (no validation data, no early stopping).
def run_lgb_no_early_stopping(train, features, custom_loss, custom_eval, optuna_params=None, use_custom=True, num_boost_round = 200):
    """Train a LightGBM booster on ``train`` only, for ``num_boost_round`` rounds.

    Args:
        train: DataFrame containing ``features`` and the TARGET column.
        features: Column names used as model inputs.
        custom_loss: Objective callable ``(preds, dataset) -> (grad, hess)``;
            only used when ``use_custom`` is true.
        custom_eval: Evaluation callable; only used when ``use_custom`` is
            false (the custom branch reports the built-in 'rmse' metric).
        optuna_params: Optional dict of parameter overrides applied on top of
            the defaults of the selected branch.
        use_custom: True trains with ``custom_loss``; False trains with the
            built-in tweedie objective.
        num_boost_round: Number of boosting rounds.

    Returns:
        The trained booster returned by ``lgb.train``.
    """
    train_set = lgb.Dataset(train[features], train[TARGET].values, free_raw_data=False, params={'data_type':'train'})
    del train
    gc.collect()

    if use_custom:
        params = {
            'boosting_type': 'gbdt',
            'first_metric_only': True,
            'objective': 'custom',
            'metric': 'rmse',
        }
        train_kwargs = {'fobj': custom_loss}
    else:
        params = {
            'boosting_type': 'gbdt',
            'objective': 'tweedie',
            'first_metric_only': True,
            'tweedie_variance_power': 1.1,
            'metric': 'custom',
            'n_jobs': -1,
            'seed': 42,
            'learning_rate': 0.1,
            'bagging_fraction': 0.75,
            'bagging_freq': 10,
            'colsample_bytree': 0.75,
        }
        # This branch used to reference validation/test sets that do not exist
        # in this function and asked for early stopping; it now mirrors the
        # custom branch and only monitors the training set.
        train_kwargs = {'feval': custom_eval}
    # None instead of a mutable `{}` default; overrides win over the defaults.
    params.update(optuna_params or {})

    model = lgb.train(params, train_set, valid_sets = [train_set], num_boost_round = num_boost_round,
                      verbose_eval = 10, **train_kwargs)
    return model

# Hyper-parameter search and training through Optuna's LightGBM integration.
def run_lgb_optuna(train, val,test, features, custom_loss, custom_eval, use_custom=True):
    """Train through ``lgb_optuna.train`` and return the search results.

    Returns:
        ``(model, best_params, history)`` where ``best_params`` (dict) and
        ``history`` (list) are the containers handed to the tuner.
    """
    def _as_dataset(frame, data_type):
        return lgb.Dataset(frame[features], frame[TARGET].values,
                           free_raw_data=False, params={'data_type': data_type})

    train_set = _as_dataset(train, 'train')
    del train
    gc.collect()

    val_set = _as_dataset(val, 'validation')
    del val
    gc.collect()

    test_set = _as_dataset(test, 'test')
    del test
    gc.collect()

    best_params, history = {}, []

    # Arguments shared by both objective variants.
    tuner_kwargs = dict(
        num_boost_round=1500,
        early_stopping_rounds=100,
        valid_sets=[train_set, val_set, test_set],
        valid_names=['Train', 'Val', 'Test'],
        verbose_eval=10,
        feval=custom_eval,
        best_params=best_params,
        tuning_history=history,
    )

    if use_custom:
        params = {
            'boosting_type': 'gbdt',
            'first_metric_only': True,
            'objective': 'custom',
            'metric': 'wrmsse',
        }
        tuner_kwargs['fobj'] = custom_loss
    else:
        params = {
            'boosting_type': 'gbdt',
            'objective': 'tweedie',
            'first_metric_only': True,
            'tweedie_variance_power': 1.1,
            'metric': 'custom',
        }

    model = lgb_optuna.train(params, train_set, **tuner_kwargs)
    return model, best_params, history

## Loss Function

In [None]:
# Custom objective functions: each returns (gradient, hessian) w.r.t. the raw score.
def custom_asymmetric_train(y_pred, y_true):
    """Asymmetric squared error: residuals >= 0 (label >= prediction) are weighted 1.15, others 1."""
    under_weight = 1.15
    over_weight = 1
    labels = y_true.get_label()
    residual = (labels - y_pred).astype("float")
    weight = np.where(residual < 0, over_weight, under_weight)
    grad = -2 * residual * weight
    hess = 2 * weight
    return grad, hess

def tweedie(y_pred, y_true):
    """Tweedie-style gradient/hessian (power 1.1) evaluated directly on ``y_pred`` (no exp link).

    NOTE(review): differentiating the gradient below would give
    ``+(1-p)*y_pred**(-p)`` as the second hessian term; the sign used here is
    kept exactly as before - confirm whether it is intentional.
    """
    power = 1.1
    labels = y_true.get_label()
    inv_power = y_pred ** (-power)
    grad = y_pred ** (1 - power) - labels * inv_power
    hess = power * labels * y_pred ** (-power - 1) - (1 - power) * inv_power
    return grad, hess

def tweedie2(y_pred, y_true):
    """Tweedie objective (power 1.09) on the log scale: the mean is exp(y_pred)."""
    power = 1.09
    labels = y_true.get_label()
    # The two exponentials appear in both the gradient and the hessian.
    exp_one = np.exp(y_pred * (1 - power))
    exp_two = np.exp(y_pred * (2 - power))
    grad = -labels * exp_one + exp_two
    hess = -(1 - power) * labels * exp_one + (2 - power) * exp_two
    return grad, hess

def tweedie3(y_pred, y_true):
    """Log-link Tweedie objective with a per-row variance power.

    The power is read from column 'p' of the dataset's raw data
    (``y_true.get_data()['p']``):
      * p < 1   -> Poisson gradient/hessian,
      * p > 1.5 -> Tweedie with the power capped at 1.5,
      * else    -> Tweedie with power p.

    Returns:
        ``(grad, hess)`` arrays with respect to the raw score.
    """
    p = y_true.get_data()['p'].values
    labels = y_true.get_label()

    # Cap the power at 1.5; NaN powers pass through unchanged.
    capped = np.where(p > 1.5, 1.5, p)
    exp_one = np.exp(y_pred * (1 - capped))
    exp_two = np.exp(y_pred * (2 - capped))
    tweedie_grad = -labels * exp_one + exp_two
    # The capped branch previously reused the gradient expression as hessian
    # (missing the (1-p) and (2-p) factors).
    tweedie_hess = -(1 - capped) * labels * exp_one + (2 - capped) * exp_two

    use_poisson = p < 1
    mean = np.exp(y_pred)
    grad = np.where(use_poisson, -labels + mean, tweedie_grad)
    hess = np.where(use_poisson, mean, tweedie_hess)
    return grad, hess

def tweedie_sum(y_pred, y_true):
    """Sum of two log-link Tweedie objectives with powers 1.1 and 1.5."""
    labels = y_true.get_label()
    grad = 0
    hess = 0
    for power in (1.1, 1.5):
        exp_one = np.exp(y_pred * (1 - power))
        exp_two = np.exp(y_pred * (2 - power))
        grad = grad + (-labels * exp_one + exp_two)
        hess = hess + (-(1 - power) * labels * exp_one + (2 - power) * exp_two)
    return grad, hess

# Asymmetric squared error with a separate exponential term for zero labels.
def custom_asymmetric_train_2(y_pred, y_true):
    """Asymmetric squared error (weights 1.15 / 1); rows with label 0 use 0.5*exp(y_pred) for both outputs."""
    labels = y_true.get_label()
    residual = (labels - y_pred).astype("float")
    is_over = residual < 0
    is_zero = labels == 0
    zero_term = (1 - 0.5) * np.exp(y_pred)

    grad = np.where(is_over, -2 * residual, -2 * residual * 1.15)
    grad = np.where(is_zero, zero_term, grad)

    hess = np.where(is_over, 2, 2 * 1.15)
    hess = np.where(is_zero, zero_term, hess)

    return grad, hess

def zero_inflated_poisson_loss(y_pred, y_true):
    """Poisson objective on the log scale whose zero-label rows are scaled by (1 - 0.3)."""
    zero_prob = 0.3
    labels = y_true.get_label()
    mean = np.exp(y_pred)
    is_zero = labels == 0
    damped = (1 - zero_prob) * mean
    grad = np.where(is_zero, damped, mean - labels)
    hess = np.where(is_zero, damped, mean)
    return grad, hess

def poisson(y_pred, y_true):
    """Poisson objective on the log scale: gradient exp(score) - label, hessian exp(score)."""
    labels = y_true.get_label()
    mean = np.exp(y_pred)
    return mean - labels, mean

def custom_a(y_pred, y_true):
    """Objective with gradient tanh(pred - label) / label.

    The hessian is the derivative of that gradient,
    ``(1 - tanh(pred - label)**2) / label``.  It was previously computed from
    the already label-scaled gradient, which is only correct when label == 1.

    NOTE: rows whose label is 0 divide by zero, exactly as before.
    """
    labels = y_true.get_label()
    d = y_pred - labels
    tanh_d = np.tanh(d)
    grad = tanh_d / labels
    hess = (1.0 - tanh_d * tanh_d) / labels
    return grad, hess

def my_objective(y_pred, y_true):
    """Plain squared-error objective: gradient 2*(pred - label), constant hessian 2."""
    labels = y_true.get_label()
    d = y_pred - labels
    grad = 2 * d
    # Constant hessian; the former `2*d/d` evaluated to NaN wherever the
    # prediction equalled the label.
    hess = np.full(np.shape(d), 2.0)
    return grad, hess

def loglikelood(preds, train_data):
    """Binary log-loss objective: the raw score goes through a sigmoid before differentiating."""
    labels = train_data.get_label()
    prob = 1. / (1. + np.exp(-preds))
    return prob - labels, prob * (1. - prob)


def asymmetric_plus_tweedie(y_pred, y_true):
    """Log-link Tweedie (power 1.06) plus an asymmetric squared error on exp(score).

    The squared-error term is weighted by ``a`` (1.2) where the label is at
    least the predicted mean and by ``b`` (1) where the model over-predicts.
    The over/under decision is made on the predicted mean ``exp(y_pred)``;
    it used to compare the label with the raw log-scale score, which mixes
    two different scales.

    Returns:
        ``(grad, hess)`` arrays with respect to the raw score.
    """
    p = 1.06
    a = 1.2
    b = 1
    labels = y_true.get_label()
    mean = np.exp(y_pred)
    residual = (labels - mean).astype("float")
    weight = np.where(residual < 0, b, a)

    # Tweedie part.
    grad = -labels * mean ** (1 - p) + mean ** (2 - p)
    hess = -(1 - p) * labels * mean ** (1 - p) + (2 - p) * mean ** (2 - p)
    # Weighted squared error (label - exp(f))**2, differentiated w.r.t. f.
    grad += -2 * (labels - mean) * mean * weight
    hess += -2 * (labels - 2 * mean) * mean * weight
    return grad, hess


def asymmetric_tweedie(y_pred, y_true):
    """Log-link Tweedie (power 1.0) whose gradient and hessian are scaled asymmetrically.

    Rows where the label is at least the predicted mean are scaled by ``a``
    (1.2), over-predicted rows by ``b`` (1).  The over/under decision is made
    on the predicted mean ``exp(y_pred)``; it used to compare the label with
    the raw log-scale score, which mixes two different scales.

    Returns:
        ``(grad, hess)`` arrays with respect to the raw score.
    """
    p = 1.0
    a = 1.2
    b = 1
    labels = y_true.get_label()
    mean = np.exp(y_pred)
    residual = (labels - mean).astype("float")
    weight = np.where(residual < 0, b, a)

    grad = (-labels * mean ** (1 - p) + mean ** (2 - p)) * weight
    hess = (-(1 - p) * labels * mean ** (1 - p) + (2 - p) * mean ** (2 - p)) * weight
    return grad, hess

In [None]:
def link_exp(pred):
    """Map raw scores to the target scale with exp()."""
    return np.exp(pred)


def link_normal(pred):
    """Identity mapping: the raw scores are returned unchanged."""
    return pred


# Transformation applied to raw model scores, keyed by loss name.
link_func_dict = {
    'tweedie2': link_exp,
    'custom_asymmetric_train': link_normal,
    'asymmetric_plus_tweedie': link_exp,
    'asymmetric_tweedie': link_exp,
}

# Objective callable, keyed by the same loss names.
loss_func_dict = {
    'tweedie2': tweedie2,
    'custom_asymmetric_train': custom_asymmetric_train,
    'asymmetric_plus_tweedie': asymmetric_plus_tweedie,
    'asymmetric_tweedie': asymmetric_tweedie,
}

# Loss used for training and for transforming predictions.
LOSS_NAME = 'tweedie2'


## Evaluation Function

In [None]:
def get_weight_mat(product):
    """Build the sparse aggregation matrix (one row per series of every level, one column per item).

    Rows are stacked in this order: total, state, store, category, department,
    state+category, state+department, store+category, store+department, item,
    state+item, and finally the identity (one row per individual id).
    """
    n_items = product['id'].unique().shape[0]

    state = product['state_id'].astype(str)
    store = product['store_id'].astype(str)
    cat = product['cat_id'].astype(str)
    dept = product['dept_id'].astype(str)
    item = product['item_id'].astype(str)

    grouping_keys = [
        state,
        store,
        cat,
        dept,
        state + cat,
        state + dept,
        store + cat,
        store + dept,
        item,
        state + item,
    ]

    blocks = [np.ones([n_items, 1]).astype(np.int8)]  # level 1: everything
    for key in grouping_keys:
        blocks.append(pd.get_dummies(key, drop_first=False).astype('int8').values)
    blocks.append(np.identity(n_items).astype(np.int8))  # level 12: single items

    # Items are columns after the transpose.
    dense = np.concatenate(blocks, axis=1).T
    weight_mat_csr = csr_matrix(dense)
    del dense
    gc.collect()
    return weight_mat_csr

def weight1_calc(product,weight_mat_csr):
    """Compute the RMSSE denominator for every aggregated series.

    Reads the evaluation sales file, aggregates the daily sales of the ids in
    ``product`` with ``weight_mat_csr`` and, for each aggregated series,
    returns the sum of squared day-to-day differences from the first day with
    sales onwards, divided by ``1941 - start_no`` (``start_no`` being the
    zero-based index of that first day).

    Args:
        product: DataFrame whose ``id`` column selects and orders the items.
        weight_mat_csr: Aggregation matrix with one column per item.

    Returns:
        1-D array with one value per row of ``weight_mat_csr``.
    """
    n_days = 1941
    sales_train_val = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/sales_train_evaluation.csv')
    sales_train_val = sales_train_val.set_index('id').loc[product.id]
    d_name = ['d_' + str(i+1) for i in range(n_days)]
    agg_sales = weight_mat_csr * sales_train_val[d_name].values
    del sales_train_val

    # First day (1-based) with positive sales per series; 9999 marks "never sold".
    days = np.arange(1, n_days + 1)
    start_no = np.min(np.where(agg_sales > 0, days, 9999), axis=1) - 1

    # Blank out the days before the first sale.  A broadcast comparison
    # replaces the former dense diag() matrix product: it needs no
    # rows x rows matrix and is exact, whereas (1/n)*n < 1 can be true in
    # floating point and used to mask the first sale day itself.
    before_first_sale = days[np.newaxis, :] < (start_no + 1)[:, np.newaxis]
    agg_sales = np.where(before_first_sale, np.nan, agg_sales)

    # Denominator of RMSSE.
    weight1 = np.nansum(np.diff(agg_sales,axis=1)**2,axis=1)/(n_days-start_no)

    del agg_sales
    gc.collect()

    return weight1

def weight2_calc(data_v,product,weight_mat_csr):
    """Return each aggregated series' share of revenue (sales * sell_price) in ``data_v``.

    Revenue is summed per id, ordered like ``product.id``, aggregated with
    ``weight_mat_csr`` and normalised so that the result sums to 1.
    """
    revenue = data_v[['id', 'sales', 'sell_price']].assign(
        amount=lambda frame: frame['sales'] * frame['sell_price'])
    per_item = revenue.groupby(['id'])['amount'].apply(np.sum)
    item_totals = per_item[product.id].values
    level_totals = weight_mat_csr * item_totals
    return level_totals / np.sum(level_totals)

def weight2_calc_for_train(data_v,product,weight_mat_csr):
    """Same as weight2_calc, but each row's revenue is truncated to an integer before summing."""
    revenue = data_v[['id', 'sales', 'sell_price']].assign(
        amount=lambda frame: (frame['sales'] * frame['sell_price']).astype(int))
    per_item = revenue.groupby(['id'])['amount'].apply(np.sum)
    item_totals = per_item[product.id].values
    level_totals = weight_mat_csr * item_totals
    return level_totals / np.sum(level_totals)

def wrmsse(preds, data):
    """LightGBM evaluation function returning the weighted RMSSE over all aggregation levels.

    The data set is identified through ``data.params['data_type']``:
      * 'train'      -> the score is not computed; the constant placeholder
                        1371 is returned,
      * 'validation' -> weighted with the module-level ``weight2_val``,
      * 'test'       -> weighted with the module-level ``weight2_te``.

    When ``use_custom_loss`` is set, raw scores are first transformed with the
    link function registered for ``LOSS_NAME``.  Also reads the module-level
    ``NUM_ITEMS``, ``weight_mat_csr`` and ``weight1``.

    Returns:
        ``('wrmsse', score, False)`` - name, value, is_higher_better.

    Raises:
        ValueError: if the data type is none of the three known values.
    """
    data_type = data.params['data_type']
    if data_type == 'train':
        # Placeholder: returned before any work is done on the training set.
        return 'wrmsse', 1371, False
    if data_type == 'validation':
        weight2 = weight2_val
    elif data_type == 'test':
        weight2 = weight2_te
    else:
        raise ValueError('unknown data_type: {!r}'.format(data_type))

    y_true = data.get_label()
    if use_custom_loss:
        preds = link_func_dict[LOSS_NAME](preds)

    # One row per item, one column per day.
    reshaped_preds = preds.reshape([-1, NUM_ITEMS]).T
    reshaped_true = y_true.reshape([-1, NUM_ITEMS]).T

    # Aggregate the item-level errors to every level of the hierarchy.
    level_errors = weight_mat_csr*(reshaped_preds - reshaped_true)
    score = np.sum(
                np.sqrt(
                    np.mean(
                        np.square(level_errors)
                        ,axis=1) / weight1) * weight2)
    return 'wrmsse', score, False

def wrmsse_sub(preds, y_true):
    """Compute the weighted RMSSE from flat prediction and target arrays.

    Both arrays are reshaped to (items, days) using the module-level
    NUM_ITEMS, aggregated with ``weight_mat_csr`` and scored with ``weight1``
    and ``weight2``.  Returns ``('wrmsse', score, False)``.

    NOTE(review): ``weight2`` is not assigned anywhere in the visible code
    (only weight2_val / weight2_te are referenced elsewhere) - confirm where
    it comes from.
    """
              
    reshaped_preds = preds.reshape([-1, NUM_ITEMS]).T
    reshaped_true = y_true.reshape([-1, NUM_ITEMS]).T    
    # NOTE(review): this transform is applied to `preds` after
    # `reshaped_preds` has already been built, so it does not change the
    # score.  Confirm whether it should run before the reshape (as in
    # wrmsse) or be removed because callers pass transformed predictions.
    if use_custom_loss:
        preds = link_func_dict[LOSS_NAME](preds)
        
    # Aggregate the item-level errors to every level of the hierarchy.
    train = weight_mat_csr*(reshaped_preds - reshaped_true)

    score = np.sum(
                np.sqrt(
                    np.mean(
                        np.square(train)
                        ,axis=1) / weight1) * weight2)

    return 'wrmsse', score, False

In [None]:
def rmse(preds, data):
    """LightGBM evaluation function: root mean squared error of the (link-transformed) predictions."""
    labels = data.get_label()
    scores = link_func_dict[LOSS_NAME](preds) if use_custom_loss else preds

    errors = scores - labels
    mean_squared_error = np.mean(np.square(errors))
    score = np.mean(np.sqrt(mean_squared_error))

    return 'rmse', score, False

In [None]:
def metrics(preds, data):
    """Evaluate several metrics at once: returns [wrmsse result, rmse result].

    NOTE: this name shadows the ``metrics`` module imported from sklearn at
    the top of the file.
    """
    evaluators = (wrmsse, rmse)
    return [evaluate(preds, data) for evaluate in evaluators]

## Preparation

### Training Parameters

In [None]:
# Whether raw model scores are transformed with the loss-specific link function.
use_custom_loss = True

# Hold-out test window (last 28 days up to day 1941).
test_end_date = 1941
test_start_date = test_end_date - 28

# Window sizes, in days.
train_width_date = 365 * 5
val_width_date = 28
shift_width_date = 28
min_train_date = 0

# Sliding [start, split, end] windows, walking back from the day before the
# test window in steps of `shift_width_date`; stops as soon as the training
# window would start before `min_train_date`.
slide_list = []
for i in range(test_start_date - 1, 1, -shift_width_date):
    end_date = i
    split_date = end_date - val_width_date
    start_date = split_date - train_width_date
    if start_date >= min_train_date:
        slide_list.append([start_date, split_date, end_date])
        continue
    break

In [None]:
# Keep at most the first five validation windows so the notebook runs quickly.
slide_list = slide_list[:5]

In [None]:
slide_list

In [None]:
%%time

# One row per series with its hierarchy identifiers.  Only the six id columns
# are parsed (usecols), so the daily sales columns are never loaded here.
_product_columns = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
product = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/sales_train_evaluation.csv',
                      usecols=_product_columns)
# Re-select explicitly: with usecols the column order follows the file, not the list.
product = product[_product_columns].drop_duplicates()

## Make Parameter Grid

## Training

In [None]:
# Distinct store identifiers, in order of first appearance in `product`.
STORE_IDS = list(product.store_id.unique())

In [None]:
# store_id = 'CA_1'
# start_date, split_date, end_date = slide_list[0] 
# print('start_date, split_date, end_date:',start_date, split_date, end_date)
# print('store_id:',store_id)
# print('load dataset')
# df = load_data(store_id)
# day_mask = (df.d>=start_date)&(df.d<split_date)
# train = df[day_mask]
# train_ids = train.id.unique()
# NUM_ITEMS = len(train_ids)

# day_mask = (df.d>=split_date)&(df.d<=end_date)
# val = df[day_mask]
# val = val[val.id.isin(train_ids)]
# day_mask = (df.d>=test_start_date)&(df.d<=test_end_date)
# test = df[day_mask]
# del df
# gc.collect()

# test_tmp = test[test.id.isin(train_ids)]
# product_tmp = product[product.id.isin(train_ids)]

# print('calc weight')
# weight_mat_csr = get_weight_mat(product_tmp)
# weight1 = weight1_calc(product_tmp,weight_mat_csr)
# weight2_te = weight2_calc(test_tmp,product_tmp,weight_mat_csr)
# weight2_val = weight2_calc(val,product_tmp,weight_mat_csr)


In [None]:
# LightGBM parameter overrides; passed as `optuna_params` to
# run_lgb_no_early_stopping in the submission loop below.
params = {
"lambda_l1":0.0000013771438547444856,
"lambda_l2":1.8704380943506742,
"num_leaves":27,
"feature_fraction":0.8,
"bagging_fraction":0.8154703815794264,
"bagging_freq":1,
"min_child_samples":20
}

In [None]:
# for PREDICT_DAY in range(1,29): 
# #        tmp_lag = day_by_day_lag_features[PREDICT_DAY-1:]
#     remove_lag = []
#     for i in range(1, PREDICT_DAY):
#         remove_lag.append('sales_lag_{}'.format(i))
#     new_features = list(set(feature) - set(remove_lag))
#     print('train model; day', PREDICT_DAY)
#     print(remove_lag)
#     model = run_lgb(train,val,test_tmp, new_features + ['snap_' + store_id.split('_')[0]], loss_func_dict[LOSS_NAME], wrmsse, optuna_params=params)
#    # model = run_lgb_no_early_stopping(train, new_features, loss_func_dict[LOSS_NAME], wrmsse, num_boost_round = num_boost_rounds[store_id])
#     model_name = 'lgb_model_'+store_id+'_'+str(PREDICT_DAY)+'.bin'
#     pickle.dump(model, open(model_name, 'wb'))
#     del model


In [None]:
# num_boost_rounds = {
#     'CA_1': 250,
#     'CA_2': 50,
#     'CA_3': 50,
#     'CA_4': 100,
#     'TX_1': 250,
#     'TX_2': 50,    
#     'TX_3': 50,
#     'WI_1': 75,
#     'WI_2': 75,
#     'WI_3': 100,
# }

In [None]:
# Train one model per forecast day for a single store and collect the
# predictions for the submission window (days sub_start_date..sub_end_date).
store_id = 'CA_1'
sub_start_date = 1942
sub_end_date = 1969
print('store_id:',store_id)
print('load dataset')
df = load_data(store_id)
day_mask = (df.d<sub_start_date)
train = df[day_mask]
train_ids = train.id.unique()
NUM_ITEMS = len(train_ids)
gc.collect()
day_mask = (te2.d>=sub_start_date)&(te2.d<=sub_end_date)
# Explicit copy so the new column is not written to a slice of te2; float
# column because float predictions are accumulated into it.
all_pred = te2.loc[day_mask, ['id','d']].copy()
all_pred['sales'] = 0.0

# Only the SNAP flag of the store's own state is used.
snap_feature = 'snap_' + store_id.split('_')[0]

for PREDICT_DAY in range(1,29):
    # Lags shorter than the forecast horizon are not available for that day.
    remove_lag = ['sales_lag_{}'.format(i) for i in range(1, PREDICT_DAY)]
    excluded = set(remove_lag)
    # Filter in place of set arithmetic: keeps the feature order stable
    # between runs instead of depending on string hashing.
    new_features = [name for name in feature if name not in excluded]
    model_features = new_features + [snap_feature]
    print('train model; day', PREDICT_DAY)
    model = run_lgb_no_early_stopping(train, model_features, loss_func_dict[LOSS_NAME], wrmsse, optuna_params=params, num_boost_round = 300)

    target_day = sub_start_date + PREDICT_DAY - 1
    day_mask = df.d == target_day
    day_frame = df.loc[day_mask, model_features]
    # Average the raw scores after 100, 200 and 300 trees, then apply the link.
    pred = sum(model.predict(day_frame, num_iteration=n_trees) for n_trees in (100, 200, 300)) / 3.0
    pred = link_func_dict[LOSS_NAME](pred)

    # Write the predictions back by id instead of relying on `df` and
    # `all_pred` listing the items in the same row order.
    day_mask2 = all_pred.d == target_day
    target_rows = all_pred.id.isin(train_ids) & day_mask2
    pred_by_id = pd.Series(pred, index=df.loc[day_mask, 'id'].astype(str).values)
    target_ids = all_pred.loc[target_rows, 'id'].astype(str)
    missing = ~target_ids.isin(pred_by_id.index)
    if missing.any():
        raise ValueError('no prediction for {} ids on day {}'.format(int(missing.sum()), target_day))
    all_pred.loc[target_rows, 'sales'] += target_ids.map(pred_by_id)

    model_name = 'lgb_model_'+store_id+'_'+str(PREDICT_DAY)+'.bin'
    # Context manager so the file handle is closed (and flushed) right away.
    with open(model_name, 'wb') as model_file:
        pickle.dump(model, model_file)
    del model

## Submission

In [None]:
all_pred

In [None]:
# Reshape the long predictions to one row per id and one column per day,
# then rename the 28 day columns to F1..F28.
sub = all_pred[['d', 'sales', 'id']].pivot(index='id', columns='d', values='sales')
sub = sub.reset_index()
sub.columns = ['id'] + ['F{}'.format(i) for i in range(1, 29)]

In [None]:
# Attach the forecasts to every id of the sample submission; ids without a
# forecast get 0 in all columns.
submission = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/sample_submission.csv')[['id']]
submission = submission.merge(sub, on=['id'], how='left')
submission = submission.fillna(0)
submission.to_csv('submission.csv', index=False)

In [None]:
submission[submission.F1!=0]