In [1]:
import numpy as np
import pandas as pd
from scipy import interpolate
import matplotlib.pyplot as plt
from scipy import stats
import time
from tqdm import tqdm
import optuna
import yaml

from warnings import simplefilter
from optuna._experimental import ExperimentalWarning
simplefilter(action='ignore', category=ExperimentalWarning)

import sys
sys.path.append("../utils")
from memory import reduce_mem_usage
from metrics import RMSSE, ranked_probability_score, scoring_function, binarize_predictions
from inventory import InventoryDaysPredictor

pd.set_option('display.max_columns', None)

In [2]:
# Project-relative directories, resolved against the notebook's working directory.
results_path = "../results"  # model outputs such as out-of-fold predictions
input_path = "../data"       # raw / prepared input data

***
## loading data

In [3]:
# Validation fold whose out-of-fold predictions drive the tuning below.
fold_to_use = 1

# Load the out-of-fold predictions, keep only the selected fold, renumber the
# rows from zero and cast the target column to integer, all in one chain.
oof = (
    pd.read_csv("../results/oof_preds_xgb-m2.csv")
    .query("valid_fold == @fold_to_use")
    .reset_index(drop=True)
    .astype({"inventory_days": int})
)

***
## distribution tuning

In [4]:
# try for a weibull
def weibull_pdf(x, lam, k):
    """Evaluate the two-parameter Weibull density at ``x``.

    f(x; lam, k) = (k / lam) * (x / lam)**(k - 1) * exp(-(x / lam)**k)

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the density.
    lam : float
        Scale parameter.
    k : float
        Shape parameter.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Density values with the same shape as ``x``.
    """
    z = np.asarray(x, dtype=float) / lam
    # The exponent must be -(x/lam)**k. Raising exp(-x/lam) to the k-th power
    # yields exp(-k*x/lam), which matches the Weibull density only when k == 1.
    return (k / lam) * (z ** (k - 1)) * np.exp(-(z ** k))

xvals = np.arange(0, 30, 0.01)
#yvals = stats.weibull_min.pdf(xvals, c=20, loc=5, scale=4)
yvals = weibull_pdf(xvals, lam=3, k=2)

#plt.plot(xvals, yvals)
#plt.show()

In [5]:
class InventoryDaysPredictor():
    """Turn a per-SKU point prediction into a probability vector over days.

    The distribution is a generalized normal (``scipy.stats.gennorm``) centred
    on the SKU's ``pred`` value, evaluated on the integer days 1..N_DAYS and
    normalised to sum to one.

    NOTE(review): this notebook-local class has the same name as the
    ``InventoryDaysPredictor`` imported from ``inventory`` in the first cell
    and therefore replaces it for every later cell - confirm that is intended.
    """

    # Number of day buckets (days 1..N_DAYS) in every returned distribution.
    N_DAYS = 30

    def __init__(self, oof):
        """Store a private copy of ``oof`` indexed by its ``sku`` column.

        Parameters
        ----------
        oof : pandas.DataFrame
            Must contain the columns ``sku`` and ``pred``.
        """
        self.oof = oof.set_index("sku").copy(deep=True)

    def predict_proba(self, sku, lambda1, lambda2, lambda3, lambda4):
        """Return the probability vector over days 1..N_DAYS for ``sku``.

        With ``d`` the ``pred`` value stored for ``sku``:

        * shape parameter  ``beta  = lambda1 * d**lambda2``
        * scale parameter  ``scale = lambda3 * d**lambda4``

        Parameters
        ----------
        sku : hashable
            Index label of the SKU to look up.
        lambda1, lambda2, lambda3, lambda4 : float
            Coefficients and exponents of the two power laws above.

        Returns
        -------
        numpy.ndarray
            Array of length N_DAYS that sums to one. A uniform distribution is
            returned whenever the density cannot be normalised (total mass of
            zero, or NaN/inf caused for example by ``d <= 0``).
        """
        # Label-based scalar lookup: avoids building a whole row Series first.
        days_to_stockout = self.oof.loc[sku, "pred"]

        days = np.arange(1, self.N_DAYS + 1)
        # A non-positive prediction makes the power laws produce 0 or NaN; that
        # case is handled by the fallback below, so silence the float warnings.
        with np.errstate(divide="ignore", invalid="ignore"):
            beta = lambda1 * (days_to_stockout ** lambda2)
            scale = lambda3 * (days_to_stockout ** lambda4)
            probs = stats.gennorm.pdf(days, loc=days_to_stockout, scale=scale, beta=beta)

        total = np.sum(probs)
        # If the mass is zero *or not finite*, replace with uniform. Checking
        # only ``total == 0`` would let NaN vectors through (NaN != 0).
        if not np.isfinite(total) or total <= 0:
            return np.ones(self.N_DAYS) / self.N_DAYS

        return probs / total

In [6]:
def objective(trial):
    """Optuna objective: score one candidate set of distribution parameters.

    Samples ``lambda1``..``lambda4`` from the trial, builds a probability
    vector for every SKU in the module-level ``oof`` frame with
    ``InventoryDaysPredictor.predict_proba``, and returns the value of
    ``scoring_function`` computed against ``oof.inventory_days``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the four hyper-parameters.

    Returns
    -------
    float
        The score to optimise (presumably a ranked probability score, judging
        by the helper names imported from ``metrics`` - verify there).
    """
    # suggest_float is the supported replacement for the deprecated
    # suggest_uniform; the ranges are unchanged.
    lambda1 = trial.suggest_float("lambda1", 0.01, 2.)
    lambda2 = trial.suggest_float("lambda2", 0, 1.)
    lambda3 = trial.suggest_float("lambda3", 0.01, 2.)
    lambda4 = trial.suggest_float("lambda4", 0.01, 1.)

    predictor = InventoryDaysPredictor(oof)

    # One row of probabilities per SKU, in the same order as ``oof``.
    preds_proba = np.array([
        predictor.predict_proba(
            sku,
            lambda1=lambda1,
            lambda2=lambda2,
            lambda3=lambda3,
            lambda4=lambda4,
        )
        for sku in oof.sku.values
    ])

    return scoring_function(oof.inventory_days.values, preds_proba)

In [7]:
# Known parameter set that is evaluated first, so the sampler starts its
# search with one sensible observation instead of purely random points.
trial_0 = {
    "lambda1":0.794214,
    "lambda2":0.019486,
    "lambda3":0.998640,
    "lambda4":0.727768,
}

N_TRIALS = 10000                 # upper bound on the number of trials
TIMEOUT_SECONDS = 4 * 60 * 60    # wall-clock budget: 4 hours
SAMPLER_SEED = 42                # fixed seed so the trial sequence is repeatable

# TPESampler is Optuna's default single-objective sampler; it is created
# explicitly here only so that it can be seeded.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=SAMPLER_SEED),
)
study.enqueue_trial(trial_0)
study.optimize(objective, n_trials=N_TRIALS, timeout=TIMEOUT_SECONDS, n_jobs=1)

[32m[I 2021-08-27 20:03:13,662][0m A new study created in memory with name: no-name-6256e9e8-9467-444a-9eb1-46ce023e32dc[0m
[32m[I 2021-08-27 20:04:17,870][0m Trial 0 finished with value: 3.42847458709033 and parameters: {'lambda1': 0.794214, 'lambda2': 0.019486, 'lambda3': 0.99864, 'lambda4': 0.727768}. Best is trial 0 with value: 3.42847458709033.[0m
[32m[I 2021-08-27 20:05:20,573][0m Trial 1 finished with value: 4.604838085234392 and parameters: {'lambda1': 1.972856056378673, 'lambda2': 0.08986014456404678, 'lambda3': 0.4865004917084398, 'lambda4': 0.04596315257366135}. Best is trial 0 with value: 3.42847458709033.[0m
[32m[I 2021-08-27 20:06:22,604][0m Trial 2 finished with value: 4.8059591537953965 and parameters: {'lambda1': 0.141839613962989, 'lambda2': 0.10847147466691909, 'lambda3': 1.3467864129121838, 'lambda4': 0.6734544571888573}. Best is trial 0 with value: 3.42847458709033.[0m
[32m[I 2021-08-27 20:07:24,961][0m Trial 3 finished with value: 4.3169329360031465 

In [8]:
# Display the 20 trials with the lowest objective value, best first.
(
    study
    .trials_dataframe()
    .sort_values(by="value")
    .head(n=20)
)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_lambda1,params_lambda2,params_lambda3,params_lambda4,system_attrs_fixed_params,state
196,196,3.427668,2021-08-27 23:25:43.672616,2021-08-27 23:26:45.830471,0 days 00:01:02.157855,0.912716,0.000992,1.299781,0.66279,,COMPLETE
198,198,3.427675,2021-08-27 23:27:47.753672,2021-08-27 23:28:49.740057,0 days 00:01:01.986385,0.900056,0.004548,1.294071,0.658974,,COMPLETE
197,197,3.427876,2021-08-27 23:26:45.831518,2021-08-27 23:27:47.752629,0 days 00:01:01.921111,0.913157,0.010536,1.293512,0.66844,,COMPLETE
229,229,3.427959,2021-08-27 23:59:49.215251,2021-08-28 00:00:52.644307,0 days 00:01:03.429056,0.84617,0.017121,1.221524,0.667972,,COMPLETE
202,202,3.427959,2021-08-27 23:31:55.631943,2021-08-27 23:32:57.501967,0 days 00:01:01.870024,0.874375,0.024283,1.264563,0.669929,,COMPLETE
211,211,3.427964,2021-08-27 23:41:13.042434,2021-08-27 23:42:14.888928,0 days 00:01:01.846494,0.915685,0.022734,1.373887,0.66587,,COMPLETE
212,212,3.428012,2021-08-27 23:42:14.889963,2021-08-27 23:43:16.888233,0 days 00:01:01.998270,0.905355,0.023203,1.371363,0.664227,,COMPLETE
201,201,3.428012,2021-08-27 23:30:53.731238,2021-08-27 23:31:55.630847,0 days 00:01:01.899609,0.910512,0.016563,1.297452,0.668334,,COMPLETE
207,207,3.428079,2021-08-27 23:37:04.823468,2021-08-27 23:38:06.819887,0 days 00:01:01.996419,0.852253,0.03332,1.28784,0.664437,,COMPLETE
193,193,3.428144,2021-08-27 23:22:37.389941,2021-08-27 23:23:39.435709,0 days 00:01:02.045768,0.921359,0.001058,1.287379,0.654524,,COMPLETE


In [17]:
# Persist the best hyper-parameters found by the study as block-style YAML.
# The ``with`` statement closes the file on exit, so no explicit close() is needed.
with open("../config/dist_m2_hparams.yml", "w") as file:
    yaml.dump(study.best_params, file, default_flow_style=False)

***