In [1]:
import numpy as np
import pandas as pd
from scipy import interpolate
import matplotlib.pyplot as plt
from scipy import stats
import time
from tqdm import tqdm
import optuna
import yaml

import sys
sys.path.append("../utils")
from memory import reduce_mem_usage
from metrics import RMSSE, ranked_probability_score, scoring_function, binarize_predictions
from inventory import InventoryDaysPredictor

pd.set_option('display.max_columns', None)

In [2]:
input_path = "../data"
results_path = "../results"

***
## loading data

In [11]:
fold_to_use = 1

oof = (
    pd.read_csv("../results/oof_preds_xgb-m2.csv")
    .query("valid_fold == @fold_to_use")
    .reset_index(drop=True)
)
oof["inventory_days"] = oof["inventory_days"].astype(int)

***
## distribution tuning

In [12]:
# try for a weibull
def weibull_pdf(x, lam, k):
    return (k/lam) * ((x/lam)**(k-1)) * (np.exp(-x/lam)**k)

xvals = np.arange(0, 30, 0.01)
#yvals = stats.weibull_min.pdf(xvals, c=20, loc=5, scale=4)
yvals = weibull_pdf(xvals, lam=3, k=2)

#plt.plot(xvals, yvals)
#plt.show()

In [13]:
class InventoryDaysPredictor():

    def __init__(self, oof):
        self.oof = oof.set_index("sku").copy(deep=True)

    def predict_proba(self, sku, lambda1, lambda2, lambda3, lambda4):
        days_to_stockout = self.oof.loc[sku]["pred"]

        days = np.arange(1,31)
        beta = (lambda1* (days_to_stockout**lambda2)) 
        scale = lambda3*(days_to_stockout**lambda4)
        
        probs = stats.gennorm.pdf(days, loc=days_to_stockout, scale=scale, beta=beta)
        #if prob is zero, replace with uniform
        if np.sum(probs) == 0: return np.ones(30) / 30
        
        return probs/np.sum(probs)

In [14]:
def objective(trial):
    lambda1 = trial.suggest_uniform("lambda1", 0.01, 2.)
    lambda2 = trial.suggest_uniform("lambda2", 0, 1.)
    lambda3 = trial.suggest_uniform("lambda3", 0.01, 1.)
    lambda4 = trial.suggest_uniform("lambda4", 0.01, 1.)
    
    predictor = InventoryDaysPredictor(oof)
    
    preds_proba = list()
    for sku in oof.sku.values:
        preds_proba.append(predictor.predict_proba(
            sku, 
            lambda1=lambda1, 
            lambda2=lambda2,
            lambda3=lambda3, 
            lambda4=lambda4,
        ))
    preds_proba = np.array(preds_proba)
    
    rps = scoring_function(oof.inventory_days.values, preds_proba)
    return rps

In [15]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10000, timeout=28800, n_jobs=1) # 8-hrs

[32m[I 2021-08-27 00:45:59,432][0m A new study created in memory with name: no-name-9ff5088d-859e-43a6-9856-84a1143030a6[0m
[32m[I 2021-08-27 00:47:03,727][0m Trial 0 finished with value: 3.9590166565560394 and parameters: {'lambda1': 0.66194660712019, 'lambda2': 0.6798904783264639, 'lambda3': 0.9680005375719809, 'lambda4': 0.481468190551313}. Best is trial 0 with value: 3.9590166565560394.[0m
[32m[I 2021-08-27 00:48:06,777][0m Trial 1 finished with value: 4.459216450268789 and parameters: {'lambda1': 0.9680552077663548, 'lambda2': 0.9827488928714037, 'lambda3': 0.22419801978822385, 'lambda4': 0.6999323675605781}. Best is trial 0 with value: 3.9590166565560394.[0m
[32m[I 2021-08-27 00:49:09,096][0m Trial 2 finished with value: 4.4213583919187 and parameters: {'lambda1': 0.8756668619567737, 'lambda2': 0.18897269333156597, 'lambda3': 0.18536641589535685, 'lambda4': 0.6029112733070058}. Best is trial 0 with value: 3.9590166565560394.[0m
[32m[I 2021-08-27 00:50:11,869][0m Tri

In [16]:
study.trials_dataframe().sort_values("value").head(20)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_lambda1,params_lambda2,params_lambda3,params_lambda4,state
382,382,3.428475,2021-08-27 07:18:59.978436,2021-08-27 07:20:01.884215,0 days 00:01:01.905779,0.794214,0.019486,0.99864,0.727768,COMPLETE
379,379,3.428598,2021-08-27 07:15:54.570136,2021-08-27 07:16:56.556244,0 days 00:01:01.986108,0.780514,0.018163,0.971257,0.724902,COMPLETE
191,191,3.428662,2021-08-27 04:02:30.607213,2021-08-27 04:03:32.328118,0 days 00:01:01.720905,0.778353,0.010994,0.947713,0.724388,COMPLETE
189,189,3.428675,2021-08-27 04:00:27.523578,2021-08-27 04:01:29.324480,0 days 00:01:01.800902,0.78043,0.006353,0.939871,0.733968,COMPLETE
223,223,3.428708,2021-08-27 04:35:22.453691,2021-08-27 04:36:24.165561,0 days 00:01:01.711870,0.770854,0.020938,0.947667,0.734899,COMPLETE
185,185,3.428709,2021-08-27 03:56:20.845604,2021-08-27 03:57:22.220579,0 days 00:01:01.374975,0.779615,0.013501,0.930342,0.740416,COMPLETE
285,285,3.428732,2021-08-27 05:39:03.848162,2021-08-27 05:40:05.736533,0 days 00:01:01.888371,0.774565,0.016743,0.989501,0.701983,COMPLETE
190,190,3.428753,2021-08-27 04:01:29.325491,2021-08-27 04:02:30.606177,0 days 00:01:01.280686,0.784461,0.005517,0.944554,0.717681,COMPLETE
373,373,3.428754,2021-08-27 07:09:41.572400,2021-08-27 07:10:43.699027,0 days 00:01:02.126627,0.79612,0.031577,0.999756,0.732746,COMPLETE
432,432,3.428772,2021-08-27 08:10:27.236523,2021-08-27 08:11:29.006128,0 days 00:01:01.769605,0.772946,0.001066,0.97691,0.690167,COMPLETE


In [17]:
with open(f"../config/dist_m2_hparams.yml", "w") as file:
    yaml.dump(study.best_params, file, default_flow_style=False)
    file.close()

***