In [1]:
# !pip install azure-storage-blob
# !pip install python-dotenv

import os
from dotenv import load_dotenv
from setup_utils import fetch_data, load_data, create_time_index
from datetime import datetime
import pandas as pd
import numpy as np

CONNECTION_STRING = os.getenv("CONNECTION_STRING")

load_dotenv()
fetch_data(CONNECTION_STRING)

(
    brand_mapping_backup,
    macro_data_backup,
    brand_constraint_backup,
    pack_constraint_backup,
    segment_constraint_backup,
    sales_data_backup,
    volume_variation_constraint_backup,
) = load_data()

(
    macro_data_backup,
    sales_data_backup,
) = create_time_index([macro_data_backup, sales_data_backup])

from sklearn.metrics import make_scorer, r2_score

pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)

data/brand_segment_mapping_hackathon.xlsx
data/macro_data.xlsx
data/maximum_discount_constraint_hackathon.xlsx
data/sales_data_hackathon.xlsx
data/volume_variation_constraint_hackathon.xlsx
data/submission_template_hackathon.csv


In [53]:
import tensorflow as tf

In [92]:
brand_mapping = brand_mapping_backup.copy(deep=True)
macro_data = macro_data_backup.copy(deep=True)
brand_constraint = brand_constraint_backup.copy(deep=True)
pack_constraint = pack_constraint_backup.copy(deep=True)
segment_constraint = segment_constraint_backup.copy(deep=True)
sales_data = sales_data_backup.copy(deep=True)
volume_variation_constraint = volume_variation_constraint_backup.copy(deep=True)

sales_index = sales_data.index.unique()
macro_data = macro_data.loc[sales_index, ['retail_sales_index', 'unemployment_rate', 'cpi', 'gross_domestic_saving',]].sort_index()
covid = pd.Series([1 if (i<=datetime(2020,5,1) and i>=datetime(2020,3,1)) else 0 for i in macro_data.index], index=sales_index, name="covid")
macro_data = macro_data.join(covid)

In [93]:
constraints_dict = {
    "brand" : brand_constraint,
    "pack" : pack_constraint,
    "segment" : segment_constraint,
    "volume_variation" : volume_variation_constraint
}

In [94]:
temp_data = sales_data[sales_data.gto.isna()].reset_index()
temp_data["month"] = temp_data.date.dt.month
temp_data["year"] = temp_data.date.dt.year
temp_data = temp_data.fillna(10000)
temp_data = temp_data.merge(brand_mapping)

master_mapping = temp_data[["sku", "pack", "brand", "segment"]].drop_duplicates().reset_index(drop=True)

In [95]:
def _create_encodings(master_map):

    def label_encoder(series):
        unique_values = series.sort_values().unique()
        unique_count =  series.nunique()

        return dict(zip(unique_values, range(len(unique_values))))

    def mapper(col_val, col_key="sku"):

        df = master_map[[col_key, col_val]].drop_duplicates()
        df.loc[:,col_val] = df[col_val].map(label_dict[col_val])
        df.loc[:,col_key] = df[col_key].map(label_dict[col_key])

        return df.set_index(col_key).to_dict()[col_val]

    label_dict = {col:label_encoder(master_map[col]) for col in master_map.columns}
    mapper_dict = {col:mapper(col) for col in master_map.columns if col!="sku"}

    return {"label_dict" : label_dict, "mapper_dict" : mapper_dict}

final_encodings = _create_encodings(master_mapping)

In [96]:
total_sku_list = np.sort(sales_data.sku.unique()).tolist()
target_sku_list = list(final_encodings["label_dict"]["sku"].keys())
non_target_sku_list = [i for i  in total_sku_list if i not in final_encodings["label_dict"]["sku"]]
sku_index_order = [*target_sku_list, *non_target_sku_list]

In [97]:
def _constraint_tensor_generate(constraint, encoding, key):

    encoding_length = max(encoding["label_dict"][key].values())+1
    constraint = constraint.copy(deep=True)
    constraint = constraint.replace(encoding["label_dict"][key]).sort_values(["month", key])
    constraint = constraint.groupby(["month", key]).max_discount.sum().sort_index().unstack(1)

    constraint = pd.DataFrame(columns=pd.Index(range(0,encoding_length), dtype='int64', name="brand"), index=pd.Index(range(6,8), dtype='int64')).fillna(constraint).fillna(0.0).to_numpy()

    return constraint

brand_constraint_tensor = _constraint_tensor_generate(constraints_dict["brand"], final_encodings, "brand")
pack_constraint_tensor = _constraint_tensor_generate(constraints_dict["pack"], final_encodings, "pack")
segment_constraint_tensor = _constraint_tensor_generate(constraints_dict["segment"], final_encodings, "segment")

In [98]:
macro_data = macro_data.loc[sales_index].sort_index()
macro_data = (macro_data/macro_data.mean()-1).copy(deep=True)
macro_data = macro_data.astype(np.float64).values
macro_data = np.expand_dims(macro_data, 1)

In [99]:
nr_data = (
    sales_data
    .reset_index()
    .groupby(["date", "sku"])
    .net_revenue.sum()
    .sort_index()
    .unstack(1)
    [sku_index_order]
    .clip(0.0, None)
    .fillna(0.0)
    .astype(np.float64)
    .values
)
nr_data_mask = (
    sales_data
    .reset_index()
    .groupby(["date", "sku"])
    .net_revenue.sum()
    .sort_index()
    .unstack(1)
    [sku_index_order]
    .applymap(lambda x: x if x>=0 else np.nan)
    .notna()
    .astype(np.float64)
    .values
)

nr_shifted = (
    sales_data
    .reset_index()
    .groupby(["date", "sku"])
    .net_revenue.sum()
    .sort_index()
    .unstack(1)
    [sku_index_order]
    .applymap(lambda x: x if x>=0 else np.nan)
    .clip(0.0, None)
    .shift(1)
    .fillna(method="bfill")
    .fillna(0.0)
    .astype(np.float64)
    .values
)

volume_data = (
    sales_data
    .reset_index()
    .groupby(["date", "sku"])
    .volume.sum()
    .sort_index()
    .unstack(1)
    [sku_index_order]
    .clip(0.0, None)
    .fillna(0.0)
    .astype(np.float64)
    .values
)


discount_data = (
    sales_data
    .reset_index()
    .groupby(["date", "sku"])[["promotional_discount", "other_discounts"]].sum()
    .sort_index()
    .stack()
    .unstack(1)
    [sku_index_order]
    .fillna(0.0)
    .clip(None, 0)
)
discount_data = np.swapaxes(discount_data.astype(np.float64).values.reshape(55,2,discount_data.shape[1]), 1, 2)

nr_shifted[-1, :] = nr_shifted[-2, :]
nr_shifted = nr_shifted.sum(axis=1)

In [100]:
scaler = nr_data.mean()
vol_scaler = volume_data.mean()

nr_data = nr_data/scaler
discount_data = discount_data/scaler
nr_shifted = nr_shifted/scaler
brand_constraint_tensor = brand_constraint_tensor/scaler
pack_constraint_tensor = pack_constraint_tensor/scaler
segment_constraint_tensor = segment_constraint_tensor/scaler

volume_data = volume_data/vol_scaler

nr_shifted = np.expand_dims(nr_shifted/nr_shifted.mean(),1)

In [101]:
time_index_array = np.expand_dims(np.arange(1, macro_data.shape[0]+1), 1)/400

In [111]:
tf.compat.v1.reset_default_graph()
tf.compat.v1.enable_eager_execution()

num_splitter = 40

y = tf.constant(nr_data[:num_splitter], dtype=tf.float64)
y_mask = tf.constant(nr_data_mask[:num_splitter], dtype=tf.float64)

discounts = tf.constant(discount_data[:num_splitter], dtype=tf.float64)
mixed_effect = tf.constant(macro_data[:num_splitter], dtype=tf.float64)
time_index = tf.constant(np.expand_dims(np.arange(1, macro_data.shape[0]+1), 1)[:num_splitter]/400, dtype=tf.float64)
shifted_nr = tf.constant(nr_shifted[:num_splitter], dtype=tf.float64)
y_vol = tf.constant(volume_data[:num_splitter], dtype=tf.float64)

val_splitter_ = tf.constant(5, dtype=tf.int32)
val_splitter = 3 #if val_splitter_ == 5 else 2


initial_discount_var = tf.constant(discount_data[-2:], dtype=tf.float64)
mixed_effect_var = tf.constant(macro_data[-2:], dtype=tf.float64)
time_index_var = tf.constant(np.expand_dims(np.arange(1, macro_data.shape[0]+1), 1)[-2:]/400, dtype=tf.float64)
y_mask_var = tf.constant(nr_data_mask[-2:], dtype=tf.float64)
shifted_nr_var = tf.constant(nr_shifted[-2:], dtype=tf.float64)


In [148]:
tf.compat.v1.reset_default_graph()
tf.compat.v1.disable_eager_execution()

sess = tf.compat.v1.Session()

#Y
y = tf.compat.v1.placeholder(dtype=tf.float64, name="nr_actual")
y_mask = tf.compat.v1.placeholder(dtype=tf.float64, name="nr_mask")

# X
discounts = tf.compat.v1.placeholder(dtype=tf.float64, name="discounts")
mixed_effect = tf.compat.v1.placeholder(dtype=tf.float64, name="mixed_effects")
time_index = tf.compat.v1.placeholder(dtype=tf.float64, name="time_index")
shifted_nr = tf.compat.v1.placeholder(dtype=tf.float64, name="shifted_nr")
y_vol = tf.compat.v1.placeholder(dtype=tf.float64, name="volume_actual")

val_splitter_ = tf.compat.v1.placeholder(dtype=tf.int32)
val_splitter = 3 #if val_splitter_ == 5 else 2

initial_discount_var = tf.compat.v1.placeholder(dtype=tf.float64, name="initial_discount_submit")

mixed_effect_var = tf.compat.v1.placeholder(dtype=tf.float64, name="mixed_effect_submit")
time_index_var = tf.compat.v1.placeholder(dtype=tf.float64, name="time_index_submit")
y_mask_var = tf.compat.v1.placeholder(dtype=tf.float64, name="y_mask_submit")
shifted_nr_var = tf.compat.v1.placeholder(dtype=tf.float64, name="shifted_nr_submit")

In [149]:
dim_size = (1,nr_data.shape[1])
me_size = macro_data.shape[-1]

baseline_intercept = tf.Variable(np.expand_dims((nr_data.mean(0)*0.3), 0), dtype=tf.float64)

baseline_slope1_global = tf.Variable(np.full((1,1), 0.1), dtype=tf.float64)
baseline_slope1_hier = tf.Variable(np.full(dim_size, 0.1), dtype=tf.float64)

baseline_slope2_global = tf.Variable(np.full((1,1), 0.1), dtype=tf.float64)
baseline_slope2_hier = tf.Variable(np.full(dim_size, 0.1), dtype=tf.float64)

mixed_effect_mult_global = tf.Variable(np.random.normal(loc=0, size=(1, 1, me_size)), dtype=tf.float64)
mixed_effect_mult_hier = tf.Variable(np.random.normal(loc=0, size=(*dim_size, me_size)), dtype=tf.float64)

discount_mult_global = tf.math.sigmoid(tf.Variable(np.random.normal(loc=0, size=(1, 1, 2)), dtype=tf.float64))*2
discount_mult_hier = tf.math.sigmoid(tf.Variable(np.random.normal(loc=0, size=(*dim_size, 2)), dtype=tf.float64))*2

discount_slope_global = tf.math.sigmoid(tf.Variable(np.random.normal(loc=0, size=(1, 1, 2)), dtype=tf.float64))*2
discount_slope_hier = tf.math.sigmoid(tf.Variable(np.random.normal(loc=0, size=(*dim_size, 2)), dtype=tf.float64))*2

roi_mults_global = tf.Variable(np.random.normal(loc=0, size=(1, 1, me_size)), dtype=tf.float64)
roi_mults_hier = tf.Variable(np.random.normal(loc=0, size=(*dim_size, me_size)), dtype=tf.float64)

nr_to_vol_slope = tf.Variable(np.random.normal(loc=0, size=dim_size), dtype=tf.float64)
nr_to_vol_intercept = tf.Variable(np.random.normal(loc=0, size=dim_size), dtype=tf.float64)

hier_var_list = [baseline_slope1_hier, mixed_effect_mult_hier, discount_slope_hier, roi_mults_hier, discount_mult_hier, baseline_slope2_hier]
global_var_list = [baseline_slope1_global, mixed_effect_mult_global, discount_slope_global, roi_mults_global, nr_to_vol_slope, nr_to_vol_intercept, discount_mult_global, baseline_slope2_global]

discounts_var = tf.Variable(initial_discount_var, dtype=tf.float64)
sliced_discount_var = tf.slice(discounts_var, begin=[0,0,0], size=[2,151,-1])

In [150]:
@tf.function
def model(
        base_intercept_in,
        base_slope1_global_in,
        base_slope1_hier_in,
        base_slope2_global_in,
        base_slope2_hier_in,
        mixed_effect_mult_global_in,
        mixed_effect_mult_hier_in,
        discount_mult_global_in,
        discount_mult_hier_in,
        discount_slope_global_in,
        discount_slope_hier_in,
        roi_mults_global_in,
        roi_mults_hier_in,
        nr_to_vol_slope_in,
        nr_to_vol_intercept_in,
        time_index_in,
        mixed_effect_in,
        discounts_in,
        y_mask_in,
        shifted_nr_in
    ):
    base_slope1_in = base_slope1_global_in + base_slope1_hier_in
    base_slope2_in = base_slope2_global_in + base_slope2_hier_in
    mixed_effect_mult_in = mixed_effect_mult_global_in + mixed_effect_mult_hier_in
    discount_mult_in = discount_mult_global_in + discount_mult_hier_in
    discount_slope_in = discount_slope_global_in + discount_slope_hier_in
    roi_mults_in = roi_mults_global_in + roi_mults_hier_in

    base1_in = tf.multiply(base_slope1_in, time_index_in) + base_intercept_in
    base2_in = base1_in + tf.multiply(base_slope2_in, shifted_nr_in)
    mixed_effect_impact_in = 1 + tf.nn.tanh(tf.multiply(mixed_effect_in, mixed_effect_mult_in))
    total_mixed_effect_impact_in = tf.reduce_prod(mixed_effect_impact_in, axis=-1)
    discount_impact_in = tf.multiply(discount_slope_in, tf.math.log1p(tf.multiply(discount_mult_in, tf.nn.relu(-discounts_in))))
    roi_mult_impact_in = 1 + tf.nn.tanh(tf.multiply(mixed_effect_impact_in, roi_mults_in))
    total_roi_mult_impact_in = tf.expand_dims(tf.reduce_prod(roi_mult_impact_in, axis=-1), axis=-1)

    y_pred_out = tf.multiply(
        y_mask_in,
        (
            tf.multiply(base2_in, total_mixed_effect_impact_in)
            + tf.reduce_sum(
                tf.multiply(discount_impact_in, total_roi_mult_impact_in), axis=-1)
        )
    )

    y_vol_pred_out = nr_to_vol_intercept_in + tf.multiply(y_pred_out, nr_to_vol_slope_in)

    return y_pred_out, y_vol_pred_out

@tf.function
def wape(y_actual, y_prediction):
    return tf.reduce_sum(tf.math.abs(y_actual - y_prediction))/tf.reduce_sum(y_actual)

@tf.function
def mse(y_actual, y_prediction):
    return tf.reduce_sum(tf.math.square(y_actual - y_prediction))

@tf.function
def _tensor_gather(tensor_to_gather_in, encoding, key):
    encoding = pd.Series(encoding["mapper_dict"][key]).sort_index().to_numpy()
    segment_ids = tf.constant(encoding, dtype=tf.int32)
    x_transpose = tf.transpose(tensor_to_gather_in, perm=[1,0,2])
    x_gathered = tf.math.unsorted_segment_sum(x_transpose, segment_ids, num_segments=encoding.max()+1)
    x_gathered_transpose = tf.reduce_mean(tf.transpose(x_gathered, perm=[1,0,2]), axis=2)

    return x_gathered_transpose

In [151]:
y_pred, y_vol_pred = model(
    baseline_intercept,
    baseline_slope1_global,
    baseline_slope1_hier,
    baseline_slope2_global,
    baseline_slope2_hier,
    mixed_effect_mult_global,
    mixed_effect_mult_hier,
    discount_mult_global,
    discount_mult_hier,
    discount_slope_global,
    discount_slope_hier,
    roi_mults_global,
    roi_mults_hier,
    nr_to_vol_slope,
    nr_to_vol_intercept,
    time_index,
    mixed_effect,
    discounts,
    y_mask,
    shifted_nr
)

y_inital_pred, y_initial_vol_pred = model(
    baseline_intercept,
    baseline_slope1_global,
    baseline_slope1_hier,
    baseline_slope2_global,
    baseline_slope2_hier,
    mixed_effect_mult_global,
    mixed_effect_mult_hier,
    discount_mult_global,
    discount_mult_hier,
    discount_slope_global,
    discount_slope_hier,
    roi_mults_global,
    roi_mults_hier,
    nr_to_vol_slope,
    nr_to_vol_intercept,
    time_index_var,
    mixed_effect_var,
    initial_discount_var,
    y_mask_var,
    shifted_nr_var
)

slice_y_inital_pred = tf.slice(y_inital_pred, begin=[0,0], size=[-1, 151])
slice_y_initial_vol_pred = tf.slice(y_initial_vol_pred, begin=[0,0], size=[-1, 151])


y_opt_pred, y_opt_vol_pred = model(
    baseline_intercept,
    baseline_slope1_global,
    baseline_slope1_hier,
    baseline_slope2_global,
    baseline_slope2_hier,
    mixed_effect_mult_global,
    mixed_effect_mult_hier,
    discount_mult_global,
    discount_mult_hier,
    discount_slope_global,
    discount_slope_hier,
    roi_mults_global,
    roi_mults_hier,
    nr_to_vol_slope,
    nr_to_vol_intercept,
    time_index_var,
    mixed_effect_var,
    discounts_var,
    y_mask_var,
    shifted_nr_var
)

slice_y_opt_pred = tf.slice(y_opt_pred, begin=[0,0], size=[-1, 151])
slice_y_opt_vol_pred = tf.slice(y_opt_vol_pred, begin=[0,0], size=[-1, 151])

In [152]:
discount_var_brand = _tensor_gather(sliced_discount_var, final_encodings, "brand")
discount_var_pack = _tensor_gather(sliced_discount_var, final_encodings, "pack")
discount_var_segment = _tensor_gather(sliced_discount_var, final_encodings, "segment")

brand_constraint_loss = tf.reduce_sum(tf.math.square(tf.nn.relu(brand_constraint_tensor - discount_var_brand)))
pack_constraint_loss = tf.reduce_sum(tf.math.square(tf.nn.relu(pack_constraint_tensor - discount_var_pack)))
segment_constraint_loss = tf.reduce_sum(tf.math.square(tf.nn.relu(segment_constraint_tensor - discount_var_segment)))

volume_variation_constraint_temp = pd.DataFrame(index=target_sku_list, columns= ["minimum_volume_variation", "maximum_volume_variation"]).fillna(constraints_dict["volume_variation"].set_index("sku")[["minimum_volume_variation", "maximum_volume_variation"]])
volume_variation_constraint_mask = volume_variation_constraint_temp.notna().any(axis=1).to_numpy()

vol_var_cons_ref_tensor = tf.boolean_mask(tf.stack([y_vol[-1,:151], y_opt_vol_pred[0,:151]], axis=0), volume_variation_constraint_mask, axis=1)
vol_var_cons_vol_tensor = tf.boolean_mask(tf.slice(y_opt_vol_pred, begin=[0,0], size=[-1, 151]), volume_variation_constraint_mask, axis=1)
vol_var_cons_val = tf.constant(np.expand_dims(volume_variation_constraint_temp.dropna().values, axis=0), dtype=tf.float64)

vol_var_cons_lower = tf.multiply(vol_var_cons_ref_tensor, 1 + vol_var_cons_val[:,:,0])
vol_var_cons_upper = tf.multiply(vol_var_cons_ref_tensor, 1 + vol_var_cons_val[:,:,1])


volume_sku_constraint_upper_loss = tf.reduce_sum(tf.math.square(tf.nn.relu(vol_var_cons_vol_tensor - vol_var_cons_upper)))
volume_sku_constraint_lower_loss = tf.reduce_sum(tf.math.square(tf.nn.relu(vol_var_cons_lower - vol_var_cons_vol_tensor)))

negative_discount_loss = tf.reduce_sum(tf.math.square(tf.nn.relu(sliced_discount_var)))

nr_increase = tf.reduce_sum(slice_y_opt_pred - slice_y_inital_pred)
investment = -tf.reduce_sum(discounts_var)
roi = tf.divide(tf.reduce_sum(slice_y_opt_pred - slice_y_inital_pred), -tf.reduce_sum(sliced_discount_var))

loss_roi = (
    -1e2*roi
    +1e5*brand_constraint_loss
    +1e5*pack_constraint_loss
    +1e5*segment_constraint_loss
    +1e5*volume_sku_constraint_upper_loss
    +1e5*volume_sku_constraint_lower_loss
    +1e5*negative_discount_loss
    -1e1*nr_increase
    # +0.1*investment
)

In [153]:
@tf.function
def splitter_func(tensor_to_split):
    return [*tf.split(tf.slice(tensor_to_split, begin=[0,0], size=[34, -1]),2), tf.slice(tensor_to_split, begin=[34,0], size=[-1,-1])]

In [154]:
y_split = splitter_func(y)
y_pred_split = splitter_func(y_pred)

y_vol_split = splitter_func(y_vol)
y_vol_pred_split = splitter_func(y_vol_pred)


# loss
total_wape = tf.math.reduce_mean([wape(y_split[i], y_pred_split[i]) for i in range(0,val_splitter)]) + wape(tf.slice(y, begin=[0,0], size=[-1, 151]), tf.slice(y_pred, begin=[0,0], size=[-1, 151]))
special_wape = wape(tf.slice(y, begin=[0,0], size=[-1, 151]), tf.slice(y_pred, begin=[0,0], size=[-1, 151]))
total_mse = mse(y, y_pred)
actual_wape = wape(y, y_pred)

total_wape_vol = tf.math.reduce_mean([wape(y_vol_split[i], y_vol_pred_split[i]) for i in range(0,val_splitter)]) + wape(tf.slice(y_vol, begin=[0,0], size=[-1, 151]), tf.slice(y_vol_pred, begin=[0,0], size=[-1, 151]))
total_mse_vol = mse(y_vol, y_vol_pred)
actual_wape_vol = wape(y_vol, y_vol_pred)


reg1 = sum([tf.reduce_sum(tf.square(i)) for i in hier_var_list])
reg2 = sum([tf.reduce_sum(tf.square(i)) for i in global_var_list])

loss = (
    1e3*total_wape_vol
    +1e1*total_mse_vol
    +1e3*total_wape
    +1e3*special_wape
    +1e1*total_mse
    +1e1*reg2
    +1e1*reg1
    +1e3*volume_sku_constraint_upper_loss
    +1e3*volume_sku_constraint_lower_loss

)

In [155]:
splitter = 53
feed_dict1 = {
    discounts : discount_data[:splitter],
    mixed_effect: macro_data[:splitter],
    y_vol : volume_data[:splitter],
    y : nr_data[:splitter],
    shifted_nr : nr_shifted[:splitter],
    y_mask : nr_data_mask[:splitter],
    time_index : time_index_array[:splitter],
    val_splitter_ : 5,
    initial_discount_var : discount_data[-2:],
    mixed_effect_var : macro_data[-2:],
    time_index_var : time_index_array[-2:],
    y_mask_var : nr_data_mask[-2:],
    shifted_nr_var : nr_shifted[-2:]
}


# feed_dict1 = {
#     discounts : discount_data[:splitter],
#     mixed_effect: macro_data[:splitter],
#     y_vol : volume_data[:splitter],
#     y : nr_data[:splitter],
#     # shifted_nr : nr_shifted[:splitter],
#     y_mask : nr_data_mask[:splitter],
#     time_index : time_index_array[:splitter],
#     val_splitter_ : 5,
#     initial_discount_var : discount_data[-2:],
#     mixed_effect_var : macro_data[-2:],
#     time_index_var : time_index_array[-2:],
#     y_mask_var : nr_data_mask[-2:]
# }

# feed_dict2 = {
#     discounts : discount_data[splitter:-5],
#     mixed_effect: macro_data[splitter:-5],
#     y_vol : volume_data[splitter:-5],
#     y : nr_data[splitter:-5],
#     # shifted_nr : nr_shifted[splitter:-5],
#     y_mask : nr_data_mask[splitter:-5],
#     time_index : time_index_array[splitter:-5],
#     val_splitter_ : 5,
#     initial_discount_var : discount_data[-2:],
#     mixed_effect_var : macro_data[-2:],
#     time_index_var : time_index_array[-2:],
#     y_mask_var : nr_data_mask[-2:]
# }

# feed_dict3 = {
#     discounts : discount_data[-2:],
#     mixed_effect: macro_data[-2:],
#     y_vol : volume_data[-2:],
#     y : nr_data[-2:],
#     # shifted_nr : nr_shifted[-2:],
#     y_mask : nr_data_mask[-2:],
#     time_index : time_index_array[-2:],
#     val_splitter_ : 5
# }

In [156]:
epoch = 0
# optimizer
lr = lambda x : 1 / np.power(x/5 + 10, 1/2)
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=lr(epoch))#, beta1=0.1, beta2=0.1)
train = optimizer.minimize(loss)

In [157]:
[lr(i) for i in [0, 1, 10, 100, 1000, 10000, 20000]]#, 50000, 80000]]

[0.31622776601683794,
 0.31311214554257477,
 0.2886751345948129,
 0.18257418583505536,
 0.06900655593423542,
 0.022304986837273527,
 0.015791661046371634]

In [158]:
epoch = 0
# optimizer
lr2 = lambda x : 0.5 / np.power(x/5 + 10, 1/2.5)
optimizer_roi = tf.compat.v1.train.AdamOptimizer(learning_rate=0.01)#, beta1=0.1, beta2=0.1)
train_roi = optimizer_roi.minimize(loss_roi, var_list=[discounts_var])

In [159]:
[lr2(i) for i in [0, 1, 10, 100, 1000, 10000, 20000, 50000, 80000]]


[0.19905358527674863,
 0.19748309985118997,
 0.18505358624357665,
 0.1282689390121013,
 0.05889552350375349,
 0.023861161564834817,
 0.018101403746176976,
 0.01255441189851496,
 0.010404314501437794]

In [160]:
# initialize variables
init = tf.compat.v1.global_variables_initializer()
sess.run(init, feed_dict1)

In [161]:
metric_update_track = {
    "epoch" : [],
    "actual_wape" : [],
    "test_wape" : [],
    "loss" : [],
    "mse" : [],
    "reg1" : [],
    "reg2" : []
}

# train model
num_epochs = 60000
for epoch in range(num_epochs):
    (
        _,
        current_loss,
        current_wape,
        # current_mse,
        current_wape_vol,
        # current_mse_vol,
        current_reg1,
        current_reg2,
        current_volume_sku_constraint_upper_loss,
        current_volume_sku_constraint_lower_loss,

    )= sess.run([
        train,
        loss,
        actual_wape,
        # total_mse,
        actual_wape_vol,
        # total_mse_vol,
        reg1,
        reg2,
        volume_sku_constraint_upper_loss,
        volume_sku_constraint_lower_loss,

    ], feed_dict1)

    current_wape_test, current_wape_vol_test = sess.run([actual_wape, actual_wape_vol], feed_dict1)


    if (epoch + 1) % 250 == 0:
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {current_loss:.4f}, WAPE: {current_wape:.4f}, WAPE_TEST: {current_wape_test:.4f}, WAPE_VOL: {current_wape_vol:.4f}, WAPE_VOL_TEST: {current_wape_vol_test:.4f}, reg1: {current_reg1:.4f}, reg2: {current_reg2:.4f}, VOL_CONS_UPPER_LOSS: {current_volume_sku_constraint_upper_loss:.4f}, VOL_CONS_LOWER_LOSS: {current_volume_sku_constraint_lower_loss:.4f}")
        # metric_update_track["epoch"].append(epoch)
        # metric_update_track["actual_wape"].append(current_wape)
        # metric_update_track["test_wape"].append(current_wape_test)
        # metric_update_track["loss"].append(current_loss)
        # metric_update_track["mse"].append(current_mse)
        # metric_update_track["reg1"].append(current_reg1)
        # metric_update_track["reg2"].append(current_reg2)



#         # Training loop
# num_epochs = 500
# for epoch in range(num_epochs):
#     _, current_error, cuurent_mse, current_m1, current_m2, current_c = sess.run([train_op, error, mse_error, m1, m2, c])
#     if (epoch + 1) % 25 == 0:
#         print(f"Epoch {epoch + 1}/{num_epochs}, Error: {current_error:.4f}, MSE: {cuurent_mse:.4f}, m1: {current_m1}, m2: {current_m2}, c: {current_c}")

# # Print the final results for 'm' and 'c'
# final_m1, final_m2, final_c = sess.run([m1, m2, c])
# print(f"Final 'm1' value: {final_m1}")

# print(f"Final 'm2' value: {final_m2}")
# print(f"Final 'c' value: {final_c}")

Epoch 250/60000, Loss: 760848.9092, WAPE: 0.2081, WAPE_TEST: 0.2069, WAPE_VOL: 0.2017, WAPE_VOL_TEST: 0.1971, reg1: 2329.8390, reg2: 699.7108, VOL_CONS_UPPER_LOSS: 4.3630, VOL_CONS_LOWER_LOSS: 1.4195
Epoch 500/60000, Loss: 593104.3897, WAPE: 0.1947, WAPE_TEST: 0.1929, WAPE_VOL: 0.1732, WAPE_VOL_TEST: 0.1704, reg1: 3030.8252, reg2: 613.1645, VOL_CONS_UPPER_LOSS: 0.0912, VOL_CONS_LOWER_LOSS: 0.8397
Epoch 750/60000, Loss: 502945.4891, WAPE: 0.1802, WAPE_TEST: 0.1801, WAPE_VOL: 0.1575, WAPE_VOL_TEST: 0.1574, reg1: 2845.4890, reg2: 557.7905, VOL_CONS_UPPER_LOSS: 0.5918, VOL_CONS_LOWER_LOSS: 0.0689
Epoch 1000/60000, Loss: 495661.9720, WAPE: 0.1770, WAPE_TEST: 0.1770, WAPE_VOL: 0.1552, WAPE_VOL_TEST: 0.1551, reg1: 2829.7340, reg2: 576.1805, VOL_CONS_UPPER_LOSS: 0.8935, VOL_CONS_LOWER_LOSS: 0.0539
Epoch 1250/60000, Loss: 535924.9006, WAPE: 0.1834, WAPE_TEST: 0.1829, WAPE_VOL: 0.1673, WAPE_VOL_TEST: 0.1660, reg1: 2786.5664, reg2: 619.3729, VOL_CONS_UPPER_LOSS: 0.0333, VOL_CONS_LOWER_LOSS: 0.363

In [162]:
metric_update_track = {
    "epoch" : [],
    "actual_wape" : [],
    "test_wape" : [],
    "loss" : [],
    "mse" : [],
    "reg1" : [],
    "reg2" : []
}

# train model
num_epochs = 80000
for epoch in range(num_epochs):
    (
        _,
        current_loss_roi,
        current_roi,
        current_brand_constraint_loss,
        current_pack_constraint_loss,
        current_segment_constraint_loss,
        current_volume_sku_constraint_upper_loss,
        current_volume_sku_constraint_lower_loss,
        current_negative_discount_loss

        # current_wape,
        # current_mse,
        # current_wape_vol,
        # current_mse_vol,
        # current_reg1,
        # current_reg2
    )= sess.run([
        train_roi,
        loss_roi,
        roi,
        brand_constraint_loss,
        pack_constraint_loss,
        segment_constraint_loss,
        volume_sku_constraint_upper_loss,
        volume_sku_constraint_lower_loss,
        negative_discount_loss
        # actual_wape,
        # total_mse,
        # actual_wape_vol,
        # total_mse_vol,
        # reg1,
        # reg2
    ], feed_dict1)

    if (epoch + 1) % 250 == 0:
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {current_loss_roi:.4f}, ROI: {current_roi:.4f}, BRAND_LOSS: {current_brand_constraint_loss:.4f}, PACK_LOSS: {current_pack_constraint_loss:.4f}, SEGMENT_LOSS: {current_segment_constraint_loss:.4f}, VOL_CONS_UPPER_LOSS: {current_volume_sku_constraint_upper_loss:.4f}, VOL_CONS_LOWER_LOSS: {current_volume_sku_constraint_lower_loss:.4f}, NEGATIVE_LOSS: {current_negative_discount_loss:.4f}")
        # ")#, WAPE_TEST: {current_wape_test:.4f}, WAPE_VOL: {current_wape_vol:.4f}, WAPE_VOL_TEST: {current_wape_vol_test:.4f}, reg1: {current_reg1:.4f}, reg2: {current_reg2:.4f}")
        # metric_update_track["epoch"].append(epoch)
        # metric_update_track["actual_wape"].append(current_wape)
        # metric_update_track["test_wape"].append(current_wape_test)
        # metric_update_track["loss"].append(current_loss)
        # metric_update_track["mse"].append(current_mse)
        # metric_update_track["reg1"].append(current_reg1)
        # metric_update_track["reg2"].append(current_reg2)



#         # Training loop
# num_epochs = 500
# for epoch in range(num_epochs):
#     _, current_error, cuurent_mse, current_m1, current_m2, current_c = sess.run([train_op, error, mse_error, m1, m2, c])
#     if (epoch + 1) % 25 == 0:
#         print(f"Epoch {epoch + 1}/{num_epochs}, Error: {current_error:.4f}, MSE: {cuurent_mse:.4f}, m1: {current_m1}, m2: {current_m2}, c: {current_c}")

# # Print the final results for 'm' and 'c'
# final_m1, final_m2, final_c = sess.run([m1, m2, c])
# print(f"Final 'm1' value: {final_m1}")

# print(f"Final 'm2' value: {final_m2}")
# print(f"Final 'c' value: {final_c}")

Epoch 250/80000, Loss: 113523631.1456, ROI: -1.4258, BRAND_LOSS: 289.6229, PACK_LOSS: 14.1913, SEGMENT_LOSS: 0.0000, VOL_CONS_UPPER_LOSS: 0.6921, VOL_CONS_LOWER_LOSS: 1.4670, NEGATIVE_LOSS: 829.2596
Epoch 500/80000, Loss: 75899585.4421, ROI: -0.6549, BRAND_LOSS: 81.7292, PACK_LOSS: 6.3827, SEGMENT_LOSS: 0.0000, VOL_CONS_UPPER_LOSS: 0.6921, VOL_CONS_LOWER_LOSS: 1.4757, NEGATIVE_LOSS: 668.7134
Epoch 750/80000, Loss: 63113660.3536, ROI: -0.3171, BRAND_LOSS: 51.4347, PACK_LOSS: 7.0079, SEGMENT_LOSS: 0.0000, VOL_CONS_UPPER_LOSS: 0.6921, VOL_CONS_LOWER_LOSS: 1.4886, NEGATIVE_LOSS: 570.5116
Epoch 1000/80000, Loss: 56497133.2793, ROI: -0.1126, BRAND_LOSS: 45.5163, PACK_LOSS: 6.8781, SEGMENT_LOSS: 0.0000, VOL_CONS_UPPER_LOSS: 0.6921, VOL_CONS_LOWER_LOSS: 1.5043, NEGATIVE_LOSS: 510.3798
Epoch 1250/80000, Loss: 52425341.4287, ROI: 0.0592, BRAND_LOSS: 42.2287, PACK_LOSS: 6.6369, SEGMENT_LOSS: 0.0000, VOL_CONS_UPPER_LOSS: 0.6921, VOL_CONS_LOWER_LOSS: 1.5220, NEGATIVE_LOSS: 473.1741
Epoch 1500/80000

In [163]:
vol_actual_submit, nr_actual_submit, vol_opt_submit, nr_opt_submit, optimal_discount = sess.run([y_initial_vol_pred, y_inital_pred, y_opt_vol_pred, y_opt_pred, discounts_var], feed_dict1)

nr_actual_submit = nr_actual_submit * scaler
vol_actual_submit = vol_actual_submit * vol_scaler
nr_opt_submit = nr_opt_submit * scaler
vol_opt_submit = vol_opt_submit * vol_scaler

nr_data_temp = (
    sales_data
    .reset_index()
    .groupby(["date", "sku"])
    .net_revenue.sum()
    .sort_index()
    .unstack(1)
    [sku_index_order]
)

nr_actual_submit = pd.DataFrame(nr_actual_submit, index=nr_data_temp.index[-2:], columns=nr_data_temp.columns)
nr_opt_submit = pd.DataFrame(nr_opt_submit, index=nr_data_temp.index[-2:], columns=nr_data_temp.columns)
vol_actual_submit = pd.DataFrame(vol_actual_submit, index=nr_data_temp.index[-2:], columns=nr_data_temp.columns)
vol_opt_submit = pd.DataFrame(vol_opt_submit, index=nr_data_temp.index[-2:], columns=nr_data_temp.columns)
promo_optimal_discount = pd.DataFrame(optimal_discount[:,:,0], index=nr_data_temp.index[-2:], columns=nr_data_temp.columns)
other_optimal_discount = pd.DataFrame(optimal_discount[:,:,1], index=nr_data_temp.index[-2:], columns=nr_data_temp.columns)

submit_temp = sales_data[sales_data.gto.isna()].reset_index().set_index(["date", "sku", "brand", "pack", "size"]).sort_index()
submit_temp.loc[:, "net_revenue"] = submit_temp.net_revenue.fillna(nr_actual_submit.stack())
submit_temp.loc[:, "volume"] = submit_temp.volume.fillna(vol_actual_submit.stack())
submit_temp.loc[:, "net_revenue_opt"] = submit_temp.net_revenue.fillna(nr_actual_submit.stack())
submit_temp.loc[:, "volume_opt"] = submit_temp.volume.fillna(vol_actual_submit.stack())
submit_temp.loc[:, "promotional_discount"] = np.nan
submit_temp.loc[:, "other_discounts"] = np.nan
submit_temp.loc[:, "promotional_discount"] = submit_temp.net_revenue.fillna(promo_optimal_discount.stack())
submit_temp.loc[:, "other_discounts"] = submit_temp.volume.fillna(other_optimal_discount.stack())
submit_temp = submit_temp.reset_index()


cols_req = [ "Year", "Month", "SKU", "Brand", "Pack", "Size", "Volume_Estimate", "Net_Revenue_Estimate", "Optimal_Promotional_Discount", "Optimal_Other_Discounts", "Optimal_Volume", "Optimal_Net_Revenue"]


submit_temp.loc[:, "Year"] = submit_temp.date.dt.year
submit_temp.loc[:, "Month"] = submit_temp.date.dt.month
submit_temp.loc[:, "SKU"] = submit_temp.sku
submit_temp.loc[:, "Brand"] = submit_temp.brand
submit_temp.loc[:, "Pack"] = submit_temp.pack
submit_temp.loc[:, "Size"] = submit_temp["size"]
submit_temp.loc[:, "Volume_Estimate"] = submit_temp.volume
submit_temp.loc[:, "Net_Revenue_Estimate"] = submit_temp.net_revenue
submit_temp.loc[:, "Optimal_Promotional_Discount"] = submit_temp.promotional_discount
submit_temp.loc[:, "Optimal_Other_Discounts"] = submit_temp.other_discounts
submit_temp.loc[:, "Optimal_Volume"] = submit_temp.volume_opt
submit_temp.loc[:, "Optimal_Net_Revenue"] = submit_temp.net_revenue_opt

submit_temp = submit_temp[cols_req]
submit_temp.to_csv("/home/akshay-development-server/promo-optimization_team-simpsons-paradox/data/team_simpsons_paradox_submission_11.csv", index=False)
