In [None]:
# !pip install azure-storage-blob
# !pip install python-dotenv

import os
from dotenv import load_dotenv
from setup_utils import fetch_data, load_data, create_time_index

# Populate the process environment from a local .env file BEFORE reading
# from it; os.getenv only sees variables that are already set when it is called.
load_dotenv()
CONNECTION_STRING = os.getenv("CONNECTION_STRING")
# fetch_data(CONNECTION_STRING)

# load_data() returns seven objects, unpacked here in the order it yields them.
(
    brand_mapping,
    macro_data,
    brand_constraint,
    pack_constraint,
    segment_constraint,
    sales_data,
    volume_variation_constraint,
) = load_data()

# create_time_index receives both frames in a list and returns their
# replacements in the same order.
(
    macro_data,
    sales_data,
) = create_time_index([macro_data, sales_data])



In [None]:
# import tensorboard

In [None]:
# df = sales_data[sales_data.volume.notna()].join(macro_data, on="date", how="left")
# df_test = sales_data[sales_data.volume.isna()].join(macro_data, on="date", how="left")

# Left-join the macro columns onto every sales row via "date", then keep a row
# when its gto is missing, or when gto and volume are strictly positive and
# neither discount column is above zero.
df = (
    sales_data
    .join(macro_data, on="date", how="left")
    .loc[
        lambda joined: joined.gto.isna()
        | (
            (joined.gto > 0)
            & (joined.volume > 0)
            & (joined.promotional_discount <= 0)
            & (joined.other_discounts <= 0)
        )
    ]
    .reset_index(drop=True)
)

In [None]:
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, r2_score
import xgboost as xgb
import numpy as np
import pandas as pd
# from scipy import stats


In [None]:
# tf.compat.v1.disable_eager_execution()


In [None]:
# Rows whose gto is missing, with the index turned back into columns,
# calendar month/year derived from "date", every remaining NaN replaced by
# the placeholder 10000, and brand_mapping merged on the shared columns.
temp_data = (
    sales_data.loc[sales_data.gto.isna()]
    .reset_index()
    .assign(
        month=lambda frame: frame.date.dt.month,
        year=lambda frame: frame.date.dt.year,
    )
    .fillna(10000)
    .merge(brand_mapping)
)

In [None]:
# One row per distinct (sku, pack, brand, segment) combination found in temp_data.
master_mapping = (
    temp_data.loc[:, ["sku", "pack", "brand", "segment"]]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [None]:
# Bundle the constraint tables under the string keys the Optimizer looks up.
constraints_dict = dict(
    brand=brand_constraint,
    pack=pack_constraint,
    segment=segment_constraint,
    volume_variation=volume_variation_constraint,
)

In [None]:
# (Removed a stray, incomplete `tf.c` expression left over from editing; it is
# not a TensorFlow attribute and made this cell fail under "Run All".)

In [None]:
class Optimizer:
    """Gradient-descent search over a tensor ``x`` on a TF1-style graph.

    The constructor disables eager execution, resets the default graph, opens
    a session and builds everything needed for training: the trainable
    variable ``x`` of shape ``(n_time_period, n_sku, n_vehicle)``, per-group
    aggregations of ``x`` (brand / pack / segment), constraint penalty
    tensors, a placeholder response model (``fake_model``), the ``roi``
    tensor, the loss and an Adam training op.

    NOTE(review): ``roi`` holds the *negated* ratio (see ``_calculate_roi``)
    and ``define_loss`` negates it again, so the minimised quantity is
    ``+sum(y_opt - y_cons) / sum(x)`` plus the budget penalty. Confirm that
    this sign convention is intended.
    """

    def __init__(self, master_mapping, constraint_dict):
        """Build the graph, the loss and the training op, then initialise variables.

        Args:
            master_mapping: DataFrame with a "sku" column plus the "brand",
                "pack" and "segment" columns that are looked up below.
            constraint_dict: mapping with "brand", "pack" and "segment"
                entries; each is a DataFrame with "month", the key column
                and "max_discount".
        """
        tf.compat.v1.disable_eager_execution()
        tf.compat.v1.reset_default_graph()
        self.sess = tf.compat.v1.Session()

        self.master_mapping = master_mapping
        self._encodings = self._create_encodings(master_mapping)

        # Two time periods: must agree with the two month rows (6 and 7)
        # produced by _constraint_tensor_generate.
        n_time_period = 2
        # The SKU axis must have exactly one entry per encoded SKU, otherwise
        # the segment ids built in _tensor_gather do not line up with x.
        n_sku = len(self._encodings["label_dict"]["sku"])
        n_vehicle = 2

        x_shape = (n_time_period, n_sku, n_vehicle)
        self.x_initial = np.random.normal(loc=100, scale=10, size=x_shape)

        # Random per-element coefficients of the placeholder response model.
        self.slopes = tf.constant(np.random.rand(*x_shape), name='slopes', dtype=tf.float64)

        self.constraint_dict = constraint_dict
        self._brand_constraint_tensor = self._constraint_tensor_generate(self.constraint_dict["brand"], self._encodings, "brand")
        self._pack_constraint_tensor = self._constraint_tensor_generate(self.constraint_dict["pack"], self._encodings, "pack")
        self._segment_constraint_tensor = self._constraint_tensor_generate(self.constraint_dict["segment"], self._encodings, "segment")

        # NOTE(review): x starts from a fresh random draw, not from
        # self.x_initial, although x_initial is used as the baseline and as
        # the budget cap below. Confirm whether x should start at x_initial.
        self.x = tf.Variable(np.random.normal(loc=100, scale=10, size=x_shape), name='x', dtype=tf.float64)

        self.x_brand = self._tensor_gather(self.x, self._encodings, "brand")
        self.x_pack = self._tensor_gather(self.x, self._encodings, "pack")
        self.x_segment = self._tensor_gather(self.x, self._encodings, "segment")

        # Penalty is positive wherever the aggregated x falls below the constraint value.
        self.brand_constraint_loss = tf.reduce_sum(tf.nn.relu(self._brand_constraint_tensor - self.x_brand))
        self.pack_constraint_loss = tf.reduce_sum(tf.nn.relu(self._pack_constraint_tensor - self.x_pack))
        self.segment_constraint_loss = tf.reduce_sum(tf.nn.relu(self._segment_constraint_tensor - self.x_segment))

        self.y_pred_opt = self.fake_model(self.x)
        self.y_pred_cons = self.fake_model(self.x_initial)

        self.roi = self._calculate_roi(self.y_pred_opt, self.y_pred_cons, self.x)

        self.loss = self.define_loss()

        self.optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.1)
        self.train_op = self.optimizer.minimize(self.loss)

        self.sess.run(tf.compat.v1.global_variables_initializer())

    @tf.function
    def fake_model(self, X):
        """Placeholder response model: ``sum(slopes * relu(X) ** 0.75)`` over all elements."""
        return tf.reduce_sum(tf.multiply(self.slopes, tf.pow(tf.nn.relu(X), tf.constant(0.75, dtype=tf.float64))))

    # @tf.function
    # def fake_model_cons(self, X):
    #     return tf.reduce_sum(tf.multiply(self.slopes, X))

    def _create_encodings(self, master_map):
        """Integer-encode every column of ``master_map`` and map SKUs to groups.

        Returns:
            dict with two entries:
              * "label_dict": ``{column: {original value: integer code}}``,
                codes follow the sorted order of the column's unique values;
              * "mapper_dict": ``{column: {sku code: column code}}`` for every
                column other than "sku".
        """

        def label_encoder(series):
            # Codes 0..k-1 assigned in sorted order of the unique values.
            unique_values = series.sort_values().unique()
            return {value: code for code, value in enumerate(unique_values)}

        def mapper(col_val, col_key="sku"):
            # Work on the frame passed to this method rather than on a
            # notebook-level global of a similar name.
            pairs = master_map[[col_key, col_val]].drop_duplicates()
            encoded_keys = pairs[col_key].map(label_dict[col_key])
            encoded_vals = pairs[col_val].map(label_dict[col_val])
            return dict(zip(encoded_keys, encoded_vals))

        label_dict = {col: label_encoder(master_map[col]) for col in master_map.columns}
        mapper_dict = {col: mapper(col) for col in master_map.columns if col != "sku"}

        return {"label_dict": label_dict, "mapper_dict": mapper_dict}

    @tf.function
    def _calculate_roi(self, y_opt, y_act, x):
        """Return the negated ratio ``-(sum(y_opt - y_act) / sum(x))``.

        The value is computed from the arguments that are passed in.
        """
        return -tf.divide(tf.reduce_sum(tf.subtract(y_opt, y_act)), tf.reduce_sum(x))

    @tf.function
    def _tensor_gather(self, x, encoding, key):
        """Aggregate ``x`` along its SKU axis into the groups of ``key``.

        The SKU axis (axis 1) is moved to the front, entries sharing a group
        id are summed, the axes are moved back and the last axis is summed
        out, giving one value per (time period, group).
        """
        # Group id of each SKU, ordered by SKU code.
        encoding = pd.Series(encoding["mapper_dict"][key]).sort_index().to_numpy()
        segment_ids = tf.constant(encoding, dtype=tf.int32)
        x_transpose = tf.transpose(x, perm=[1,0,2])
        x_gathered = tf.math.unsorted_segment_sum(x_transpose, segment_ids, num_segments=encoding.max()+1)
        x_gathered_transpose = tf.reduce_sum(tf.transpose(x_gathered, perm=[1,0,2]), axis=2)

        return x_gathered_transpose

    def _constraint_tensor_generate(self, constraint, encoding, key):
        """Turn a constraint table into a dense (month x group) array.

        Values of the table are replaced by their integer codes for ``key``,
        ``max_discount`` is summed per ("month", key) and laid out with one
        row for each of months 6 and 7 and one column per group code;
        combinations that are absent become 0.0.

        Returns:
            numpy array of shape ``(2, number of codes for key)``.
        """
        encoding_length = max(encoding["label_dict"][key].values())+1

        # replace() returns a new frame, so the caller's table is left untouched.
        totals = (
            constraint.replace(encoding["label_dict"][key])
            .groupby(["month", key])
            .max_discount.sum()
            .sort_index()
            .unstack(1)
        )

        # Align to the fixed month rows and the full range of group codes.
        dense = totals.reindex(
            index=pd.Index(range(6, 8), dtype='int64'),
            columns=pd.Index(range(0, encoding_length), dtype='int64', name=key),
        ).fillna(0.0)

        return dense.to_numpy()

    @tf.function
    def define_loss(self):
        """Loss = ``-roi`` plus a penalty when ``sum(x)`` exceeds ``sum(x_initial)``.

        The brand / pack / segment constraint penalties are currently not
        part of the loss (left commented out below).
        """
        return -self.roi + tf.nn.relu(tf.reduce_sum(self.x) - self.x_initial.sum()) #+ self.brand_constraint_loss + self.pack_constraint_loss + self.segment_constraint_loss

    def train(self, epochs):
        """Run ``epochs`` training steps, printing the loss every 100th step."""
        for i in range(epochs):
            _, loss = self.sess.run([self.train_op, self.loss])
            # self.roi = self._calculate_roi(self.y_pred_opt, self.y_pred_cons, self.x)
            if i%100==0:
                print(loss)


In [None]:
# Build the optimiser graph from the SKU mapping and the constraint tables.
opt = Optimizer(
    master_mapping=master_mapping,
    constraint_dict=constraints_dict,
)

In [None]:
opt.sess.close()

In [None]:
opt.train(400)

In [None]:
opt.x_initial.sum()

In [None]:
opt.sess.run(opt.x)#.sum()

In [None]:
opt.sess.run(opt.roi)

In [None]:
@tf.function
def fake_model(X):
    """Scale the axis-2 sum of ``X`` by a random factor drawn from N(2, 0.1).

    The resulting op is named 'volume_optimal'.
    NOTE(review): under @tf.function the Python body presumably runs at
    trace time, so the factor would be fixed per trace — confirm.
    """
    factor = tf.constant(np.random.normal(loc=2, scale=0.1, size=None), dtype=tf.float64)
    summed_over_last_axis = tf.reduce_sum(X, axis=2)
    return tf.multiply(factor, summed_over_last_axis, name='volume_optimal')

@tf.function
def fake_model_cons(X):
    """Return ``X`` summed over axis 2 (no scaling applied)."""
    collapsed = tf.reduce_sum(input_tensor=X, axis=2)
    return collapsed

In [None]:

# Manual training loop on a stand-alone session.
# NOTE(review): `objective`, `train_op` and `x` are not defined anywhere in the
# visible part of this notebook, so this cell relies on state from elsewhere
# and will not run on a fresh kernel as-is. The session opened here is also
# never closed.
sess = tf.compat.v1.Session()

# Initialise all graph variables, then print the objective before any training step.
sess.run(tf.compat.v1.global_variables_initializer())
print(sess.run(objective))

num_epochs = 100
for epoch in range(num_epochs):
    # One optimisation step; also fetches the objective and x from the same run.
    _, current_error, current_x = sess.run([train_op, objective, x])
    # Report every 100th epoch; the objective is evaluated again for the message
    # rather than reusing current_error.
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch + 1}/{num_epochs}, Error: {sess.run(objective):.4f}")

# Print the final results for 'm' and 'c'
# final_m, final_c = sess.run(x)
# print(f"Final 'm' value: {final_m:.4f}")
# print(f"Final 'c' value: {final_c:.4f}")

In [None]:
# Evaluate the objective in the current session and show it as the cell output.
# NOTE(review): `objective` is not defined in the visible part of this notebook.
sess.run(objective)

In [None]:
# import tensorflow as tf

# tf.compat.v1.disable_eager_execution()

# # Example data
# data = tf.constant([2, 3, 1, 5, 2, 4], dtype=tf.float32)
# segment_ids = tf.constant([0, 1, 0, 1, 2, 2], dtype=tf.int32)  # Unsorted segment IDs
# sorted_segment_indices = tf.argsort(segment_ids)
# sorted_segment_ids = tf.gather(segment_ids, sorted_segment_indices)

# # Create unique segment IDs for sorting
# unique_segment_ids, _ = tf.unique(segment_ids)

# # Sort the data based on the sorted segment IDs
# sorted_data = tf.gather(data, tf.argsort(segment_ids))

# # Compute the segment sum using tf.segment_sum with sorted data and original segment IDs
# segment_sum_result = tf.math.segment_sum(sorted_data, sorted_segment_ids)

# # Create a TensorFlow session
# Evaluate `qq` in a short-lived session that is closed when the block exits.
# NOTE(review): `qq` is not defined in the visible part of this notebook, and
# the `as sess` target rebinds the name `sess` used by earlier cells.
with tf.compat.v1.Session() as sess:
    # Evaluate the tensor and keep the resulting value
    result = sess.run(qq)

    # # Print the result
    # print("Data:")
    # print(sess.run(data))
    # print("Segment IDs:")
    # print(sess.run(segment_ids))
    # print("Segment Sum Result:")
    print(result)


In [None]:
# Display `x` as the cell output.
# NOTE(review): `x` is not defined in the visible part of this notebook.
x

In [None]:

# df_og = df.copy(deep=True)

# # Identify and convert categorical columns to label encoding
# categorical_columns = df.select_dtypes(include=['object']).columns
# label_encoders = {}
# for col in categorical_columns:
#     le = LabelEncoder()
#     df[col] = le.fit_transform(df[col])
#     label_encoders[col] = le


# target_column = ["gto", "volume"]
# X = df[df.gto.notna()].drop(columns=[*target_column])
# y = df[df.gto.notna()][target_column]

# X_test = df[df.gto.isna()].drop(columns=[*target_column])





In [None]:
# import numpy as np

# # Determine the number of levels by finding the maximum index along each dimension
# num_levels = max(len(index) for index in series.index)

# # Initialize an empty multi-dimensional array with zeros
# shape = tuple(np.max(series.index, axis=0) + 1)  # Determine the shape based on the maximum indices
# np.array(series).reshape(shape=shape)


# def create_multi_dim_array(series):
#     "create a multi dimensional array from multdimensional index series according to number of levels in series"
    
#     num_levels = max(len(index) for index in series.index)

#     # Initialize an empty multi-dimensional array with zeros
#     shape = [series.index.get_level_values(i).nunique() for i in range(num_levels)]
    
#     return np.array(series).reshape(shape)

# create_multi_dim_array(series)

# import numpy as np
# import pandas as pd
# from itertools import product

# def create_multi_dim_array(series):
#     """Create a multi-dimensional array from a multi-dimensional index series
#     by first creating the Cartesian product of index levels and then reshaping."""
    
#     # Calculate the number of levels and the unique values in each level
#     num_levels = len(series.index.levels)
#     unique_values = [level.unique() for level in series.index.levels]
    
#     # Create the Cartesian product of index levels
#     cartesian_product = list(product(*unique_values))
    
#     # Create a new DataFrame with the Cartesian product as the index
#     merged_index = pd.MultiIndex.from_tuples(cartesian_product, names=series.index.names)
#     merged_series = pd.Series(0, index=merged_index, dtype=int)  # Initialize with zeros
    
#     # Merge the original series into the new DataFrame
#     merged_series.update(series)
    
#     # Reshape the merged series into a multi-dimensional NumPy array
#     shape = [len(level) for level in merged_series.index.levels]
#     multi_dim_array = np.array(merged_series).reshape(shape)
    
#     return multi_dim_array, unique_values

# create_multi_dim_array(series)[1]


# import numpy as np

# # Determine the number of levels by finding the maximum index along each dimension
# num_levels = max(len(index) for index in series.index)

# # Initialize an empty multi-dimensional array with zeros
# shape = [series.index.get_level_values(i).nunique() for i in range(num_levels)]
# multi_dim_array = np.zeros(shape, dtype=float)

# # # Fill the array using the index series
# # for index in series.index:
# #     multi_dim_array[index] = 1  # You can set any value you want here

# # print(multi_dim_array)
# # 