In [1]:
import Utils
from DataLoader import DataLoader
from FeatureCreator import FeatureCreator
from Model import Model
from CouponCreator import CouponCreator

In [2]:
config = Utils.read_json('../config.json')


# DATA LOADING
dataloader = DataLoader(config)
dataset = dataloader.get_dataset()


# FEATURE CREATION
feature_creator = FeatureCreator(dataset, config)
model_data = feature_creator.get_model_data()


# MODELING - Train-Test-Split
model = Model(model_data)
X_train, y_train, X_test, y_test = model.train_test_split(config)


# FITTING THE MODEL
model.fit(X_train, y_train)
y_hat = model.predict(X_train)
log_loss_score = model.log_loss_score(y_train, y_hat)
print(f'log loss scores on the train data: \t{log_loss_score}')


# CREATING THE FINAL OUTPUT
coupon_creator = CouponCreator(model, config)
optimal_coupons = coupon_creator.get_top_coupons()


# EVALUATING THE RESULTS AGAINST RANDOM AND NO COUPONS
evaluation = Evaluation(model, config, optimal_coupons)
evaluation.evaluate()

optimal_coupons

Read dataset.parquet.gzip from cache...
Successfully read dataset.parquet.gzip into memory.
Read model_data.parquet.gzip from cache...
Successfully read model_data.parquet.gzip into memory.


  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):


log loss scores on the train data: 	0.07593747852013212


Unnamed: 0,shopper,week,coupon,product,discount
2000171,0,90,0,171,0.15
2000157,0,90,1,157,0.15
2000067,0,90,2,67,0.15
2000076,0,90,3,76,0.15
2000116,0,90,4,116,0.15
...,...,...,...,...,...
2499996,1999,90,0,246,0.20
2499874,1999,90,1,124,0.20
2499781,1999,90,2,31,0.15
2499791,1999,90,3,41,0.15


In [3]:
import numpy as np

class Evaluation:
    
    def __init__(self, model, config, optimal_coupons):
        self.model = model
        self.config = config
        self.optimal_coupons = optimal_coupons
        self.random_coupons = None
        self.zero_coupons = None

        
    def evaluate(self):
        zero_coupons = self.get_zero_coupons()
        random_coupons = self.get_random_coupons()
        optimal_coupons = self.optimal_coupons

        zero_revenue = self.get_expected_total_revenue(zero_coupons)
        random_revenue = self.get_expected_total_revenue(random_coupons)
        optimal_revenue = self.get_expected_total_revenue(optimal_coupons)

        print(f"expected total revenue for zero coupons:\t{zero_revenue}")
        print(f"expected total revenue for random coupons:\t{random_revenue}")
        print(f"expected total revenue for optimal coupons:\t{optimal_revenue}")
    

    def get_random_coupons(self):
        random_coupons = self.optimal_coupons[['shopper', 'week', 'coupon']].copy()
        shoppers = list(range(self.config['model']['n_shoppers'])) 
        products = list(range(self.config['model']['n_products']))
        discounts = self.config['model']['discounts']
        n_coupons = self.config['model']['n_coupons']

        # for each shopper: random choice of products and discounts
        for shopper in shoppers:
            random_products = np.random.choice(products, n_coupons, replace=False)
            random_discounts = np.random.choice(discounts, n_coupons)
            random_coupons.loc[random_coupons['shopper'] == shopper, 'product'] = random_products
            random_coupons.loc[random_coupons['shopper'] == shopper, 'discount'] = random_discounts
        return random_coupons

    
    def get_zero_coupons(self):
        zero_coupons = self.optimal_coupons[['shopper', 'week', 'coupon']].copy()
        shoppers = list(range(self.config['model']['n_shoppers'])) 
        products = list(range(self.config['model']['n_products']))
        n_coupons = self.config['model']['n_coupons']
        
        # create the same output structure using random products and zero discount
        for shopper in shoppers:
            random_products = np.random.choice(products, 5, replace=False)
            zero_discount = 0
            zero_coupons.loc[zero_coupons['shopper'] == shopper, 'product'] = random_products
            zero_coupons.loc[zero_coupons['shopper'] == shopper, 'discount'] = zero_discount
        return zero_coupons
    
    
    def get_expected_total_revenue(self, coupons):
        revenue = self.model.data[['week', 'shopper', 'product', 'price']].copy()
        revenue = revenue.loc[revenue['week'] == self.config['model']['test_week']]
        revenue['discount'] = 0
        revenue.loc[coupons.index, 'discount'] = coupons['discount']

        X_test = self.model.X_test.copy()
        X_test['discount'] = 0
        X_test.loc[coupons.index, 'discount'] = coupons['discount']

        revenue['probabilities'] = self.model.predict(X_test)
        revenue['expected_revenue'] = revenue["probabilities"] * revenue["price"] * (1 - revenue["discount"])
        total_revenue = revenue['expected_revenue'].sum()
        return total_revenue

In [4]:
# EVALUATING THE RESULTS AGAINST RANDOM AND NO COUPONS
evaluation = Evaluation(model, config, optimal_coupons)
evaluation.evaluate()

[evaluation] expected total revenue - zero coupons:	8928397.306369238
[evaluation] expected total revenue - random coupons:	9702851.896227017
[evaluation] expected total revenue - optimal coupons:	10010292.044085754
