## **Libraries**

In [None]:
# Import libraries
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression

## **Data**

In [None]:
# Load raw data
df_purchase = pd.read_csv(os.path.join("..", "data", "purchase_data.csv"))

In [None]:
# Load pickled objects in order to segment the purchase data
kmeans_pca = pickle.load(open(os.path.join("utils", "kmeans_pca.pickle"), "rb"))
pca = pickle.load(open(os.path.join("utils", "pca.pickle"), "rb"))
scaler = pickle.load(open(os.path.join("utils", "scaler.pickle"), "rb"))

In [None]:
# Standardization
features = df_purchase[["Sex", "Marital status", "Age", "Education", "Income", "Occupation", "Settlement"]]
df_segm_std = scaler.transform(features)

In [None]:
# Apply PCA on the purchase data to obtain three principal components for each row in the table
df_segm_pca = pca.transform(df_segm_std)

In [None]:
# Segment the purchase data into the four segments using the principal component analysis
pca_segments = kmeans_pca.predict(df_segm_pca)

In [None]:
# Create a copy to keep original data
df_predictors = df_purchase.copy()

In [None]:
# Add segment labels
df_predictors["Segment"] = pca_segments

In [None]:
# Create segment dummies
segment_dummies = pd.get_dummies(pca_segments, prefix = "Segment", prefix_sep = "_")

In [None]:
# Concatenate the two data frames
df_predictors = pd.concat([df_predictors, segment_dummies], axis = 1)

In [None]:
# Create a copy to keep original data
df_pca = df_predictors

## **Purchase Probability**

### **Model**

In [None]:
# Set the dependent variable to predict the purchase probability
Y = df_pca["Incidence"]

In [None]:
# The dependent variable is based on the the mean across the five prices
X = pd.DataFrame()
X["Mean_Price"] = (df_pca["Price_1"] +
                   df_pca["Price_2"] +
                   df_pca["Price_3"] +
                   df_pca["Price_4"] +
                   df_pca["Price_5"]) / 5

In [None]:
# Fit a logistic regression to estimate the probability of purchase
model_purchase = LogisticRegression(solver = "sag")
model_purchase.fit(X, Y)
model_purchase.coef_

### **Elasticity**

In [None]:
# Show the price range for the five different brands
df_pca[["Price_1", "Price_2", "Price_3", "Price_4", "Price_5"]].describe().round(3)

In [None]:
# Introduce the price range for examination of purchase probability
price_range = np.arange(0.5, 3.5, 0.01)
price_range

In [None]:
# Create a new data frame based on the price range defined above
df_price = pd.DataFrame(price_range)
df_price.head()

In [None]:
# Predict and extract the purchase probability for each point in our price range defined in the second column
Y_pr = model_purchase.predict_proba(df_price)
purchase_probability = Y_pr[:][:, 1]

In [None]:
# Compute the price elasticity using model coefficients
price_elasticity = model_purchase.coef_[:, 0] * price_range * (1 - purchase_probability)

In [None]:
# Create a data frame that contains all the prices elasticities calculated
df_elasticities = pd.DataFrame(price_range)
df_elasticities.head()

In [None]:
# Add the price elasticities calculated before
df_elasticities = df_elasticities.rename(columns = {0: "Price_Point"})
df_elasticities["Price_Elasticity"] = price_elasticity
df_elasticities.head()

In [None]:
# Plot the price elasticity of purchase probability of the average customer over the price range
fig, ax = plt.subplots(figsize = (9, 6))
ax.plot(price_range, price_elasticity, color = "grey")
ax.set_xlabel("Price", fontsize = 12)
ax.set_ylabel("Elasticity", fontsize = 12)

### **By Segments**

In [None]:
# Segment 0 - Standard
df_segm_0 = df_pca[df_pca["Segment"] == 0]
Y = df_segm_0["Incidence"]

X = pd.DataFrame()
X["Mean_Price"] = (df_segm_0["Price_1"] + 
                   df_segm_0["Price_2"] + 
                   df_segm_0["Price_3"] + 
                   df_segm_0["Price_4"] + 
                   df_segm_0["Price_5"]) / 5

model_segm_0 = LogisticRegression(solver = "sag")
model_segm_0.fit(X, Y)

Y_segm_0 = model_segm_0.predict_proba(df_price)
pp_segm_0 = Y_segm_0[:][:, 1]                                           # Purchase Probability (PP)
pe_segm_0 = model_segm_0.coef_[:, 0] * price_range * (1 - pp_segm_0)    # Price Elasticity (PE)

df_elasticities["PE_Segment_0"] = pe_segm_0

In [None]:
# Segment 1 - Career-Focused
df_segm_1 = df_pca[df_pca["Segment"] == 1]
Y = df_segm_1["Incidence"]

X = pd.DataFrame()
X["Mean_Price"] = (df_segm_1["Price_1"] + 
                   df_segm_1["Price_2"] + 
                   df_segm_1["Price_3"] + 
                   df_segm_1["Price_4"] + 
                   df_segm_1["Price_5"]) / 5

model_segm_1 = LogisticRegression(solver = "sag")
model_segm_1.fit(X, Y)

Y_segm_1 = model_segm_1.predict_proba(df_price)
pp_segm_1 = Y_segm_1[:][:, 1]                                           # Purchase Probability (PP)
pe_segm_1 = model_segm_1.coef_[:, 0] * price_range * (1 - pp_segm_1)    # Price Elasticity (PE)

df_elasticities["PE_Segment_1"] = pe_segm_1

In [None]:
# Segment 2 - Fewer-Opportunities
df_segm_2 = df_pca[df_pca["Segment"] == 2]
Y = df_segm_2["Incidence"]

X = pd.DataFrame()
X["Mean_Price"] = (df_segm_2["Price_1"] + 
                   df_segm_2["Price_2"] + 
                   df_segm_2["Price_3"] + 
                   df_segm_2["Price_4"] + 
                   df_segm_2["Price_5"]) / 5

model_segm_2 = LogisticRegression(solver = "sag")
model_segm_2.fit(X, Y)

Y_segm_2 = model_segm_2.predict_proba(df_price)
pp_segm_2 = Y_segm_2[:][:, 1]                                           # Purchase Probability (PP)
pe_segm_2 = model_segm_2.coef_[:, 0] * price_range * (1 - pp_segm_2)    # Price Elasticity (PE)

df_elasticities["PE_Segment_2"] = pe_segm_2

In [None]:
# Segment 3 - Well-Off
df_segm_3 = df_pca[df_pca["Segment"] == 3]
Y = df_segm_3["Incidence"]

X = pd.DataFrame()
X["Mean_Price"] = (df_segm_3["Price_1"] + 
                   df_segm_3["Price_2"] + 
                   df_segm_3["Price_3"] + 
                   df_segm_3["Price_4"] + 
                   df_segm_3["Price_5"]) / 5

model_segm_3 = LogisticRegression(solver = "sag")
model_segm_3.fit(X, Y)

Y_segm_3 = model_segm_3.predict_proba(df_price)
pp_segm_3 = Y_segm_3[:][:, 1]                                           # Purchase Probability (PP)
pe_segm_3 = model_segm_3.coef_[:, 0] * price_range * (1 - pp_segm_3)    # Price Elasticity (PE)

df_elasticities["PE_Segment_3"] = pe_segm_3

### **Results**

In [None]:
# Overlay the average elasticity curve and the four per-segment curves on one set of axes
fig, ax = plt.subplots(figsize = (9, 6))
curves = [(price_elasticity, "grey"),
          (pe_segm_0, "b"),
          (pe_segm_1, "green"),
          (pe_segm_2, "r"),
          (pe_segm_3, "orange")]
for elasticity_curve, line_color in curves:
    ax.plot(price_range, elasticity_curve, color = line_color)
ax.set_xlabel("Price", fontsize = 12)
ax.set_ylabel("Elasticity", fontsize = 12)
# Legend entries follow the plotting order above; the anchor places the legend outside the axes
ax.legend(["Average", "Segment 0", "Segment 1", "Segment 2", "Segment 3"],
          loc = "upper left",
          fontsize = 12,
          bbox_to_anchor = (1, 1))

## **Promotion**

### **Data**

In [None]:
# Set the dependent variable to predict the purchase probability
Y = df_pca["Incidence"]

In [None]:
# The dependent variable is based on the the mean across the five prices
X = pd.DataFrame()
X["Mean_Price"] = (df_pca["Price_1"] +
                   df_pca["Price_2"] +
                   df_pca["Price_3"] +
                   df_pca["Price_4"] +
                   df_pca["Price_5"]) / 5

In [None]:
# Include and calculate the average promotion rate across the five brands
X["Mean_Promotion"] = (df_pca["Promotion_1"] +
                       df_pca["Promotion_2"] +
                       df_pca["Promotion_3"] +
                       df_pca["Promotion_4"] +
                       df_pca["Promotion_5"] ) / 5
X.head()

### **Model**

In [None]:
# Estimate the relationship between price, promotion and purchase probability.
# "sag" is a stochastic solver, so random_state is fixed to make the
# coefficients reproducible on Restart Kernel -> Run All.
model_promotion = LogisticRegression(solver = "sag", random_state = 42)
model_promotion.fit(X, Y)
model_promotion.coef_

### **Elasticity**

In [None]:
# Create a new data frame and include the price range as the price feature
df_promotion = pd.DataFrame(price_range)
df_promotion = df_promotion.rename(columns = {0: "Price_Range"})
df_promotion.head()

In [None]:
# Calculate price elasticities of purchase probability when there is a promotion at each price points
df_promotion["Promotion"] = 1
Y_promotion = model_promotion.predict_proba(df_promotion)
promotion = Y_promotion[:, 1]
pe_promo = (model_promotion.coef_[:, 0] * price_range) * (1 - promotion)

In [None]:
# Update master data to include elasticities of purchase probability with promotion feature
df_elasticities["PE_Promotion"] = pe_promo
df_elasticities.head()

### **Non-Promoted Price**

In [None]:
# Create a new data frame and include the price range as the price feature
df_no_promo = pd.DataFrame(price_range)
df_no_promo = df_no_promo.rename(columns = {0: "Price_Range"})

In [None]:
# Examine the price elasticity of purchase probability when there is no promotion
df_no_promo["Promotion"] = 0
Y_no_promo = model_promotion.predict_proba(df_no_promo)
no_promotion = Y_no_promo[: , 1]
pe_no_promo = model_promotion.coef_[:, 0] * price_range * (1- no_promotion)

In [None]:
# Update master data frame to include purchase probability elasticities without promotion
df_elasticities["PE_No_Promotion"] = pe_no_promo
df_elasticities.head()

In [None]:
# Plot purchase elasticities with and without promotion side by side for comparison.
# Each curve is labelled so the two lines can be told apart in the figure.
plt.figure(figsize = (9, 6))
plt.plot(price_range, pe_no_promo, label = "No promotion")
plt.plot(price_range, pe_promo, label = "Promotion")
plt.xlabel("Price", fontsize = 12)
plt.ylabel("Elasticity", fontsize = 12)
plt.legend(fontsize = 12)

## **Brand Choice**

### **Data**

In [None]:
# Filter data to include only purchase occasion when a purchase has occured
brand_choice = df_pca[df_pca["Incidence"] == 1]
pd.options.display.max_rows = 100
brand_choice

In [None]:
# Set the dependent variable to predict the brand choice
Y = brand_choice["Brand"]

In [None]:
# Create the features to predict the dependent variable
features = ["Price_1", "Price_2", "Price_3", "Price_4", "Price_5"]
X = brand_choice[features]

### **Model**

In [None]:
# Estimate the relationship between prices and brand choice
model_brand = LogisticRegression(solver = "sag", multi_class = "multinomial")
model_brand.fit(X, Y)

In [None]:
# Create a new data frame that contains the coefficients
bc_coefficient = pd.DataFrame(model_brand.coef_)
round(bc_coefficient, 3)

In [None]:
# Transpose the data frame to keep with the conventional representation of results
bc_coefficient = pd.DataFrame(np.transpose(model_brand.coef_))

In [None]:
# Add labels for the columns that represent the coefficients of the brands
coefficients = ["Brand_1", "Brand_2", "Brand_3", "Brand_4", "Brand_5"]
bc_coefficient.columns = [coefficients]

In [None]:
# Add labels for the index that represent the coefficients of the prices
prices = ["Price_1", "Price_2", "Price_3", "Price_4", "Price_5"]
bc_coefficient.index = [prices]

In [None]:
# Round and display the coefficients
bc_coefficient = bc_coefficient.round(3)
bc_coefficient

### **Elasticity**

In [None]:
# Create a new data frame with price columns used to predict the brand choice probabilities
df_brand_5 = pd.DataFrame(index = np.arange(price_range.size))
df_brand_5["Price_1"] = brand_choice["Price_1"].mean()
df_brand_5["Price_2"] = brand_choice["Price_2"].mean()
df_brand_5["Price_3"] = brand_choice["Price_3"].mean()
df_brand_5["Price_4"] = brand_choice["Price_4"].mean()
df_brand_5["Price_5"] = price_range
df_brand_5

In [None]:
# Predict brand choice probability for the fifth brand
brand_5 = model_brand.predict_proba(df_brand_5)
pr_brand_5 = brand_5[: ][:, 4]

In [None]:
# The beta coefficient required is that of the fifth brand at its own price
beta5 = bc_coefficient.iloc[4, 4]
beta5

In [None]:
# Calculate price elasticities for brand choice without promotion
pe_br_5 = beta5 * price_range * (1 - pr_brand_5)

In [None]:
# Add the price elasticities to our master data frame
df_elasticities["Brand_5"] = pe_br_5
df_elasticities.head()

### **Cross-Price Elasticity**

In [None]:
# Examine the effect of the changes in price of a competitor brand
df_cross = pd.DataFrame(index = np.arange(price_range.size))
df_cross["Price_1"] = brand_choice["Price_1"].mean()
df_cross["Price_2"] = brand_choice["Price_2"].mean()
df_cross["Price_3"] = brand_choice["Price_3"].mean()
df_cross["Price_4"] = price_range
df_cross["Price_5"] = brand_choice["Price_5"].mean()

df_cross.head()

In [None]:
# Predict brand choice probability for the competitor brand
predict_cross = model_brand.predict_proba(df_cross)
predict_cross

In [None]:
# Select the brand choice probability for the competitor brand
pr_brand_4 = predict_cross[:][:, 3]

In [None]:
# Calculate the cross-price elasticity using the brand choice probability for the competitor brand
cross_elasticity = -beta5 * price_range * pr_brand_4

In [None]:
# Update price elasticities data frame to include the cross-price elasticities
df_elasticities["Cross-Price_Elasticity"] = cross_elasticity
df_elasticities.head()

In [None]:
# Examine the cross-price elasticity of brand choice
plt.figure(figsize = (9, 6))
plt.plot(price_range, cross_elasticity, color = "grey")
plt.xlabel("Price Brand 4", fontsize = 12)
plt.ylabel("Elasticity", fontsize = 12)

## **Purchase Quantity**

### **Data**

In [None]:
# Filter the data to contain only visits where the client has purchased at least one product
df_quantity = df_pca[df_pca["Incidence"] == 1]
df_quantity.head()

In [None]:
# Create brand dummies for each of the five brands
df_quantity = pd.get_dummies(df_quantity, columns = ["Brand"], prefix = "Brand", prefix_sep = "_")
df_quantity.describe()

In [None]:
# Create a variable that indicates the price of the product purchased
df_quantity["Price_Incidence"] = (df_quantity["Brand_1"] * df_quantity["Price_1"] +
                                  df_quantity["Brand_2"] * df_quantity["Price_2"] +
                                  df_quantity["Brand_3"] * df_quantity["Price_3"] +
                                  df_quantity["Brand_4"] * df_quantity["Price_4"] +
                                  df_quantity["Brand_5"] * df_quantity["Price_5"])

In [None]:
# Create a variable that indicates whether there was a promotion
df_quantity["Promotion_Incidence"] = (df_quantity["Brand_1"] * df_quantity["Promotion_1"] +
                                      df_quantity["Brand_2"] * df_quantity["Promotion_2"] +
                                      df_quantity["Brand_3"] * df_quantity["Promotion_3"] +
                                      df_quantity["Brand_4"] * df_quantity["Promotion_4"] +
                                      df_quantity["Brand_5"] * df_quantity["Promotion_5"])

### **Model**

In [None]:
# Adds the variables to the features used to predict the dependent variable
X = df_quantity[["Price_Incidence", "Promotion_Incidence"]]
pd.options.display.max_rows = 100
X

In [None]:
# Set the dependent variable to predict the quantity purchased
Y = df_quantity["Quantity"]
Y.head()

In [None]:
# Fit the linear regression model
model_quantity = LinearRegression()
model_quantity.fit(X, Y)
model_quantity.coef_

### **Elasticity**

In [None]:
# Create a new data frame to examine the price elasticity of demand with active promotional activities
df_elasticity = pd.DataFrame(index = np.arange(price_range.size))
df_elasticity["Price_Incidence"] = price_range
df_elasticity["Promotion_Incidence"] = 1

In [None]:
# Compute the coefficient for the price
beta_quantity = model_quantity.coef_[0]
round(beta_quantity, 3)

In [None]:
# Predict the quantity of products purchased with promotion
predict_quantity = model_quantity.predict(df_elasticity)

In [None]:
# Calculate the price elasticity of demand with promotion
pe_promo = beta_quantity * price_range / predict_quantity

In [None]:
# Update master data to include elasticities of demand with promotion feature
df_elasticities["PE_Quantity_Promotion"] = pe_promo
df_elasticities.head()

### **Non-Promoted Price**

In [None]:
# Overwrite the promotion incidence variable in order to use the same data
df_elasticity["Promotion_Incidence"] = 0

In [None]:
# Predict the quantity of products purchased without promotion
predict_quantity = model_quantity.predict(df_elasticity)

In [None]:
# Calculate the new price elasticities
pe_no_promo = beta_quantity * price_range / predict_quantity

In [None]:
# Update master data to include elasticities of demand without promotion feature
df_elasticities["PE_Quantity_No_Promotion"] = pe_no_promo
df_elasticities.head()

In [None]:
# Plot the elasticities with and without promotion side by side.
# Each curve is labelled so the two lines can be told apart in the figure.
plt.figure(figsize = (9, 6))
plt.plot(price_range, pe_promo, color = "orange", label = "Promotion")
plt.plot(price_range, pe_no_promo, label = "No promotion")
plt.xlabel("Price", fontsize = 12)
plt.ylabel("Elasticity", fontsize = 12)
plt.legend(fontsize = 12)