# purchase-analytics-predictive-analysis

## Libraries

In [104]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression, LinearRegression

## Data Preparation

In [2]:
# Load the raw purchase records.
df = pd.read_csv('purchase data.csv')

# Restore the previously fitted artifacts (scaler, PCA, cluster model).
# NOTE(review): joblib.load unpickles arbitrary objects; only load files from a trusted source.
scale = joblib.load('scaler')
pca = joblib.load('pca')
model = joblib.load('model_pca')

# Standardise the demographic columns, project them with PCA and assign a segment per row.
features = df[['Sex', 'Marital status', 'Age', 'Education', 'Income', 'Occupation', 'Settlement size']]
df_scale = scale.transform(features)
df_pca = pca.transform(df_scale)
y_pred = model.predict(df_pca)

# Attach the segment label and its one-hot columns to a copy of the raw data,
# leaving `df` itself untouched.
segment_dummies = pd.get_dummies(y_pred, prefix='Segment', prefix_sep='_')
df_predictor = pd.concat([df.assign(segment=y_pred), segment_dummies], axis=1)

# Independent working copy used by the rest of the notebook.
data = df_predictor.copy()

In [3]:
# Preview the first rows of the segmented data.
data.head()

Unnamed: 0,ID,Day,Incidence,Brand,Quantity,Last_Inc_Brand,Last_Inc_Quantity,Price_1,Price_2,Price_3,...,Age,Education,Income,Occupation,Settlement size,segment,Segment_0,Segment_1,Segment_2,Segment_3
0,200000001,1,0,0,0,0,0,1.59,1.87,2.01,...,47,1,110866,1,0,2,0,0,1,0
1,200000001,11,0,0,0,0,0,1.51,1.89,1.99,...,47,1,110866,1,0,2,0,0,1,0
2,200000001,12,0,0,0,0,0,1.51,1.89,1.99,...,47,1,110866,1,0,2,0,0,1,0
3,200000001,16,0,0,0,0,0,1.52,1.89,1.98,...,47,1,110866,1,0,2,0,0,1,0
4,200000001,18,0,0,0,0,0,1.52,1.89,1.99,...,47,1,110866,1,0,2,0,0,1,0


### Purchase Probability Model

In [4]:
# Target: the 'Incidence' column (its value counts below show only 0 and 1).
y = data['Incidence']

In [5]:
# Class balance of the target.
y.value_counts()

0    44055
1    14638
Name: Incidence, dtype: int64

In [6]:
# Single predictor: the average of the five brand prices on each row.
_price_sum = data['Price_1']
for _col in ('Price_2', 'Price_3', 'Price_4', 'Price_5'):
    _price_sum = _price_sum + data[_col]
X = pd.DataFrame({'price_mean': _price_sum / 5})
X.head()

Unnamed: 0,price_mean
0,2.044
1,2.028
2,2.028
3,2.028
4,2.03


#### Create a Classifier model

In [7]:
from sklearn.linear_model import LogisticRegression

In [8]:
# Logistic regression of purchase incidence on the mean price (default hyper-parameters).
lr = LogisticRegression()
lr.fit(X,y)

LogisticRegression()

In [9]:
# Fitted price coefficient (single feature).
lr.coef_

array([[-2.3480548]])

## Price Elasticity of Purchase Probability

In [10]:
# List the available columns.
data.columns

Index(['ID', 'Day', 'Incidence', 'Brand', 'Quantity', 'Last_Inc_Brand',
       'Last_Inc_Quantity', 'Price_1', 'Price_2', 'Price_3', 'Price_4',
       'Price_5', 'Promotion_1', 'Promotion_2', 'Promotion_3', 'Promotion_4',
       'Promotion_5', 'Sex', 'Marital status', 'Age', 'Education', 'Income',
       'Occupation', 'Settlement size', 'segment', 'Segment_0', 'Segment_1',
       'Segment_2', 'Segment_3'],
      dtype='object')

In [11]:
# The prices of the five brands are an important driver of purchase probability.
# Their summary statistics show the observed price range that the analysis should cover.

_brand_price_cols = ['Price_1', 'Price_2', 'Price_3', 'Price_4', 'Price_5']
data[_brand_price_cols].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Price_1,58693.0,1.392074,0.091139,1.1,1.34,1.39,1.47,1.59
Price_2,58693.0,1.780999,0.170868,1.26,1.58,1.88,1.89,1.9
Price_3,58693.0,2.006789,0.046867,1.87,1.97,2.01,2.06,2.14
Price_4,58693.0,2.159945,0.089825,1.76,2.12,2.17,2.24,2.26
Price_5,58693.0,2.654798,0.098272,2.11,2.63,2.67,2.7,2.8


In [12]:
# Price grid over which the purchase probability is examined.
# It starts at 0.5 and advances in 0.01 steps up to (not including) 3.49, which slightly
# widens the observed price range of 1.1 to 2.8.

price_range = np.arange(start=0.5, stop=3.49, step=0.01)

In [13]:
# Print the number of grid points (label is Spanish for "amount of data"), then the grid itself.
print('cantidad de datos:', len(price_range))
print(price_range)

cantidad de datos: 299
[0.5  0.51 0.52 0.53 0.54 0.55 0.56 0.57 0.58 0.59 0.6  0.61 0.62 0.63
 0.64 0.65 0.66 0.67 0.68 0.69 0.7  0.71 0.72 0.73 0.74 0.75 0.76 0.77
 0.78 0.79 0.8  0.81 0.82 0.83 0.84 0.85 0.86 0.87 0.88 0.89 0.9  0.91
 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99 1.   1.01 1.02 1.03 1.04 1.05
 1.06 1.07 1.08 1.09 1.1  1.11 1.12 1.13 1.14 1.15 1.16 1.17 1.18 1.19
 1.2  1.21 1.22 1.23 1.24 1.25 1.26 1.27 1.28 1.29 1.3  1.31 1.32 1.33
 1.34 1.35 1.36 1.37 1.38 1.39 1.4  1.41 1.42 1.43 1.44 1.45 1.46 1.47
 1.48 1.49 1.5  1.51 1.52 1.53 1.54 1.55 1.56 1.57 1.58 1.59 1.6  1.61
 1.62 1.63 1.64 1.65 1.66 1.67 1.68 1.69 1.7  1.71 1.72 1.73 1.74 1.75
 1.76 1.77 1.78 1.79 1.8  1.81 1.82 1.83 1.84 1.85 1.86 1.87 1.88 1.89
 1.9  1.91 1.92 1.93 1.94 1.95 1.96 1.97 1.98 1.99 2.   2.01 2.02 2.03
 2.04 2.05 2.06 2.07 2.08 2.09 2.1  2.11 2.12 2.13 2.14 2.15 2.16 2.17
 2.18 2.19 2.2  2.21 2.22 2.23 2.24 2.25 2.26 2.27 2.28 2.29 2.3  2.31
 2.32 2.33 2.34 2.35 2.36 2.37 2.38 2.39 2.4  2.41 2.4

In [14]:
# Wrap the price grid in a one-column DataFrame so it can be passed to predict_proba.
df_price_range = pd.DataFrame(price_range)

In [15]:
 # Predict the purchase probability for every price on the grid.
# predict_proba returns one row per price point and two columns:
# column 0 is the probability of not purchasing, column 1 the probability of purchasing.
# The grid column is relabelled to the feature name used at fit time ('price_mean') so that
# scikit-learn's feature-name validation does not complain about a mismatch.

y_pred = lr.predict_proba(df_price_range.rename(columns={0: 'price_mean'}))

In [16]:
# Elasticity = price coefficient * price * (1 - P(purchase)).
purchase_proba = y_pred[:, 1]
_price_coef = lr.coef_[:, 0]
pe = _price_coef * price_range * (1 - purchase_proba)
pe

array([-0.09639583, -0.10046378, -0.10465871, -0.10898378, -0.11344218,
       -0.11803717, -0.12277206, -0.1276502 , -0.13267501, -0.13784994,
       -0.14317849, -0.14866423, -0.15431075, -0.1601217 , -0.16610077,
       -0.1722517 , -0.17857828, -0.18508431, -0.19177366, -0.19865024,
       -0.20571797, -0.21298082, -0.22044281, -0.22810796, -0.23598033,
       -0.24406402, -0.25236314, -0.26088181, -0.26962419, -0.27859446,
       -0.28779678, -0.29723535, -0.30691437, -0.31683803, -0.32701053,
       -0.33743607, -0.34811884, -0.359063  , -0.37027271, -0.38175212,
       -0.39350534, -0.40553645, -0.41784951, -0.43044852, -0.44333747,
       -0.45652028, -0.4700008 , -0.48378287, -0.49787023, -0.51226657,
       -0.52697549, -0.54200053, -0.55734513, -0.57301265, -0.58900636,
       -0.60532941, -0.62198488, -0.63897569, -0.65630468, -0.67397456,
       -0.6919879 , -0.71034714, -0.7290546 , -0.74811244, -0.76752265,
       -0.78728711, -0.80740751, -0.82788538, -0.84872209, -0.86

In [17]:
# Master table of price elasticities: one row per grid price.
# Later sections append one elasticity column each.

df_price_elasticity = pd.DataFrame({'Price_Point': price_range})
df_price_elasticity['means'] = pe
df_price_elasticity.head()

Unnamed: 0,Price_Point,means
0,0.5,-0.096396
1,0.51,-0.100464
2,0.52,-0.104659
3,0.53,-0.108984
4,0.54,-0.113442


#### Viz

In [18]:
import cufflinks as cf
from IPython.display import display,HTML

In [19]:
# List the plot themes cufflinks offers.
cf.getThemes()

['ggplot', 'pearl', 'solar', 'space', 'white', 'polar', 'henanigans']

In [20]:
# Configure cufflinks for offline rendering with the 'ggplot' theme.
cf.set_config_file(sharing='public',theme='ggplot',offline=True) # run cf.getThemes() to list all available themes

In [21]:
# Index by 'Price_Point' so the x axis shows the actual prices instead of the row number.
df_price_elasticity.set_index('Price_Point')['means'].iplot(
    kind='line', xTitle='Prices', yTitle='Elasticity',
    title='Price Elasticity of Purchase Probability')

## Purchase Probability by Segments
### Segment 1 - Career-Focused

In [22]:
# Keep only the rows assigned to segment 1 and preview them.
segment1 = data[data['segment']==1]
segment1.head()

Unnamed: 0,ID,Day,Incidence,Brand,Quantity,Last_Inc_Brand,Last_Inc_Quantity,Price_1,Price_2,Price_3,...,Age,Education,Income,Occupation,Settlement size,segment,Segment_0,Segment_1,Segment_2,Segment_3
370,200000005,7,0,0,0,0,0,1.59,1.88,2.01,...,49,1,121466,1,2,1,0,1,0,0
371,200000005,13,0,0,0,0,0,1.51,1.89,1.99,...,49,1,121466,1,2,1,0,1,0,0
372,200000005,20,0,0,0,0,0,1.52,1.89,1.99,...,49,1,121466,1,2,1,0,1,0,0
373,200000005,32,0,0,0,0,0,1.5,1.9,1.99,...,49,1,121466,1,2,1,0,1,0,0
374,200000005,39,0,0,0,0,0,1.5,1.9,1.99,...,49,1,121466,1,2,1,0,1,0,0


In [23]:
# Mean of the five brand prices for segment 1 rows.
mean_price = segment1['Price_1']
for _col in ('Price_2', 'Price_3', 'Price_4', 'Price_5'):
    mean_price = mean_price + segment1[_col]
mean_price = mean_price / 5

# Predictor frame and target for the segment-level incidence model.
X = pd.DataFrame({'mean_price': mean_price})
y = segment1['Incidence']

In [24]:
# Purchase-incidence model for segment 1 (logistic regression on mean price).
model_incidence_seg1 = LogisticRegression()
model_incidence_seg1.fit(X,y)

LogisticRegression()

In [25]:
# Purchase probabilities for segment 1 over the price grid. The grid column is relabelled
# to the fitted feature name ('mean_price') to satisfy scikit-learn's feature-name check.
y_seg1 = model_incidence_seg1.predict_proba(df_price_range.rename(columns={0: 'mean_price'}))
purchase_seg1 = y_seg1[:, 1]
# NOTE: this rebinds `pe`, which earlier held the overall elasticity; the following cell
# stores it in the master table as 'pe_seg1'.
pe = model_incidence_seg1.coef_[:, 0] * price_range * (1 - purchase_seg1)

#### Results 

In [26]:
# Store the segment 1 elasticities (currently held in `pe`) in the master table.
df_price_elasticity['pe_seg1'] = pe

In [27]:
# Compare overall and segment 1 elasticities against the actual price grid.
df_price_elasticity.set_index('Price_Point')[['means', 'pe_seg1']].iplot(
    kind='line', xTitle='Prices', yTitle='Elasticity',
    title='Price Elasticity of Purchase Probability')

### Segment 2 - Fewer-Opportunities

In [28]:
# Rows assigned to segment 2.
segment2 = data.loc[data['segment'] == 2]

# Mean of the five brand prices for those rows.
mean_price = segment2['Price_1']
for _col in ('Price_2', 'Price_3', 'Price_4', 'Price_5'):
    mean_price = mean_price + segment2[_col]
mean_price = mean_price / 5

# Predictor frame and target for the segment-level incidence model.
X = pd.DataFrame({'mean_price': mean_price})
y = segment2['Incidence']

In [29]:
# Purchase-incidence model for segment 2 (logistic regression on mean price).
model_incidence_seg2 = LogisticRegression()
model_incidence_seg2.fit(X,y)

LogisticRegression()

In [30]:
# Purchase probabilities for segment 2 over the price grid. The grid column is relabelled
# to the fitted feature name ('mean_price') to satisfy scikit-learn's feature-name check.
y_seg2 = model_incidence_seg2.predict_proba(df_price_range.rename(columns={0: 'mean_price'}))
purchase_seg2 = y_seg2[:, 1]
# Elasticity = price coefficient * price * (1 - P(purchase)).
pe_seg2 = model_incidence_seg2.coef_[:, 0] * price_range * (1 - purchase_seg2)

#### Results seg2

In [31]:
# Store the segment 2 elasticities in the master table.
df_price_elasticity['pe_seg2'] = pe_seg2

In [32]:
# Compare overall, segment 1 and segment 2 elasticities against the actual price grid.
df_price_elasticity.set_index('Price_Point')[['means', 'pe_seg1', 'pe_seg2']].iplot(
    kind='line', xTitle='Prices', yTitle='Elasticity',
    title='Price Elasticity of Purchase Probability')

#### Segment 0 - Standard

In [33]:
# Rows assigned to segment 0.
segment0 = data.loc[data['segment'] == 0]

# Mean of the five brand prices for those rows.
mean_price = segment0['Price_1']
for _col in ('Price_2', 'Price_3', 'Price_4', 'Price_5'):
    mean_price = mean_price + segment0[_col]
mean_price = mean_price / 5

# Predictor frame and target for the segment-level incidence model.
X = pd.DataFrame({'mean_price': mean_price})
y = segment0['Incidence']

In [34]:
# Fit the purchase-incidence model for segment 0.
model_incidence_seg0 = LogisticRegression()
model_incidence_seg0.fit(X, y)

# Purchase probability (column 1) over the price grid. The grid column is relabelled to the
# fitted feature name ('mean_price') to satisfy scikit-learn's feature-name check.
y_seg0 = model_incidence_seg0.predict_proba(df_price_range.rename(columns={0: 'mean_price'}))
purchase_seg0 = y_seg0[:, 1]
# Elasticity = price coefficient * price * (1 - P(purchase)).
pe_seg0 = model_incidence_seg0.coef_[:, 0] * price_range * (1 - purchase_seg0)

# Store the result in the master table.
df_price_elasticity['pe_seg0'] = pe_seg0


# Plot against the actual prices (index by 'Price_Point' instead of the row number).
df_price_elasticity.set_index('Price_Point')[['means', 'pe_seg1', 'pe_seg2', 'pe_seg0']].iplot(
    kind='line', xTitle='Prices', yTitle='Elasticity',
    title='Price Elasticity of Purchase Probability')

#### Segment 3 - Well-Off

In [35]:
# Rows assigned to segment 3.
segment3 = data.loc[data['segment'] == 3]

# Mean of the five brand prices for those rows.
mean_price = segment3['Price_1']
for _col in ('Price_2', 'Price_3', 'Price_4', 'Price_5'):
    mean_price = mean_price + segment3[_col]
mean_price = mean_price / 5

# Predictor frame and target for the segment-level incidence model.
X = pd.DataFrame({'mean_price': mean_price})
y = segment3['Incidence']

In [36]:
# Fit the purchase-incidence model for segment 3.
model_incidence_seg3 = LogisticRegression()
model_incidence_seg3.fit(X, y)

# Purchase probability (column 1) over the price grid. The grid column is relabelled to the
# fitted feature name ('mean_price') to satisfy scikit-learn's feature-name check.
y_seg3 = model_incidence_seg3.predict_proba(df_price_range.rename(columns={0: 'mean_price'}))
purchase_seg3 = y_seg3[:, 1]
# Elasticity = price coefficient * price * (1 - P(purchase)).
pe_seg3 = model_incidence_seg3.coef_[:, 0] * price_range * (1 - purchase_seg3)

# Store the result in the master table.
df_price_elasticity['pe_seg3'] = pe_seg3


# Plot against the actual prices (index by 'Price_Point' instead of the row number).
df_price_elasticity.set_index('Price_Point')[['means', 'pe_seg1', 'pe_seg2', 'pe_seg0', 'pe_seg3']].iplot(
    kind='line', xTitle='Prices', yTitle='Elasticity',
    title='Price Elasticity of Purchase Probability')

### Purchase Probability with Promotion Feature

#### Data Preparation

In [37]:
# Target for the price + promotion model: purchase incidence over all rows.
y = data['Incidence']

In [38]:
# Two predictors per row: the mean of the five brand prices and the mean of the five
# promotion columns.
_price_sum = data['Price_1']
_promo_sum = data['Promotion_1']
for _brand in range(2, 6):
    _price_sum = _price_sum + data[f'Price_{_brand}']
    _promo_sum = _promo_sum + data[f'Promotion_{_brand}']

X = pd.DataFrame({'mean_price': _price_sum / 5})
X['mean_promo'] = _promo_sum / 5
X.head()


Unnamed: 0,mean_price,mean_promo
0,2.044,0.2
1,2.028,0.0
2,2.028,0.0
3,2.028,0.0
4,2.03,0.0


### Model estimation

In [39]:
# Logistic regression of incidence on mean price and mean promotion.
# NOTE: this rebinds `model`, which until here held the cluster model loaded with joblib.
model = LogisticRegression()
model.fit(X, y)
model.coef_

array([[-1.49402641,  0.56152541]])

#### Price Elasticity with Promotion

In [40]:
# Scenario frame for the "with promotion" case: one row per grid price.
df_price_elasticity_promotion = pd.DataFrame(price_range, columns=['Price_Range'])
df_price_elasticity_promotion.head()

Unnamed: 0,Price_Range
0,0.5
1,0.51
2,0.52
3,0.53
4,0.54


In [41]:
# Compute the price elasticity of purchase probability assuming a promotion is active at every price point.
df_price_elasticity_promotion['promotion'] =1

In [42]:
# The model was fitted on columns 'mean_price' and 'mean_promo'. Relabel the scenario
# columns (same order) so scikit-learn's feature-name validation accepts them; newer
# releases raise on mismatched string column names.
y_promotion = model.predict_proba(
    df_price_elasticity_promotion.rename(
        columns={'Price_Range': 'mean_price', 'promotion': 'mean_promo'}))

In [43]:
# Column 1 of predict_proba is the purchase probability.
promo = y_promotion[:, 1]

# Elasticity = price coefficient * price * (1 - P(purchase)).
_price_coef = model.coef_[:, 0]
price_elasticyti_promo = (_price_coef * price_range) * (1 - promo)

In [44]:
# Add the "promotion = 1" purchase-probability elasticities to the master table.
df_price_elasticity['elasticity_promotion_1'] = price_elasticyti_promo
df_price_elasticity.head()

Unnamed: 0,Price_Point,means,pe_seg1,pe_seg2,pe_seg0,pe_seg3,elasticity_promotion_1
0,0.5,-0.096396,-0.119088,-0.030921,-0.211169,-0.168042,-0.125732
1,0.51,-0.100464,-0.123271,-0.032688,-0.217739,-0.172967,-0.129848
2,0.52,-0.104659,-0.127548,-0.034541,-0.224417,-0.177962,-0.134043
3,0.53,-0.108984,-0.131919,-0.036486,-0.231204,-0.183027,-0.138318
4,0.54,-0.113442,-0.136386,-0.038526,-0.238099,-0.188165,-0.142674


### Price Elasticity with No Promotion

In [45]:
# Scenario frame for the "no promotion" case: one row per grid price, promotion fixed at 0.
df_price_elasticity_no_promotion = pd.DataFrame(price_range, columns=['Price_Range'])
df_price_elasticity_no_promotion['promotion'] =0
df_price_elasticity_no_promotion.head()

Unnamed: 0,Price_Range,promotion
0,0.5,0
1,0.51,0
2,0.52,0
3,0.53,0
4,0.54,0


In [46]:
# The model was fitted on columns 'mean_price' and 'mean_promo'. Relabel the scenario
# columns (same order) so scikit-learn's feature-name validation accepts them.
y_no_promo = model.predict_proba(
    df_price_elasticity_no_promotion.rename(
        columns={'Price_Range': 'mean_price', 'promotion': 'mean_promo'}))
# Column 1 of predict_proba is the purchase probability.
no_promo = y_no_promo[:, 1]

# Elasticity = price coefficient * price * (1 - P(purchase)).
price_elasticyti_no_promo = (model.coef_[:, 0] * price_range) * (1 - no_promo)

In [47]:
# Add the "promotion = 0" purchase-probability elasticities to the master table.
df_price_elasticity['elasticity_promotion_0'] = price_elasticyti_no_promo
df_price_elasticity.head()

Unnamed: 0,Price_Point,means,pe_seg1,pe_seg2,pe_seg0,pe_seg3,elasticity_promotion_1,elasticity_promotion_0
0,0.5,-0.096396,-0.119088,-0.030921,-0.211169,-0.168042,-0.125732,-0.195644
1,0.51,-0.100464,-0.123271,-0.032688,-0.217739,-0.172967,-0.129848,-0.201765
2,0.52,-0.104659,-0.127548,-0.034541,-0.224417,-0.177962,-0.134043,-0.207989
3,0.53,-0.108984,-0.131919,-0.036486,-0.231204,-0.183027,-0.138318,-0.214316
4,0.54,-0.113442,-0.136386,-0.038526,-0.238099,-0.188165,-0.142674,-0.220747


In [48]:
# Columns collected in the master table so far.
df_price_elasticity.columns

Index(['Price_Point', 'means', 'pe_seg1', 'pe_seg2', 'pe_seg0', 'pe_seg3',
       'elasticity_promotion_1', 'elasticity_promotion_0'],
      dtype='object')

In [49]:
# Compare the promotion / no-promotion elasticities against the actual price grid
# (index by 'Price_Point' so the x axis shows prices, not row numbers).
df_price_elasticity.set_index('Price_Point')[
    ['elasticity_promotion_1', 'elasticity_promotion_0']
].iplot(kind='line',
        xTitle='prices',
        yTitle='Elasticity',
        title='Prices Promo Elasticity')

## Brand Choice
### Data Preparation

In [51]:
# Brand choice is only defined when a purchase happened: keep rows with Incidence == 1.
brand_choice = data[data.Incidence==1]
brand_choice.head()

Unnamed: 0,ID,Day,Incidence,Brand,Quantity,Last_Inc_Brand,Last_Inc_Quantity,Price_1,Price_2,Price_3,...,Age,Education,Income,Occupation,Settlement size,segment,Segment_0,Segment_1,Segment_2,Segment_3
6,200000001,28,1,2,2,0,0,1.5,1.9,1.99,...,47,1,110866,1,0,2,0,0,1,0
11,200000001,58,1,5,1,0,0,1.39,1.9,1.91,...,47,1,110866,1,0,2,0,0,1,0
19,200000001,110,1,1,2,0,0,1.47,1.9,1.99,...,47,1,110866,1,0,2,0,0,1,0
24,200000001,142,1,4,1,0,0,1.21,1.35,1.99,...,47,1,110866,1,0,2,0,0,1,0
29,200000001,183,1,2,3,0,0,1.46,1.88,1.97,...,47,1,110866,1,0,2,0,0,1,0


#### Select X and y features

In [55]:
# Column names available for feature selection.
brand_choice.columns.values

array(['ID', 'Day', 'Incidence', 'Brand', 'Quantity', 'Last_Inc_Brand',
       'Last_Inc_Quantity', 'Price_1', 'Price_2', 'Price_3', 'Price_4',
       'Price_5', 'Promotion_1', 'Promotion_2', 'Promotion_3',
       'Promotion_4', 'Promotion_5', 'Sex', 'Marital status', 'Age',
       'Education', 'Income', 'Occupation', 'Settlement size', 'segment',
       'Segment_0', 'Segment_1', 'Segment_2', 'Segment_3'], dtype=object)

In [59]:
# Predictors: the five brand prices. Target: the purchased brand.
# NOTE: `features` is rebound here; earlier it held the demographic DataFrame used for clustering.
features = ['Price_1', 'Price_2', 'Price_3', 'Price_4','Price_5']
X = brand_choice[features]
y =brand_choice['Brand']
X.shape, y.shape

((14638, 5), (14638,))

In [60]:
# Number of purchases per brand.
y.value_counts()

5    4978
2    4542
4    2927
1    1350
3     841
Name: Brand, dtype: int64

#### Fit model

In [62]:
# Multinomial logistic regression of the chosen brand on the five brand prices.
# - 'sag' is a stochastic solver, so random_state is fixed to make the coefficients reproducible.
# - multi_class is not passed: with a non-liblinear solver and more than two classes the
#   default already fits a multinomial model, and the explicit argument is deprecated in
#   recent scikit-learn releases.
model_brand_choice = LogisticRegression(max_iter=300, solver='sag', random_state=42)
model_brand_choice.fit(X, y)

LogisticRegression(max_iter=300, multi_class='multinomial', solver='sag')

In [63]:
# Raw coefficient matrix: one row per brand (class), one column per price feature.
model_brand_choice.coef_

array([[-3.91597547,  0.65845388,  2.41765534,  0.70407423, -0.20212917],
       [ 1.27460441, -1.8798576 , -0.21267389, -0.20634621,  0.58786176],
       [ 1.62423299,  0.56212331,  0.50104083,  1.03953516,  0.45283521],
       [ 0.57452654,  0.3955847 , -1.40064355, -1.24625032,  0.2493396 ],
       [ 0.44261152,  0.2636957 , -1.30537873, -0.29101286, -1.0879074 ]])

#### Results

In [64]:
# Labelled coefficient table: rows are the price features, columns are the brands
# (the coefficient matrix is transposed so each brand becomes a column).
names_indx = ['Price_1', 'Price_2', 'Price_3', 'Price_4', 'Price_5']
names = ['Coef_Brand_1', 'Coef_Brand_2', 'Coef_Brand_3', 'Coef_Brand_4', 'Coef_Brand_5']
bc_coef = pd.DataFrame(model_brand_choice.coef_.T, columns=names, index=names_indx)
bc_coef

Unnamed: 0,Coef_Brand_1,Coef_Brand_2,Coef_Brand_3,Coef_Brand_4,Coef_Brand_5
Price_1,-3.915975,1.274604,1.624233,0.574527,0.442612
Price_2,0.658454,-1.879858,0.562123,0.395585,0.263696
Price_3,2.417655,-0.212674,0.501041,-1.400644,-1.305379
Price_4,0.704074,-0.206346,1.039535,-1.24625,-0.291013
Price_5,-0.202129,0.587862,0.452835,0.24934,-1.087907


In [66]:
# Round to two decimals. NOTE: this overwrites the full-precision table, so coefficients
# read from bc_coef afterwards are the rounded values.
bc_coef = bc_coef.round(2)

In [67]:
# Display the rounded coefficient table.
bc_coef

Unnamed: 0,Coef_Brand_1,Coef_Brand_2,Coef_Brand_3,Coef_Brand_4,Coef_Brand_5
Price_1,-3.92,1.27,1.62,0.57,0.44
Price_2,0.66,-1.88,0.56,0.4,0.26
Price_3,2.42,-0.21,0.5,-1.4,-1.31
Price_4,0.7,-0.21,1.04,-1.25,-0.29
Price_5,-0.2,0.59,0.45,0.25,-1.09


## Own Price Elasticity Brand 5

In [68]:
# Scenario frame for the own-price elasticity of brand 5: prices of brands 1-4 are held at
# their mean over purchase occasions while the price of brand 5 sweeps the price grid.
df_own_brand_5 = pd.DataFrame(index=np.arange(price_range.size))
for _col in ('Price_1', 'Price_2', 'Price_3', 'Price_4'):
    df_own_brand_5[_col] = brand_choice[_col].mean()
df_own_brand_5['Price_5'] = price_range
df_own_brand_5.head()

Unnamed: 0,Price_1,Price_2,Price_3,Price_4,Price_5
0,1.384559,1.764717,2.006694,2.159658,0.5
1,1.384559,1.764717,2.006694,2.159658,0.51
2,1.384559,1.764717,2.006694,2.159658,0.52
3,1.384559,1.764717,2.006694,2.159658,0.53
4,1.384559,1.764717,2.006694,2.159658,0.54


In [69]:
# Brand-choice probabilities for every scenario row (one column per brand).
predict_brand_5 = model_brand_choice.predict_proba(df_own_brand_5)

In [71]:
# Inspect the full probability matrix.
predict_brand_5[:]

array([[0.03290801, 0.0217765 , 0.00543746, 0.02988823, 0.9099898 ],
       [0.03316274, 0.02211911, 0.00551556, 0.03025587, 0.90894672],
       [0.03341897, 0.0224668 , 0.0055947 , 0.0306276 , 0.90789194],
       ...,
       [0.06909844, 0.47391142, 0.07934642, 0.23880043, 0.13884329],
       [0.06881381, 0.4757025 , 0.07953883, 0.23889287, 0.137052  ],
       [0.06852819, 0.47748528, 0.07972918, 0.2389778 , 0.13527955]])

In [74]:
# Inspect the last column (index 4), i.e. the probability of choosing brand 5.
predict_brand_5[:][:,4]

array([0.9099898 , 0.90894672, 0.90789194, 0.90682535, 0.90574684,
       0.90465629, 0.90355361, 0.90243867, 0.90131137, 0.90017159,
       0.89901921, 0.89785413, 0.89667623, 0.8954854 , 0.89428153,
       0.89306449, 0.89183418, 0.89059049, 0.88933329, 0.88806248,
       0.88677794, 0.88547956, 0.88416721, 0.8828408 , 0.8815002 ,
       0.8801453 , 0.87877599, 0.87739215, 0.87599367, 0.87458044,
       0.87315235, 0.87170928, 0.87025113, 0.86877777, 0.8672891 ,
       0.86578502, 0.8642654 , 0.86273015, 0.86117915, 0.8596123 ,
       0.85802949, 0.85643061, 0.85481556, 0.85318424, 0.85153654,
       0.84987235, 0.84819159, 0.84649415, 0.84477992, 0.84304882,
       0.84130074, 0.8395356 , 0.83775329, 0.83595374, 0.83413683,
       0.8323025 , 0.83045065, 0.82858119, 0.82669404, 0.82478912,
       0.82286636, 0.82092566, 0.81896696, 0.81699018, 0.81499526,
       0.81298211, 0.81095068, 0.8089009 , 0.8068327 , 0.80474604,
       0.80264084, 0.80051706, 0.79837464, 0.79621354, 0.79403

In [75]:
# Probability of choosing brand 5: the last column (index 4) of the prediction matrix.
pr_own_brand_5 = predict_brand_5[:, 4]

In [77]:
# Own-price coefficient of brand 5: row 'Price_5', column 'Coef_Brand_5' of bc_coef
# (-1.09 after rounding). Looked up by label: `Series[-1]` on a string-labelled index
# relies on a positional fallback that pandas has deprecated.
beta5 = bc_coef.loc['Price_5', 'Coef_Brand_5']
beta5

-1.09

In [78]:
# Own-price elasticity of brand choice (the model has price features only):
# own-price coefficient * price * (1 - P(choose brand 5)).
own_price_elasticity_brand_5  = beta5 * price_range * (1-pr_own_brand_5)

In [80]:
# Add the brand 5 own-price elasticities to the master table.
df_price_elasticity['Brand_5'] = own_price_elasticity_brand_5
df_price_elasticity.head()

Unnamed: 0,Price_Point,means,pe_seg1,pe_seg2,pe_seg0,pe_seg3,elasticity_promotion_1,elasticity_promotion_0,Brand_5
0,0.5,-0.096396,-0.119088,-0.030921,-0.211169,-0.168042,-0.125732,-0.195644,-0.049056
1,0.51,-0.100464,-0.123271,-0.032688,-0.217739,-0.172967,-0.129848,-0.201765,-0.050617
2,0.52,-0.104659,-0.127548,-0.034541,-0.224417,-0.177962,-0.134043,-0.207989,-0.052207
3,0.53,-0.108984,-0.131919,-0.036486,-0.231204,-0.183027,-0.138318,-0.214316,-0.053827
4,0.54,-0.113442,-0.136386,-0.038526,-0.238099,-0.188165,-0.142674,-0.220747,-0.055477


In [83]:
# Plot against the actual brand 5 price (index by 'Price_Point' instead of the row number).
df_price_elasticity.set_index('Price_Point')['Brand_5'].iplot(
    kind='line', xTitle='prices5', color='blue',
    yTitle='Elasticity', title='Own Prices Elasticity')

### Cross Price Elasticity Brand 5, Cross Brand 4

In [84]:
# Scenario frame for the cross-price elasticity: the price of brand 4 sweeps the price grid
# while every other brand price is held at its mean over purchase occasions.
df_brand5_cross_brand4 = pd.DataFrame(index=np.arange(price_range.size))
for _col in ('Price_1', 'Price_2', 'Price_3', 'Price_4', 'Price_5'):
    if _col == 'Price_4':
        df_brand5_cross_brand4[_col] = price_range
    else:
        df_brand5_cross_brand4[_col] = brand_choice[_col].mean()
df_brand5_cross_brand4.head()

Unnamed: 0,Price_1,Price_2,Price_3,Price_4,Price_5
0,1.384559,1.764717,2.006694,0.5,2.654296
1,1.384559,1.764717,2.006694,0.51,2.654296
2,1.384559,1.764717,2.006694,0.52,2.654296
3,1.384559,1.764717,2.006694,0.53,2.654296
4,1.384559,1.764717,2.006694,0.54,2.654296


In [85]:
# Brand-choice probabilities for every cross-price scenario row.
predict_brand5_cross_brand4 = model_brand_choice.predict_proba(df_brand5_cross_brand4)

In [89]:
# Probability of choosing brand 4: column index 3 of the prediction matrix.
pr_brand_4 = predict_brand5_cross_brand4[:, 3]

In [90]:
# Cross-brand price elasticity:
# minus the own-brand price coefficient, times the price of the cross brand,
# times the probability of choosing the cross brand.

brand5_cross_brand4_price_elasticity = -beta5 * price_range * pr_brand_4

In [91]:
# Add the brand 5 / brand 4 cross-price elasticities to the master table.
df_price_elasticity['Brand_5_Cross_Brand_4'] = brand5_cross_brand4_price_elasticity
df_price_elasticity.head()

Unnamed: 0,Price_Point,means,pe_seg1,pe_seg2,pe_seg0,pe_seg3,elasticity_promotion_1,elasticity_promotion_0,Brand_5,Brand_5_Cross_Brand_4
0,0.5,-0.096396,-0.119088,-0.030921,-0.211169,-0.168042,-0.125732,-0.195644,-0.049056,0.332011
1,0.51,-0.100464,-0.123271,-0.032688,-0.217739,-0.172967,-0.129848,-0.201765,-0.050617,0.337288
2,0.52,-0.104659,-0.127548,-0.034541,-0.224417,-0.177962,-0.134043,-0.207989,-0.052207,0.342507
3,0.53,-0.108984,-0.131919,-0.036486,-0.231204,-0.183027,-0.138318,-0.214316,-0.053827,0.347669
4,0.54,-0.113442,-0.136386,-0.038526,-0.238099,-0.188165,-0.142674,-0.220747,-0.055477,0.352773


In [92]:
# Plot against the actual brand 4 price (index by 'Price_Point' instead of the row number).
df_price_elasticity.set_index('Price_Point')['Brand_5_Cross_Brand_4'].iplot(
    kind='line', xTitle='prices4', color='blue',
    yTitle='Elasticity', title='Cross Price Elasticity 5 and 4')

Aquí examinamos la elasticidad precio cruzada de la probabilidad de compra de la marca 5 con respecto al precio de la marca 4.  
- Observamos que las elasticidades son positivas: a medida que aumenta el precio de la marca competidora,
también lo hace la probabilidad de comprar nuestra propia marca. Aunque la elasticidad comienza a disminuir a partir de un precio de 1,45, sigue siendo positiva, lo que señala que la probabilidad de compra de la propia marca sigue aumentando, pero de forma más lenta.

## 11-Price Elasticity of Purchase Quantity

In [94]:
# Keep only purchase occasions (Incidence == 1); taken from the raw `df`, not from `data`.
df_purchase_quantity = df[df.Incidence==1]
df_purchase_quantity.head()

Unnamed: 0,ID,Day,Incidence,Brand,Quantity,Last_Inc_Brand,Last_Inc_Quantity,Price_1,Price_2,Price_3,...,Promotion_3,Promotion_4,Promotion_5,Sex,Marital status,Age,Education,Income,Occupation,Settlement size
6,200000001,28,1,2,2,0,0,1.5,1.9,1.99,...,0,0,0,0,0,47,1,110866,1,0
11,200000001,58,1,5,1,0,0,1.39,1.9,1.91,...,0,0,1,0,0,47,1,110866,1,0
19,200000001,110,1,1,2,0,0,1.47,1.9,1.99,...,0,1,0,0,0,47,1,110866,1,0
24,200000001,142,1,4,1,0,0,1.21,1.35,1.99,...,0,0,0,0,0,47,1,110866,1,0
29,200000001,183,1,2,3,0,0,1.46,1.88,1.97,...,0,1,1,0,0,47,1,110866,1,0


In [98]:
# One-hot encode the purchased brand into Brand_1..Brand_5 (the 'Brand' column is replaced).
# NOTE: the frame is overwritten, so this cell cannot be re-run without re-running the filter above.
df_purchase_quantity = pd.get_dummies(df_purchase_quantity,columns=['Brand'], prefix='Brand')
df_purchase_quantity.head()

Unnamed: 0,ID,Day,Incidence,Quantity,Last_Inc_Brand,Last_Inc_Quantity,Price_1,Price_2,Price_3,Price_4,...,Age,Education,Income,Occupation,Settlement size,Brand_1,Brand_2,Brand_3,Brand_4,Brand_5
6,200000001,28,1,2,0,0,1.5,1.9,1.99,2.09,...,47,1,110866,1,0,0,1,0,0,0
11,200000001,58,1,1,0,0,1.39,1.9,1.91,2.12,...,47,1,110866,1,0,0,0,0,0,1
19,200000001,110,1,2,0,0,1.47,1.9,1.99,1.97,...,47,1,110866,1,0,1,0,0,0,0
24,200000001,142,1,1,0,0,1.21,1.35,1.99,2.16,...,47,1,110866,1,0,0,0,0,1,0
29,200000001,183,1,3,0,0,1.46,1.88,1.97,1.89,...,47,1,110866,1,0,0,1,0,0,0


In [100]:
# Summary statistics of the purchase-occasion frame.
df_purchase_quantity.describe()

Unnamed: 0,ID,Day,Incidence,Quantity,Last_Inc_Brand,Last_Inc_Quantity,Price_1,Price_2,Price_3,Price_4,...,Age,Education,Income,Occupation,Settlement size,Brand_1,Brand_2,Brand_3,Brand_4,Brand_5
count,14638.0,14638.0,14638.0,14638.0,14638.0,14638.0,14638.0,14638.0,14638.0,14638.0,...,14638.0,14638.0,14638.0,14638.0,14638.0,14638.0,14638.0,14638.0,14638.0,14638.0
mean,200000300.0,359.607597,1.0,2.774559,1.535251,0.406955,1.384559,1.764717,2.006694,2.159658,...,40.346905,1.151045,126706.452043,0.845471,0.797513,0.092226,0.310288,0.057453,0.199959,0.340074
std,142.3786,205.346253,0.0,1.796736,2.060858,0.491283,0.090944,0.181667,0.0462,0.093253,...,12.346284,0.634625,39935.109358,0.63384,0.810358,0.289354,0.462627,0.232714,0.399983,0.47375
min,200000000.0,1.0,1.0,1.0,0.0,0.0,1.1,1.26,1.87,1.76,...,18.0,0.0,38247.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,200000100.0,175.0,1.0,2.0,0.0,0.0,1.34,1.57,1.97,2.12,...,31.0,1.0,102723.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,200000300.0,355.0,1.0,2.0,0.0,0.0,1.39,1.88,2.01,2.18,...,37.0,1.0,122895.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
75%,200000400.0,532.0,1.0,3.0,4.0,1.0,1.47,1.89,2.06,2.24,...,49.0,2.0,147967.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
max,200000500.0,727.0,1.0,15.0,5.0,1.0,1.59,1.9,2.14,2.26,...,75.0,3.0,309364.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0


In [101]:
# Column names after one-hot encoding.
df_purchase_quantity.columns.values

array(['ID', 'Day', 'Incidence', 'Quantity', 'Last_Inc_Brand',
       'Last_Inc_Quantity', 'Price_1', 'Price_2', 'Price_3', 'Price_4',
       'Price_5', 'Promotion_1', 'Promotion_2', 'Promotion_3',
       'Promotion_4', 'Promotion_5', 'Sex', 'Marital status', 'Age',
       'Education', 'Income', 'Occupation', 'Settlement size', 'Brand_1',
       'Brand_2', 'Brand_3', 'Brand_4', 'Brand_5'], dtype=object)

In [102]:
# Price of the brand actually bought on each occasion: each brand dummy selects its own
# price column, and the five products are summed.
_chosen_price = df_purchase_quantity['Brand_1'] * df_purchase_quantity['Price_1']
for _brand in range(2, 6):
    _chosen_price = _chosen_price + (
        df_purchase_quantity[f'Brand_{_brand}'] * df_purchase_quantity[f'Price_{_brand}']
    )
df_purchase_quantity['price_incidence'] = _chosen_price

In [103]:
# Promotion flag of the brand actually bought on each occasion: each brand dummy selects
# its own promotion column, and the five products are summed.
_chosen_promo = df_purchase_quantity['Brand_1'] * df_purchase_quantity['Promotion_1']
for _brand in range(2, 6):
    _chosen_promo = _chosen_promo + (
        df_purchase_quantity[f'Brand_{_brand}'] * df_purchase_quantity[f'Promotion_{_brand}']
    )
df_purchase_quantity['promotion_incidence'] = _chosen_promo

#### Select X and y

In [105]:
# Predictors: price and promotion flag of the purchased brand. Target: purchased quantity.
X = df_purchase_quantity[['price_incidence','promotion_incidence']]
y = df_purchase_quantity['Quantity']

### Model Estimation. Linear Regression

In [106]:
# Linear regression of purchase quantity on price and promotion.
model_quantity = LinearRegression()
model_quantity.fit(X, y)

LinearRegression()

In [107]:
# Coefficients in feature order: price_incidence, promotion_incidence.
model_quantity.coef_

array([-0.8173651 , -0.10504673])

Parece que la promoción se refleja negativamente en la cantidad de compra del cliente promedio, lo cual es inesperado.

#### Price Elasticity Estimation

In [109]:
# Scenario frame for the quantity model: one row per grid price, promotion fixed at 1.
_grid_index = np.arange(price_range.size)
df_price_elasticity_quantity = pd.DataFrame(index=_grid_index).assign(
    Price_incidence=price_range,
    Promotion_incidence=1,
)
df_price_elasticity_quantity.head()

Unnamed: 0,Price_incidence,Promotion_incidence
0,0.5,1
1,0.51,1
2,0.52,1
3,0.53,1
4,0.54,1


In [111]:
# Price coefficient of the quantity model (first feature is price_incidence).
beta_quantity = model_quantity.coef_[0]
beta_quantity

-0.8173651016685023

In [114]:
# The model was fitted on 'price_incidence' / 'promotion_incidence' (lower case), while the
# scenario frame uses capitalised labels. Relabel at call time so scikit-learn's
# feature-name validation accepts the frame; the frame itself is left unchanged.
predict_quantity = model_quantity.predict(
    df_price_elasticity_quantity.rename(
        columns={'Price_incidence': 'price_incidence',
                 'Promotion_incidence': 'promotion_incidence'}))

In [117]:
# Elasticity of quantity: price coefficient * price / predicted quantity.
pe_quantity_with_promo = beta_quantity * price_range/predict_quantity

In [118]:
# Add the quantity elasticities (promotion = 1) to the master table.
df_price_elasticity['pe_quantity_promotion1'] = pe_quantity_with_promo
df_price_elasticity.head()

Unnamed: 0,Price_Point,means,pe_seg1,pe_seg2,pe_seg0,pe_seg3,elasticity_promotion_1,elasticity_promotion_0,Brand_5,Brand_5_Cross_Brand_4,pe_quantity_promotion1
0,0.5,-0.096396,-0.119088,-0.030921,-0.211169,-0.168042,-0.125732,-0.195644,-0.049056,0.332011,-0.102157
1,0.51,-0.100464,-0.123271,-0.032688,-0.217739,-0.172967,-0.129848,-0.201765,-0.050617,0.337288,-0.104413
2,0.52,-0.104659,-0.127548,-0.034541,-0.224417,-0.177962,-0.134043,-0.207989,-0.052207,0.342507,-0.106679
3,0.53,-0.108984,-0.131919,-0.036486,-0.231204,-0.183027,-0.138318,-0.214316,-0.053827,0.347669,-0.108954
4,0.54,-0.113442,-0.136386,-0.038526,-0.238099,-0.188165,-0.142674,-0.220747,-0.055477,0.352773,-0.111238


In [119]:
# Plot against the actual prices (index by 'Price_Point' instead of the row number).
df_price_elasticity.set_index('Price_Point')['pe_quantity_promotion1'].iplot(
    kind='line', xTitle='prices', color='blue',
    yTitle='Elasticity', title='Price Elasticity of Purchase Quantity with Promotion ')

## With no promo

In [123]:
# Switch the scenario to "no promotion".
# NOTE: this mutates the frame in place, so re-running the earlier promotion = 1 prediction
# after this cell would silently use promotion = 0.
df_price_elasticity_quantity['Promotion_incidence'] =0

In [126]:
# Sanity check: the promotion column should now contain only 0.
df_price_elasticity_quantity['Promotion_incidence'].unique()

array([0], dtype=int64)

In [129]:
# The model was fitted on 'price_incidence' / 'promotion_incidence' (lower case), while the
# scenario frame uses capitalised labels. Relabel at call time so scikit-learn's
# feature-name validation accepts the frame; the frame itself is left unchanged.
predict_quantity_no_promo = model_quantity.predict(
    df_price_elasticity_quantity.rename(
        columns={'Price_incidence': 'price_incidence',
                 'Promotion_incidence': 'promotion_incidence'}))

In [130]:
# Elasticity of quantity without promotion: price coefficient * price / predicted quantity.
pe_quantity_no_promo = beta_quantity * price_range/predict_quantity_no_promo