# Marketing Analytics Project 4: Marketing

In [78]:
import pandas as pd
import numpy as np

In [79]:
transactions = pd.read_excel('ACSE - Mahou San Miguel data.xlsx', sheet_name='transactions')
products = pd.read_excel('ACSE - Mahou San Miguel data.xlsx', sheet_name='products')
seasonality = pd.read_excel('ACSE - Mahou San Miguel data.xlsx', sheet_name='seasonality')
promo_ad = pd.read_excel('ACSE - Mahou San Miguel data.xlsx', sheet_name='promo_ad')
event_holiday = pd.read_excel('ACSE - Mahou San Miguel data.xlsx', sheet_name='event_holiday')

## Data Manipulation

#### transactions

In [80]:
transactions.dtypes

cust_id               int64
prod_id               int64
trans_id              int64
trans_dt     datetime64[ns]
sales_qty             int64
sales_amt           float64
dtype: object

In [81]:
# Manipulate the time format
transactions['trans_dt'] = pd.to_datetime(transactions['trans_dt'])
transactions['year'] = transactions['trans_dt'].dt.year
transactions['month'] = transactions['trans_dt'].dt.month
transactions['day'] = transactions['trans_dt'].dt.day
transactions['week'] = transactions['trans_dt'].apply(lambda x: x.weekofyear if x.weekday() == 6 else x.weekofyear - 1)
transactions = transactions[['year', 'month', 'day', 'week', 'cust_id', 'prod_id', 'trans_id', 'sales_qty', 'sales_amt']]
transactions.head()

Unnamed: 0,year,month,day,week,cust_id,prod_id,trans_id,sales_qty,sales_amt
0,2019,1,27,4,1126801552,23141593,190127010000109698,1,9.99
1,2019,1,27,4,1129263437,23141593,190127010000109643,1,9.99
2,2019,1,27,4,1148577540,23141593,190127010000109827,1,9.99
3,2019,1,27,4,1131341193,23141592,190127010000206031,1,3.99
4,2019,1,27,4,1151750173,23141592,190127010000206174,2,7.98


In [82]:
# Aggregate to week level
trans_week = transactions.groupby(['year', 'week', 'prod_id'])[['sales_qty', 'sales_amt']].sum().reset_index()

In [83]:
trans_week.head()

Unnamed: 0,year,week,prod_id,sales_qty,sales_amt
0,2019,0,23141592,264,1185.36
1,2019,0,23141593,87,1217.13
2,2019,4,23141592,1886,7525.14
3,2019,4,23141593,2465,24625.35
4,2019,5,23141592,777,3877.23


#### seasonality

In [84]:
seasonality.dtypes

week          datetime64[ns]
seas_index           float64
dtype: object

In [85]:
# Manipulate the time format
seasonality['year'] = seasonality['week'].dt.year
seasonality['week'] = seasonality['week'].apply(lambda x: x.weekofyear if x.weekday() == 6 else x.weekofyear - 1)
seasonality = seasonality[['year', 'week', 'seas_index']]
seasonality

Unnamed: 0,year,week,seas_index
0,2017,25,1.100412
1,2017,26,1.102850
2,2017,27,0.961778
3,2017,28,1.135816
4,2017,29,1.101576
...,...,...,...
176,2020,47,0.697492
177,2020,48,0.671990
178,2020,49,0.715360
179,2020,50,1.057361


#### promo_ad

In [86]:
promo_ad.dtypes

week       datetime64[ns]
vehicle            object
amount              int64
unit               object
prod_id            object
cost              float64
dtype: object

In [87]:
# Manipulate the time format
promo_ad['year'] = promo_ad['week'].dt.year
promo_ad['week'] = promo_ad['week'].apply(lambda x: x.weekofyear if x.weekday() == 6 else x.weekofyear - 1)

In [88]:
# Manipulate the rows with "ALL" prod_id
all_rows = promo_ad[promo_ad['prod_id'] == 'ALL']

# Create two new rows for each prod_id
new_rows_1 = all_rows.copy()
new_rows_1['prod_id'] = 23141592
new_rows_1['cost'] = all_rows['cost'] / 2

new_rows_2 = all_rows.copy()
new_rows_2['prod_id'] = 23141593
new_rows_2['cost'] = all_rows['cost'] / 2

promo_ad = pd.concat([promo_ad, new_rows_1, new_rows_2])

# Remove the original rows with 'ALL'
promo_ad = promo_ad[promo_ad['prod_id'] != 'ALL']

# Modify the special case: Display
promo_ad.loc[(promo_ad['vehicle'] == 'Display') & (promo_ad['prod_id'] == 23141592), 'cost'] = 1500
promo_ad.loc[(promo_ad['vehicle'] == 'Display') & (promo_ad['prod_id'] == 23141593), 'cost'] = 3000

promo_ad = promo_ad[['year', 'week', 'vehicle', 'amount', 'unit', 'prod_id', 'cost']].sort_values(by=['vehicle', 'year', 'week'])#.reset_index()

In [89]:
promo_ad.head()

Unnamed: 0,year,week,vehicle,amount,unit,prod_id,cost
0,2019,4,Display,1,,23141592,1500.0
0,2019,4,Display,1,,23141593,3000.0
1,2019,9,Display,1,,23141593,3000.0
2,2019,10,Display,1,,23141593,3000.0
3,2019,13,Display,1,,23141593,3000.0


In [90]:
# Split the promo_ad table by different vehicles
promo_display = promo_ad[promo_ad['vehicle'] == 'Display']
promo_fb = promo_ad[promo_ad['vehicle'] == 'Facebook Banner']
promo_flyer = promo_ad[promo_ad['vehicle'] == 'Flyer']
promo_google = promo_ad[promo_ad['vehicle'] == 'Google Paid Search']
promo_radio = promo_ad[promo_ad['vehicle'] == 'Radio']
promo_tv = promo_ad[promo_ad['vehicle'] == 'TV']

In [91]:
# Reach Calculation: Radio and TV

# Radio
# Get the distinct time & GRP values
radio_grp = promo_radio[['year', 'week', 'amount']].drop_duplicates().reset_index(drop=True)
# Create a new year/week list
years = radio_grp['year'].unique()
weeks = list(range(1, 53))
all_weeks = pd.DataFrame([(year, week) for year in years for week in weeks], columns=['year', 'week'])
radio_grp = pd.merge(all_weeks, radio_grp, on=['year', 'week'], how='left').fillna(0)


# Calculate decay parameter alpha
half_life = 3
alpha = 1 - (0.5) ** (1 / half_life)

# Create & Calculate the adstock_grp
radio_grp['adstock_grp'] = 0

for i in range(len(radio_grp)):
    if i == 0:
        radio_grp.loc[i, 'adstock_grp'] = alpha * radio_grp.loc[i, 'amount']
    else:
        radio_grp.loc[i, 'adstock_grp'] = alpha * radio_grp.loc[i, 'amount'] + (1 - alpha) * radio_grp.loc[i - 1, 'adstock_grp']

# Calculate the Reach
radio_grp['Reach'] = 0.90 * (1 - np.exp(-0.5 * radio_grp['adstock_grp']))

radio_grp

Unnamed: 0,year,week,amount,adstock_grp,Reach
0,2019,1,0.0,0.000000,0.000000
1,2019,2,0.0,0.000000,0.000000
2,2019,3,15.0,3.094492,0.708450
3,2019,4,10.0,4.519095,0.806042
4,2019,5,5.0,4.618305,0.810589
...,...,...,...,...,...
99,2020,48,0.0,0.228426,0.097139
100,2020,49,0.0,0.181302,0.077997
101,2020,50,0.0,0.143899,0.062480
102,2020,51,0.0,0.114213,0.049956


In [92]:
# TV
# Get the distinct time & GRP values
tv_grp = promo_tv[['year', 'week', 'amount']].drop_duplicates().reset_index(drop=True)
# Create a new year/week list
years = tv_grp['year'].unique()
weeks = list(range(1, 53))
tv_grp = pd.merge(all_weeks, tv_grp, on=['year', 'week'], how='left').fillna(0)


# Calculate decay parameter alpha
half_life = 6
alpha = 1 - (0.5) ** (1 / half_life)

# Create & Calculate the adstock_grp
tv_grp['adstock_grp'] = 0

for i in range(len(tv_grp)):
    if i == 0:
        tv_grp.loc[i, 'adstock_grp'] = alpha * tv_grp.loc[i, 'amount']
    else:
        tv_grp.loc[i, 'adstock_grp'] = alpha * tv_grp.loc[i, 'amount'] + (1 - alpha) * tv_grp.loc[i - 1, 'adstock_grp']

# Calculate the Reach
tv_grp['Reach'] = 0.95 * (1 - np.exp(-0.4 * tv_grp['adstock_grp']))

tv_grp

Unnamed: 0,year,week,amount,adstock_grp,Reach
0,2019,1,30.0,3.273038,0.693470
1,2019,2,15.0,4.552465,0.796227
2,2019,3,15.0,5.692304,0.852530
3,2019,4,0.0,5.071267,0.825045
4,2019,5,0.0,4.517985,0.794092
...,...,...,...,...,...
99,2020,48,0.0,0.529314,0.181272
100,2020,49,0.0,0.471565,0.163309
101,2020,50,0.0,0.420116,0.146951
102,2020,51,0.0,0.374281,0.132092


#### event_holiday

In [93]:
# Manipulate the time format
event_holiday['year'] = event_holiday['week'].dt.year
event_holiday['week'] = event_holiday['week'].apply(lambda x: x.weekofyear if x.weekday() == 6 else x.weekofyear - 1)

In [94]:
event_holiday

Unnamed: 0,week,event_holiday,year
0,52,New Year,2018
1,4,Pre Super Bowl,2019
2,5,Super Bowl,2019
3,15,Pre Easter,2019
4,16,Easter,2019
5,26,National Day,2019
6,35,Labor Day,2019
7,43,Halloween,2019
8,45,Memorial Day,2019
9,51,Christmas,2019


In [95]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
event_holiday['event'] = le.fit_transform(event_holiday['event_holiday']) + 1

In [54]:
event_holiday = event_holiday[['year', 'week', 'event_holiday', 'event']]

In [96]:
# Special case: New Year; add two weeks (2019 & 2020 Week 1) as New Year weeks
new_year = pd.DataFrame([[2019, 1, 'New Year', 7],
                         [2020, 1, 'New Year', 7]],
                         columns=['year', 'week', 'event_holiday', 'event'])
event_holiday = event_holiday.append(new_year, ignore_index=True)
event_holiday.head()

  event_holiday = event_holiday.append(new_year, ignore_index=True)


Unnamed: 0,week,event_holiday,year,event
0,52,New Year,2018,7
1,4,Pre Super Bowl,2019,9
2,5,Super Bowl,2019,10
3,15,Pre Easter,2019,8
4,16,Easter,2019,2


## Data Merge

In [98]:
# Merge seasonality
df = pd.merge(trans_week, seasonality, on=['year', 'week'], how='left')
# Check the missing values
df.isna().sum()

year          0
week          0
prod_id       0
sales_qty     0
sales_amt     0
seas_index    4
dtype: int64

In [99]:
# missing weeks for the end of 2020; fill with 1
df = df.fillna(1)

In [100]:
# Merge event
df = pd.merge(df, event_holiday[['year', 'week', 'event']], on=['year', 'week'], how='left')
df.isna().sum()

year            0
week            0
prod_id         0
sales_qty       0
sales_amt       0
seas_index      0
event         166
dtype: int64

In [101]:
# Fill na with 0 (0 represents "no events")
df = df.fillna(0)

In [102]:
# Merge promo: display; 1 for display, 0 for non-display
df = pd.merge(df, promo_display[['year', 'week', 'prod_id', 'amount']], on=['year', 'week', 'prod_id'], how='left').fillna(0)
# Rename the amount column
df.rename(columns = {'amount': 'promo_display'}, inplace = True)

In [103]:
# Merge other promo tables
df = pd.merge(df, promo_fb[['year', 'week', 'prod_id', 'amount']], on=['year', 'week', 'prod_id'], how='left').fillna(0)
df.rename(columns = {'amount': 'promo_fb'}, inplace = True)

In [104]:
df = pd.merge(df, promo_flyer[['year', 'week', 'prod_id', 'amount']], on=['year', 'week', 'prod_id'], how='left').fillna(0)
df.rename(columns = {'amount': 'promo_flyer'}, inplace = True)

In [105]:
df = pd.merge(df, promo_google[['year', 'week', 'prod_id', 'amount']], on=['year', 'week', 'prod_id'], how='left').fillna(0)
df.rename(columns = {'amount': 'promo_google'}, inplace = True)

In [106]:
# Use the calculated Reach for radio and tv; no need to join by prod_id since they are promotion approaches for both products all the time
df = pd.merge(df, radio_grp[['year', 'week', 'Reach']], on=['year', 'week'], how='left')
df.rename(columns = {'Reach': 'promo_radio_reach'}, inplace = True)

In [107]:
df = pd.merge(df, tv_grp[['year', 'week', 'Reach']], on=['year', 'week'], how='left')
df.rename(columns = {'Reach': 'promo_tv_reach'}, inplace = True)

In [110]:
df = df.fillna(0)

In [111]:
# Overview of the dataset for modeling
df.head()

Unnamed: 0,year,week,prod_id,sales_qty,sales_amt,seas_index,event,promo_display,promo_fb,promo_flyer,promo_google,promo_radio_reach,promo_tv_reach
0,2019,0,23141592,264,1185.36,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019,0,23141593,87,1217.13,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019,4,23141592,1886,7525.14,0.87061,9.0,1.0,98373.0,1.0,8611.0,0.806042,0.825045
3,2019,4,23141593,2465,24625.35,0.87061,9.0,1.0,98373.0,1.0,8611.0,0.806042,0.825045
4,2019,5,23141592,777,3877.23,0.927523,10.0,0.0,31001.0,0.0,8245.0,0.810589,0.794092


## Modeling

In [113]:
df = df.drop('sales_qty', axis = 1)

In [167]:
# Split the dataset
df1 = df[df['prod_id'] == 23141592]
df2 = df[df['prod_id'] == 23141593]

In [114]:
formula = 'sales_amt ~ seas_index + event + promo_display + promo_fb + promo_flyer + promo_google + promo_radio_reach + promo_tv_reach'

In [118]:
import statsmodels.formula.api as smf
model1 = smf.ols(formula=formula, data=df1).fit()
model2 = smf.ols(formula=formula, data=df2).fit()

In [119]:
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:              sales_amt   R-squared:                       0.841
Model:                            OLS   Adj. R-squared:                  0.828
Method:                 Least Squares   F-statistic:                     61.63
Date:                Mon, 17 Apr 2023   Prob (F-statistic):           7.65e-34
Time:                        14:47:55   Log-Likelihood:                -837.35
No. Observations:                 102   AIC:                             1693.
Df Residuals:                      93   BIC:                             1716.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept         -2303.6310    615.60

In [120]:
print(model2.summary())

                            OLS Regression Results                            
Dep. Variable:              sales_amt   R-squared:                       0.708
Model:                            OLS   Adj. R-squared:                  0.683
Method:                 Least Squares   F-statistic:                     28.20
Date:                Mon, 17 Apr 2023   Prob (F-statistic):           9.37e-22
Time:                        14:48:19   Log-Likelihood:                -937.91
No. Observations:                 102   AIC:                             1894.
Df Residuals:                      93   BIC:                             1917.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept         -2715.7394   1651.95

In [123]:
# log tranformation for y
df1['log_sales'] = df1['sales_amt'].apply(np.log)
df2['log_sales'] = df2['sales_amt'].apply(np.log)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['log_sales'] = df1['sales_amt'].apply(np.log)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['log_sales'] = df2['sales_amt'].apply(np.log)


In [125]:
formula2 = 'log_sales ~ seas_index + event + promo_display + promo_fb + promo_flyer + promo_google + promo_radio_reach + promo_tv_reach'
logmodel1 = smf.ols(formula=formula2, data=df1).fit()
logmodel2 = smf.ols(formula=formula2, data=df2).fit()

In [126]:
print(logmodel1.summary())

                            OLS Regression Results                            
Dep. Variable:              log_sales   R-squared:                       0.827
Model:                            OLS   Adj. R-squared:                  0.812
Method:                 Least Squares   F-statistic:                     55.44
Date:                Mon, 17 Apr 2023   Prob (F-statistic):           4.43e-32
Time:                        15:04:41   Log-Likelihood:                 10.776
No. Observations:                 102   AIC:                            -3.551
Df Residuals:                      93   BIC:                             20.07
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             6.5970      0.15

In [127]:
print(logmodel2.summary())

                            OLS Regression Results                            
Dep. Variable:              log_sales   R-squared:                       0.837
Model:                            OLS   Adj. R-squared:                  0.823
Method:                 Least Squares   F-statistic:                     59.68
Date:                Mon, 17 Apr 2023   Prob (F-statistic):           2.64e-33
Time:                        15:04:49   Log-Likelihood:                -37.017
No. Observations:                 102   AIC:                             92.03
Df Residuals:                      93   BIC:                             115.7
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             5.8746      0.24

## Deployment

#### Product 1 (Single Beer); We use addictive model for this

In [129]:
# Calculate the DueTos
beta_promo_display = model1.params['promo_display']
df1['DueTo_promo_display'] = beta_promo_display * df1['promo_display']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['DueTo_promo_display'] = beta_promo_display * df1['promo_display']


In [133]:
beta_promo_fb = model1.params['promo_fb']
df1['DueTo_promo_fb'] = beta_promo_fb * df1['promo_fb']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['DueTo_promo_fb'] = beta_promo_fb * df1['promo_fb']


In [136]:
beta_promo_flyer = model1.params['promo_flyer']
df1['DueTo_promo_flyer'] = beta_promo_flyer * df1['promo_flyer']

beta_promo_gg = model1.params['promo_google']
df1['DueTo_promo_gg'] = beta_promo_gg * df1['promo_google']

beta_promo_radio = model1.params['promo_radio_reach']
df1['DueTo_promo_radio'] = beta_promo_radio * df1['promo_radio_reach']

beta_promo_tv = model1.params['promo_tv_reach']
df1['DueTo_promo_tv'] = beta_promo_tv * df1['promo_tv_reach']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['DueTo_promo_flyer'] = beta_promo_flyer * df1['promo_flyer']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['DueTo_promo_gg'] = beta_promo_gg * df1['promo_google']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['DueTo_promo_radio'] = beta_promo_radio * df1['promo_radio_reach']
A value i

In [140]:
df1['DueTo_promo_display'].sum()

1238.2645961299067

In [141]:
df1['DueTo_promo_fb'].sum()

-11615.413218409272

In [142]:
df1['DueTo_promo_flyer'].sum()

62887.62399384918

In [143]:
df1['DueTo_promo_gg'].sum()

33196.72243146848

In [144]:
df1['DueTo_promo_radio'].sum()

50037.54837462617

In [145]:
df1['DueTo_promo_tv'].sum()

26384.640443794848

In [147]:
# Compared with the Cost
promo_ad1 = promo_ad[promo_ad['prod_id'] == 23141592]
promo_ad1[promo_ad1['vehicle'] == 'Display']['cost'].sum()

1500.0

In [148]:
promo_ad1[promo_ad1['vehicle'] == 'Facebook Banner']['cost'].sum()

28445.25

In [149]:
promo_ad1[promo_ad1['vehicle'] == 'Flyer']['cost'].sum()

32875.0

In [150]:
promo_ad1[promo_ad1['vehicle'] == 'Google Paid Search']['cost'].sum()

37464.5

In [151]:
promo_ad1[promo_ad1['vehicle'] == 'Radio']['cost'].sum()

24600.0

In [152]:
promo_ad1[promo_ad1['vehicle'] == 'TV']['cost'].sum()

69333.33333333334

In [154]:
# Store in a table and Compare
compare1 = pd.DataFrame(
    {
    'Vehicle': ['Display', 'Facebook Banner', 'Flyer', 'Google Paid Search', 'Radio', 'TV'],
    'DueTo Sales': [1238.2645961299067, -11615.413218409272, 62887.62399384918, 33196.72243146848, 50037.54837462617, 26384.640443794848],
    'Cost':[1500.0, 28445.25, 32875.0, 37464.5, 24600.0, 69333.3333333334]
    }
)
compare1

Unnamed: 0,Vehicle,DueTo Sales,Cost
0,Display,1238.264596,1500.0
1,Facebook Banner,-11615.413218,28445.25
2,Flyer,62887.623994,32875.0
3,Google Paid Search,33196.722431,37464.5
4,Radio,50037.548375,24600.0
5,TV,26384.640444,69333.333333


In [155]:
# Calculate DueTo profit; use an assumed profit margin for individual beer as 0.5
margin = 0.5
compare1['DueTo Profits'] = compare1['DueTo Sales'] * margin
compare1['Effective Promotion'] = ['Effective' if x > y else 'Ineffective' for x, y in zip(compare1['DueTo Profits'], compare1['Cost'])]
compare1

Unnamed: 0,Vehicle,DueTo Sales,Cost,DueTo Profits,Effective Promotion
0,Display,1238.264596,1500.0,619.132298,Ineffective
1,Facebook Banner,-11615.413218,28445.25,-5807.706609,Ineffective
2,Flyer,62887.623994,32875.0,31443.811997,Ineffective
3,Google Paid Search,33196.722431,37464.5,16598.361216,Ineffective
4,Radio,50037.548375,24600.0,25018.774187,Effective
5,TV,26384.640444,69333.333333,13192.320222,Ineffective


### Product 2. Package Beer; We use multiplicative model

In [156]:
df_base = df2.copy()
df_base['promo_display'] = 0

In [157]:
log_pred_base = logmodel2.predict(df_base)
pred_base = np.exp(log_pred_base)

In [160]:
pred_original = np.exp(logmodel2.predict(df2))

In [168]:
df2['DueTo_promo_display'] = pred_original - pred_base

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['DueTo_promo_display'] = pred_original - pred_base


In [173]:
df_base = df2.copy()
df_base['promo_fb'] = 0
pred_base = np.exp(logmodel2.predict(df_base))
df2['DueTo_promo_fb'] = pred_original - pred_base

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['DueTo_promo_fb'] = pred_original - pred_base


In [174]:
df_base = df2.copy()
df_base['promo_flyer'] = 0
pred_base = np.exp(logmodel2.predict(df_base))
df2['DueTo_promo_flyer'] = pred_original - pred_base

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['DueTo_promo_flyer'] = pred_original - pred_base


In [175]:
df_base = df2.copy()
df_base['promo_google'] = 0
pred_base = np.exp(logmodel2.predict(df_base))
df2['DueTo_promo_google'] = pred_original - pred_base

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['DueTo_promo_google'] = pred_original - pred_base


In [176]:
df_base = df2.copy()
df_base['promo_radio_reach'] = 0
pred_base = np.exp(logmodel2.predict(df_base))
df2['DueTo_promo_radio'] = pred_original - pred_base

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['DueTo_promo_radio'] = pred_original - pred_base


In [177]:
df_base = df2.copy()
df_base['promo_tv_reach'] = 0
pred_base = np.exp(logmodel2.predict(df_base))
df2['DueTo_promo_tv'] = pred_original - pred_base

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['DueTo_promo_tv'] = pred_original - pred_base


In [188]:
df2['DueTo_promo_display'].sum()

40742.54600056619

In [190]:
df2['DueTo_promo_fb'].sum()

-4007.807794359528

In [192]:
df2['DueTo_promo_flyer'].sum()

68624.986320638

In [193]:
df2['DueTo_promo_google'].sum()

167304.63147023454

In [195]:
df2['DueTo_promo_radio'].sum()

86619.38521309891

In [196]:
df2['DueTo_promo_tv'].sum()

140478.42641120995

In [180]:
promo_ad2 = promo_ad[promo_ad['prod_id'] == 23141593]

In [182]:
promo_ad2[promo_ad2['vehicle'] == 'Display']['cost'].sum()

63000.0

In [183]:
promo_ad2[promo_ad2['vehicle'] == 'Facebook Banner']['cost'].sum()

28445.25

In [184]:
promo_ad2[promo_ad2['vehicle'] == 'Flyer']['cost'].sum()

31375.0

In [185]:
promo_ad2[promo_ad2['vehicle'] == 'Google Paid Search']['cost'].sum()

37464.5

In [186]:
promo_ad2[promo_ad2['vehicle'] == 'Radio']['cost'].sum()

24600.0

In [187]:
promo_ad2[promo_ad2['vehicle'] == 'TV']['cost'].sum()

69333.33333333334

In [197]:
compare2 = pd.DataFrame(
    {
    'Vehicle': ['Display', 'Facebook Banner', 'Flyer', 'Google Paid Search', 'Radio', 'TV'],
    'DueTo Sales': [40742.54600056619, -4007.807794359528, 68624.986320638, 167304.63147023454, 86619.38521309891, 140478.42641120995],
    'Cost':[63000.0, 28445.25, 31375.0, 37464.5, 24600.0, 69333.3333333334]
    }
)
compare2

Unnamed: 0,Vehicle,DueTo Sales,Cost
0,Display,40742.546001,63000.0
1,Facebook Banner,-4007.807794,28445.25
2,Flyer,68624.986321,31375.0
3,Google Paid Search,167304.63147,37464.5
4,Radio,86619.385213,24600.0
5,TV,140478.426411,69333.333333


In [198]:
margin2 = 0.4
compare2['DueTo Profits'] = compare2['DueTo Sales'] * margin2
compare2['Effective Promotion'] = ['Effective' if x > y else 'Ineffective' for x, y in zip(compare2['DueTo Profits'], compare2['Cost'])]
compare2

Unnamed: 0,Vehicle,DueTo Sales,Cost,DueTo Profits,Effective Promotion
0,Display,40742.546001,63000.0,16297.0184,Ineffective
1,Facebook Banner,-4007.807794,28445.25,-1603.123118,Ineffective
2,Flyer,68624.986321,31375.0,27449.994528,Ineffective
3,Google Paid Search,167304.63147,37464.5,66921.852588,Effective
4,Radio,86619.385213,24600.0,34647.754085,Effective
5,TV,140478.426411,69333.333333,56191.370564,Ineffective
