In [73]:
#          Dependencies

import pandas as pd
import statsmodels as sm
import numpy as np
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.graphics.factorplots import interaction_plot
import matplotlib.pyplot as plt
from scipy import stats

In [74]:
#          Define functions

def conv_rate_pivot_tbl_xy(x,y):
    # two-way pivot table
    # args (x,y): x = variable 1 , y = variable 2
    pvdf_xy = df.groupby([x,y],as_index=False)[('Impressions','Conversions')].sum()
    pvdf_xy['ConvRate'] = pvdf_xy.Conversions/pvdf_xy.Impressions
    return pvdf_xy
    
def conv_rate_pivot_tbl_x(x):
    # one-way pivot table
    # arg (x): x = variable
    pvdf_x = df.groupby([x],as_index=False)[('Impressions','Conversions')].sum()
    pvdf_x['ConvRate'] = pvdf_x.Conversions/pvdf_x.Impressions
    return pvdf_x

def two_way_anova_test(x,y):
    # two-way pivot table
    # args (x,y): x = variable 1 , y = variable 2
    formula = 'ConvRate ~ x + y + x:y'
    model = ols(formula,data=df).fit()
    aov_table = anova_lm(model, typ=1)
    return aov_table

def one_way_anova_test(x):
    # one-way anova test
    # arg (x): x = variable
    formula = 'ConvRate ~ x'
    model = ols(formula,data=df).fit()
    aov_table = anova_lm(model, typ=2)
    return aov_table

In [75]:
#          Upload data

# Note "skiprows"
datafile = "/Users/justinpassek/Documents/6469_Caesars_-_v5_v6_v7_20181203_170357_2026719673.csv"
df = pd.read_csv(datafile,skiprows=30)

In [76]:
#          Column mutation

# Change column names
new_names = {'Mercury Res Module - Sales : RES Step 5 (Confirmation Page): Total Conversions':'Conversions',
            'Feed 1 - Reporting dimension 2 value':'CTA',
            'Feed 1 - Reporting dimension 1 value':'Layout',
            'Feed 1 - Reporting dimension 4 value':'Strategy',
            'Dynamic Profile':'Dynamic_Profile',
            'Feed 1 - Reporting dimension 5 value':'Carousel_Style',
            'Feed 1 - Reporting dimension 6 value':'Animation_Style'}

df.rename(columns=new_names,inplace=True)
# Add conversion rate column
df['ConvRate']=df.Conversions / df.Impressions

In [77]:
#          Clean for CAROUSEL

# Concatenate/Aggregate carousel image styles from 3 columns: (rep dim values 4,5,6 in report)
df['Carousel_Concat'] = df['Carousel_Style']
df.loc[df['Strategy'] == 'One_Image','Carousel_Concat']='One_Image'
df.loc[df['Strategy'] == 'Two_Image','Carousel_Concat']='Two_Image'
df.loc[df['Animation_Style'] == 'One_Image','Carousel_Concat']='One_Image'
df.loc[df['Animation_Style'] == 'Two_Image','Carousel_Concat']='Two_Image'

df = df[df.Carousel_Concat != '---']
df = df[df.Carousel_Concat != '(not set)']
a = df['Carousel_Concat'].unique()
print(df)

     Advertiser              Dynamic_Profile  \
0       Caesars  CET_2018_Dynamic_V6_160x600   
1       Caesars  CET_2018_Dynamic_V6_160x600   
2       Caesars  CET_2018_Dynamic_V6_160x600   
3       Caesars  CET_2018_Dynamic_V6_160x600   
4       Caesars  CET_2018_Dynamic_V6_160x600   
5       Caesars  CET_2018_Dynamic_V6_160x600   
6       Caesars  CET_2018_Dynamic_V6_160x600   
7       Caesars  CET_2018_Dynamic_V6_160x600   
8       Caesars  CET_2018_Dynamic_V6_160x600   
9       Caesars  CET_2018_Dynamic_V6_160x600   
10      Caesars  CET_2018_Dynamic_V6_160x600   
11      Caesars  CET_2018_Dynamic_V6_160x600   
12      Caesars  CET_2018_Dynamic_V6_160x600   
13      Caesars  CET_2018_Dynamic_V6_160x600   
14      Caesars  CET_2018_Dynamic_V6_160x600   
15      Caesars  CET_2018_Dynamic_V6_160x600   
16      Caesars  CET_2018_Dynamic_V6_160x600   
17      Caesars  CET_2018_Dynamic_V6_160x600   
18      Caesars  CET_2018_Dynamic_V6_160x600   
19      Caesars  CET_2018_Dynamic_V6_160

In [79]:
#          Clean for VERSION and LAYOUT 

# There must be more conversions than impressions
df = df[df["Impressions"]>df["Conversions"]]

# All capitals for dynamic profile and deliminate
df['Dynamic_Profile'] = df['Dynamic_Profile'].str.upper()
df[['CET',
    'Year',
    'Dynamic',
    'Version',
    'Size']] = df['Dynamic_Profile'].str.split('_',
                                               5,
                                               expand=True)

# Rm unidentifiable values
df = df[df.Layout != '(not set)']
df = df[df.Layout != '---']
df = df.dropna(subset=["Layout", "Version"])

# Narrow to relevant columns
imp_columns = ["Layout","Version","Carousel_Concat","ConvRate","Impressions","Conversions"]
df = df[imp_columns]
print(df)

KeyError: 'Dynamic_Profile'

In [39]:
#          Pivot Tables
print('Layout & Version''\n\n',
      conv_rate_pivot_tbl_xy(df.Layout,df.Version),
     '\n')
print('Layout''\n\n',
      conv_rate_pivot_tbl_x(df.Layout),
     '\n')
print('Version''\n\n',
      conv_rate_pivot_tbl_x(df.Version))

Layout & Version

   Layout Version  Impressions  Conversions  ConvRate
0     OG      V6      7728265       3543.0  0.000458
1     OG      V7      6757058       1461.0  0.000216
2   Zoom      V6     15227485       8556.0  0.000562
3   Zoom      V7     13970990       2854.0  0.000204 

Layout

   Layout  Impressions  Conversions  ConvRate
0     OG     14485323       5004.0  0.000345
1   Zoom     29198475      11410.0  0.000391 

Version

   Version  Impressions  Conversions  ConvRate
0      V6     22955750      12099.0  0.000527
1      V7     20728048       4315.0  0.000208


In [44]:
#          Anova Tests
aov_table = two_way_anova_test(df.Layout,df.Version)

print('One Way''\n\n',
      one_way_anova_test(df.Layout),'\n')
print('Two Way''\n\n',
      two_way_anova_test(df.Layout,df.Version))

One Way

             sum_sq      df         F    PR(>F)
x         0.000076     1.0  8.952031  0.002788
Residual  0.033902  4019.0       NaN       NaN 

Two Way

               df    sum_sq   mean_sq          F    PR(>F)
x            1.0  0.000076  0.000076   8.985547  0.002738
y            1.0  0.000123  0.000123  14.639885  0.000132
x:y          1.0  0.000020  0.000020   2.407161  0.120860
Residual  4017.0  0.033759  0.000008        NaN       NaN


In [45]:
# Extraction from table and output
p_v0 = aov_table.iloc[0,4]
p_v1 = aov_table.iloc[1,4]

if p_v0 < .05:
    print(aov_table.index.values[0], 'Result IS significant')
else:
    print(aov_table.index.values[0], 'Result IS NOT significant')

if p_v1 < .05:
    print(aov_table.index.values[1], 'Result IS significant')
else:
    print(aov_table.index.values[1], 'Result IS NOT significant')

x Result IS significant
y Result IS significant
