In [1]:
import pandas as pd
import numpy as np
import datetime
from datetime import date, timedelta
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_white"

control_data = pd.read_csv('archive/control_group.csv', sep=';')
test_data = pd.read_csv('archive/test_group.csv', sep=';')

# Prepare the dataset (Garbage in, garbage out & quality data beats fancy algorithms)

## Have a look at both datasets

In [6]:
control_data.head()

Unnamed: 0,Campaign Name,Date,Spend (USD),Impressions,Reach,Website Clicks,Searches,Content Viewed,Added to cart,Purchase
0,Control Campaign,1.08.2019,2280,82702.0,56930.0,7016.0,2290.0,2159.0,1819.0,618.0
1,Control Campaign,2.08.2019,1757,121040.0,102513.0,8110.0,2033.0,1841.0,1219.0,511.0
2,Control Campaign,3.08.2019,2343,131711.0,110862.0,6508.0,1737.0,1549.0,1134.0,372.0
3,Control Campaign,4.08.2019,1940,72878.0,61235.0,3065.0,1042.0,982.0,1183.0,340.0
4,Control Campaign,5.08.2019,1835,109559.758621,88844.931034,5320.793103,2221.310345,1943.793103,1300.0,522.793103


In [3]:
test_data.head()

Unnamed: 0,Campaign Name,Date,Spend [USD],# of Impressions,Reach,# of Website Clicks,# of Searches,# of View Content,# of Add to Cart,# of Purchase
0,Test Campaign,1.08.2019,3008,39550,35820,3038,1946,1069,894,255
1,Test Campaign,2.08.2019,2542,100719,91236,4657,2359,1548,879,677
2,Test Campaign,3.08.2019,2365,70263,45198,7885,2572,2367,1268,578
3,Test Campaign,4.08.2019,2710,78451,25937,4216,2216,1437,566,340
4,Test Campaign,5.08.2019,2297,114295,95138,5863,2106,858,956,768


## Changing the column names

In [3]:
control_data.columns = ['Campaign Name', 'Date', 'Spend (USD)', 'Impressions', 'Reach',
                         'Website Clicks', 'Searches', 'Content Viewed',
                         'Added to cart', 'Purchase']
test_data.columns = ['Campaign Name', 'Date', 'Spend (USD)', 'Impressions', 'Reach',
                         'Website Clicks', 'Searches', 'Content Viewed',
                         'Added to cart', 'Purchase']
test_data.columns

Index(['Campaign Name', 'Date', 'Spend (USD)', 'Impressions', 'Reach',
       'Website Clicks', 'Searches', 'Content Viewed', 'Added to cart',
       'Purchase'],
      dtype='object')

## Looking for null values

In [5]:
control_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Campaign Name   30 non-null     object 
 1   Date            30 non-null     object 
 2   Spend (USD)     30 non-null     int64  
 3   Impressions     30 non-null     float64
 4   Reach           30 non-null     float64
 5   Website Clicks  30 non-null     float64
 6   Searches        30 non-null     float64
 7   Content Viewed  30 non-null     float64
 8   Added to cart   30 non-null     float64
 9   Purchase        30 non-null     float64
dtypes: float64(7), int64(1), object(2)
memory usage: 2.5+ KB


## Another way

In [4]:
control_data.isnull().sum()

Campaign Name     0
Date              0
Spend (USD)       0
Impressions       0
Reach             0
Website Clicks    0
Searches          0
Content Viewed    0
Added to cart     0
Purchase          0
dtype: int64

In [4]:
test_data.isnull().sum()

Campaign Name          0
Date                   0
Spend [USD]            0
# of Impressions       0
Reach                  0
# of Website Clicks    0
# of Searches          0
# of View Content      0
# of Add to Cart       0
# of Purchase          0
dtype: int64

## There was null values in Impressions, Reach, Website Clicks, Searches, Content Viewed, Added to cart & Purchase columns and filled with the mean but the inplace paramater assigned to 'True' and this mean the changes implenemted in the original dataset.

In [2]:
control_data["# of Impressions"].fillna(control_data["# of Impressions"].mean(), inplace=True)
control_data["Reach"].fillna(control_data["Reach"].mean(), inplace=True)
control_data["# of Website Clicks"].fillna(control_data["# of Website Clicks"].mean(), inplace=True)
control_data["# of Searches"].fillna(control_data["# of Searches"].mean(), inplace=True)
control_data["# of View Content"].fillna(control_data["# of View Content"].mean(), inplace=True)
control_data["# of Add to Cart"].fillna(control_data["# of Add to Cart"].mean(), inplace=True)
control_data["# of Purchase"].fillna(control_data["# of Purchase"].mean(), inplace=True)

## Creating the ab_data(control_data + test_data)

In [7]:
ab_data = control_data.merge(test_data, how='outer').sort_values('Date')
ab_data.reset_index(drop=True)
ab_data



Unnamed: 0,Campaign Name,Date,Spend (USD),Impressions,Reach,Website Clicks,Searches,Content Viewed,Added to cart,Purchase
0,Control Campaign,1.08.2019,2280,82702.0,56930.0,7016.0,2290.0,2159.0,1819.0,618.0
30,Test Campaign,1.08.2019,3008,39550.0,35820.0,3038.0,1946.0,1069.0,894.0,255.0
39,Test Campaign,10.08.2019,2790,95054.0,79632.0,8125.0,2312.0,1804.0,424.0,275.0
9,Control Campaign,10.08.2019,2149,117624.0,91257.0,2277.0,2475.0,1984.0,1629.0,734.0
40,Test Campaign,11.08.2019,2420,83633.0,71286.0,3750.0,2893.0,2617.0,1075.0,668.0
10,Control Campaign,11.08.2019,2490,115247.0,95843.0,8137.0,2941.0,2486.0,1887.0,475.0
41,Test Campaign,12.08.2019,2831,124591.0,10598.0,8264.0,2081.0,1992.0,1382.0,709.0
11,Control Campaign,12.08.2019,2319,116639.0,100189.0,2993.0,1397.0,1147.0,1439.0,794.0
42,Test Campaign,13.08.2019,1972,65827.0,49531.0,7568.0,2213.0,2058.0,1391.0,812.0
12,Control Campaign,13.08.2019,2697,82847.0,68214.0,6554.0,2390.0,1975.0,1794.0,766.0


## Merging successfully completed?

In [8]:
ab_data['Campaign Name'].value_counts()

Control Campaign    30
Test Campaign       30
Name: Campaign Name, dtype: int64

## The analysis part to find the best campaign

## The relationship between the number of impressions and the amount spent on both campaigns

In [15]:
figure = px.scatter(data_frame = ab_data, 
                    x="Impressions",
                    y="Spend (USD)", 
                    size="Spend (USD)", 
                    color= "Campaign Name", 
                    trendline="ols")
figure.show()

## The control campaign resulted in more impressions

## The number of searches performed on the website from both campaigns

In [11]:
label = ["Total Searches from Control Campaign", 
         "Total Searches from Test Campaign"]
counts = [sum(control_data["Searches"]), 
          sum(test_data["Searches"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Searches')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
# fig.show()

## The test campaign resulted in more searches on the website

##  The number of website clicks from both campaigns

In [12]:
label = ["Website Clicks from Control Campaign", 
         "Website Clicks from Test Campaign"]
counts = [sum(control_data["Website Clicks"]), 
          sum(test_data["Website Clicks"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Website Clicks')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
# fig.show()

## The test campaign wins in the number of website clicks

## the amount of content viewed after reaching the website from both campaigns

In [13]:
label = ["Content Viewed from Control Campaign", 
         "Content Viewed from Test Campaign"]
counts = [sum(control_data["Content Viewed"]), 
          sum(test_data["Content Viewed"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Content Viewed')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
# fig.show()

## The audience of the control campaign viewed more content than the test campaign. Although there is not much difference, as the website clicks of the control campaign were low, its engagement on the website is higher than the test campaign.

## The number of products added to the cart from both campaigns

In [17]:
label = ["Products Added to Cart from Control Campaign", 
         "Products Added to Cart from Test Campaign"]
counts = [sum(control_data["Added to cart"]), 
          sum(test_data["Added to cart"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Added to Cart')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
# fig.show()

## Despite low website clicks more products were added to the cart from the control campaign.

## The amount spent on both campaigns

In [19]:
label = ["Amount Spent in Control Campaign", 
         "Amount Spent in Test Campaign"]
counts = [sum(control_data["Spend (USD)"]), 
          sum(test_data["Spend (USD)"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Amount Spent')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
# fig.show()

## The amount spent on the test campaign is higher than the control campaign. But as we can see that the control campaign resulted in more content views and more products in the cart, the control campaign is more efficient than the test campaign.

## The purchases made by both campaigns

In [20]:
label = ["Purchases Made by Control Campaign", 
         "Purchases Made by Test Campaign"]
counts = [sum(control_data["Purchase"]), 
          sum(test_data["Purchase"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Purchases')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
# fig.show()

## There’s only a difference of around 1% in the purchases made from both ad campaigns. As the Control campaign resulted in more sales in less amount spent on marketing, the control campaign wins here

# Which ad campaign converts more?

## The relationship between the number of website clicks and content viewed from both campaigns

In [22]:
figure = px.scatter(data_frame = ab_data, 
                    x="Content Viewed",
                    y="Website Clicks", 
                    size="Website Clicks", 
                    color= "Campaign Name", 
                    trendline="ols")
figure.show()

## The website clicks are higher in the test campaign, but the engagement from website clicks is higher in the control campaign. So the control campaign wins

## The relationship between the amount of content viewed and the number of products added to the cart from both campaigns

In [25]:
figure = px.scatter(data_frame = ab_data, 
                    x="Added to cart",
                    y="Content Viewed", 
                    size="Added to cart", 
                    color= "Campaign Name", 
                    trendline="ols")
figure.show()

## Again, the control campaign wins

## The number of products added to the cart and the number of sales from both campaigns

In [27]:
figure = px.scatter(data_frame = ab_data, 
                    x="Purchase",
                    y="Added to cart", 
                    size="Purchase", 
                    color= "Campaign Name", 
                    trendline="ols")
figure.show()

## Although the control campaign resulted in more sales and more products in the cart, the conversation rate of the test campaign is higher.

## From the above A/B tests, we found that the control campaign resulted in more sales and engagement from the visitors. More products were viewed from the control campaign, resulting in more products in the cart and more sales. But the conversation rate of products in the cart is higher in the test campaign. The test campaign resulted in more sales according to the products viewed and added to the cart. And the control campaign results in more sales overall. So, the Test campaign can be used to market a specific product to a specific audience, and the Control campaign can be used to market multiple products to a wider audience.