## A/B Testing
# There are two different versions of an advertisement which is placed in emails, in banner ads on Facebook, Twitter, and Google. 
# Using aggregate measurements I analyzed how the two types of ads are performing on each of the different platforms on each day of the week. 

In [None]:

import pandas as pd
import numpy as np

ad_clicks = pd.read_csv('ad_clicks.csv')
ad_clicks.head()


In [22]:
# Ad views by source
ad_views = ad_clicks.groupby('utm_source').ad_click_timestamp.count().reset_index()
ad_views

Unnamed: 0,utm_source,ad_click_timestamp
0,email,80
1,facebook,180
2,google,239
3,twitter,66


In [9]:
# Check if ad was clicked
ad_clicks['is_click'] = ad_clicks['ad_click_timestamp'].apply(lambda x: 'True' if x is np.nan else 'False')
ad_clicks

Unnamed: 0,user_id,utm_source,day,ad_click_timestamp,experimental_group,is_click
0,008b7c6c-7272-471e-b90e-930d548bd8d7,google,6 - Saturday,7:18,A,False
1,009abb94-5e14-4b6c-bb1c-4f4df7aa7557,facebook,7 - Sunday,,B,True
2,00f5d532-ed58-4570-b6d2-768df5f41aed,twitter,2 - Tuesday,,A,True
3,011adc64-0f44-4fd9-a0bb-f1506d2ad439,google,2 - Tuesday,,B,True
4,012137e6-7ae7-4649-af68-205b4702169c,facebook,7 - Sunday,,B,True
...,...,...,...,...,...,...
1649,fe8b5236-78f6-4192-9da6-a76bba67cfe6,twitter,7 - Sunday,,A,True
1650,fed3db6d-8c92-40e3-a4fb-1fb9d7337eb1,facebook,5 - Friday,,B,True
1651,ff3a22ff-521c-478c-87ca-7dc7b8f34372,twitter,3 - Wednesday,,B,True
1652,ff3af0d6-b092-4c4d-9f2e-2bdd8f7c0732,google,1 - Monday,22:57,A,False


In [10]:
# Percent of people who clicked on ads from each utm_source
clicks_by_source = ad_clicks.groupby(['utm_source', 'is_click']).user_id.count().reset_index()
clicks_by_source

Unnamed: 0,utm_source,is_click,user_id
0,email,False,80
1,email,True,175
2,facebook,False,180
3,facebook,True,324
4,google,False,239
5,google,True,441
6,twitter,False,66
7,twitter,True,149


## Analyzing perfomance of ads from different sources

In [11]:
clicks_pivot = clicks_by_source.pivot(columns = 'is_click', index = 'utm_source', values = 'user_id')

# Calculated percent_clicked which is equal to the percent of users who clicked on the ad from each source.
# all sources of ads perform equal
clicks_pivot['percent_clicked'] = clicks_pivot['True'] / (clicks_pivot['True'] + clicks_pivot['False'])
clicks_pivot

is_click,False,True,percent_clicked
utm_source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
email,80,175,0.686275
facebook,180,324,0.642857
google,239,441,0.648529
twitter,66,149,0.693023


## Compare A and B type ads performance, check a hypothesis that the number of clicks change significantly by day of the week.

In [12]:
ad_clicks.groupby(['experimental_group']).count().user_id
ad_clicks.groupby(['experimental_group', 'is_click']).count().user_id

experimental_group  is_click
A                   False       310
                    True        517
B                   False       255
                    True        572
Name: user_id, dtype: int64

In [14]:
#Selected data related to A type ads
a_adclick = ad_clicks[ad_clicks['experimental_group'] == 'A']
a_adclick.groupby(['is_click', 'day']).user_id.count()

is_click  day          
False     1 - Monday       43
          2 - Tuesday      43
          3 - Wednesday    38
          4 - Thursday     47
          5 - Friday       51
          6 - Saturday     45
          7 - Sunday       43
True      1 - Monday       70
          2 - Tuesday      76
          3 - Wednesday    86
          4 - Thursday     69
          5 - Friday       77
          6 - Saturday     73
          7 - Sunday       66
Name: user_id, dtype: int64

In [15]:
#Selected data related to B type ads
b_adclick = ad_clicks[ad_clicks['experimental_group'] == 'B']
b_adclick.groupby(['is_click', 'day']).user_id.count()

is_click  day          
False     1 - Monday       32
          2 - Tuesday      45
          3 - Wednesday    35
          4 - Thursday     29
          5 - Friday       38
          6 - Saturday     42
          7 - Sunday       34
True      1 - Monday       81
          2 - Tuesday      74
          3 - Wednesday    89
          4 - Thursday     87
          5 - Friday       90
          6 - Saturday     76
          7 - Sunday       75
Name: user_id, dtype: int64

In [17]:
# Number of clicks by date for A type ads
a_adclick_pivot = a_adclick.groupby(['is_click', 'day']).user_id.count().reset_index().pivot(columns = 'is_click', index = 'day', values = 'user_id')
a_adclick_pivot

is_click,False,True
day,Unnamed: 1_level_1,Unnamed: 2_level_1
1 - Monday,43,70
2 - Tuesday,43,76
3 - Wednesday,38,86
4 - Thursday,47,69
5 - Friday,51,77
6 - Saturday,45,73
7 - Sunday,43,66


In [18]:
# Number of clicks by date for B type ads
b_adclick_pivot = b_adclick.groupby(['is_click', 'day']).user_id.count().reset_index().pivot(columns = 'is_click', index = 'day', values = 'user_id')
b_adclick_pivot

is_click,False,True
day,Unnamed: 1_level_1,Unnamed: 2_level_1
1 - Monday,32,81
2 - Tuesday,45,74
3 - Wednesday,35,89
4 - Thursday,29,87
5 - Friday,38,90
6 - Saturday,42,76
7 - Sunday,34,75


In [19]:
# Percent of users who clicked on the ad by date for A type ads
a_adclick_pivot['a_percent_day'] =  a_adclick_pivot['True']/(a_adclick_pivot['True'] + a_adclick_pivot['False'])
a_adclick_pivot

is_click,False,True,a_percent_day
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1 - Monday,43,70,0.619469
2 - Tuesday,43,76,0.638655
3 - Wednesday,38,86,0.693548
4 - Thursday,47,69,0.594828
5 - Friday,51,77,0.601562
6 - Saturday,45,73,0.618644
7 - Sunday,43,66,0.605505


In [21]:
# Percent of users who clicked on the ad by date for B type ads
b_adclick_pivot['b_percent_day'] =  b_adclick_pivot['True']/(b_adclick_pivot['True'] + b_adclick_pivot['False'])
b_adclick_pivot
#B type ads showed better results by 10%

is_click,False,True,b_percent_day
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1 - Monday,32,81,0.716814
2 - Tuesday,45,74,0.621849
3 - Wednesday,35,89,0.717742
4 - Thursday,29,87,0.75
5 - Friday,38,90,0.703125
6 - Saturday,42,76,0.644068
7 - Sunday,34,75,0.688073
