# GDPR Homework

## Using the data in the table, build a model to understand the “last click attribution” of the unconsented clicks

In [1]:
import pandas as pd
import numpy as np

In [15]:
df = pd.read_csv('gdpr_data.csv')
df

Unnamed: 0,user\ngroup,last click\nattribution,clicks,conversions,conversion\nrate
0,consented,google,10000,200,2.00%
1,consented,bing,2500,30,1.20%
2,consented,facebook,5000,250,5.00%
3,consented,instagram,4000,40,1.00%
4,consented,tiktok,3000,18,0.60%
5,consented,(direct),2000,200,10.00%
6,unconsented,(direct),11130,310,2.78%


In [16]:
# convert datatypes
df['clicks'] = df['clicks'].apply(lambda x: x.replace(',',''))
df = df.astype({'clicks':'int'})
df.dtypes

user\ngroup                object
last click\nattribution    object
clicks                      int64
conversions                 int64
conversion\nrate           object
dtype: object

In [18]:
# add total clicks for consented
sum_consent = np.sum(df['clicks'])
print(f'sum of total clicks for consented: {sum_consent}')

sum of total clicks for consented: 37630


In [19]:
# add total conversions for consented
sum_conv = np.sum(df['conversions'])
print(f'sum of total conversions for consented: {sum_conv}')

sum of total conversions for consented: 1048


In [30]:
# calculate percentage of clicks out of total for consented for each attribution
df['percent_of_clicks'] = df['clicks'].apply(lambda x: x/sum_consent)
df

Unnamed: 0,user\ngroup,last click\nattribution,clicks,conversions,conversion\nrate,percent_of_clicks,percent_of_conv
0,consented,google,10000,200,2.00%,0.265745,0.19084
1,consented,bing,2500,30,1.20%,0.066436,0.028626
2,consented,facebook,5000,250,5.00%,0.132873,0.23855
3,consented,instagram,4000,40,1.00%,0.106298,0.038168
4,consented,tiktok,3000,18,0.60%,0.079724,0.017176
5,consented,(direct),2000,200,10.00%,0.053149,0.19084
6,unconsented,(direct),11130,310,2.78%,0.295775,0.295802


In [31]:
# calculate percentage of conversions out of total for consented for each attribution
df['percent_of_conv'] = df['conversions'].apply(lambda x: x/sum_conv)
df

Unnamed: 0,user\ngroup,last click\nattribution,clicks,conversions,conversion\nrate,percent_of_clicks,percent_of_conv
0,consented,google,10000,200,2.00%,0.265745,0.19084
1,consented,bing,2500,30,1.20%,0.066436,0.028626
2,consented,facebook,5000,250,5.00%,0.132873,0.23855
3,consented,instagram,4000,40,1.00%,0.106298,0.038168
4,consented,tiktok,3000,18,0.60%,0.079724,0.017176
5,consented,(direct),2000,200,10.00%,0.053149,0.19084
6,unconsented,(direct),11130,310,2.78%,0.295775,0.295802


In [34]:
# estimate number of clicks for each attribution for unconsented based on previously calculated percentages
sum_clicks_unconsent = df.iloc[6,2]
series_clicks = df['percent_of_clicks'].apply(lambda x: x*sum_clicks_unconsent)
series_clicks

0    2957.746479
1     739.436620
2    1478.873239
3    1183.098592
4     887.323944
5     591.549296
6    3291.971831
Name: percent_of_clicks, dtype: float64

In [35]:
# estimate number of clicks for each attribution for unconsented based on previously calculated percentages
sum_conv_unconsent = df.iloc[6,3]
series_conv = df['percent_of_conv'].apply(lambda x: x*sum_conv_unconsent)
series_conv

0    59.160305
1     8.874046
2    73.950382
3    11.832061
4     5.324427
5    59.160305
6    91.698473
Name: percent_of_conv, dtype: float64

In [41]:
df2 = pd.DataFrame({'user\ngroup': ['unconsented']*6,
                  'last click\nattribution': df.iloc[:6, 1],
                  'clicks': series_clicks[:-1],
                  'conversions': series_conv[:-1]})
df2

Unnamed: 0,user\ngroup,last click\nattribution,clicks,conversions
0,unconsented,google,2957.746479,59.160305
1,unconsented,bing,739.43662,8.874046
2,unconsented,facebook,1478.873239,73.950382
3,unconsented,instagram,1183.098592,11.832061
4,unconsented,tiktok,887.323944,5.324427
5,unconsented,(direct),591.549296,59.160305


In [48]:
final_df = pd.concat([df,df2])
final_df.reset_index(inplace=True)
final_df.drop(columns=['index'], inplace=True)
final_df

Unnamed: 0,user\ngroup,last click\nattribution,clicks,conversions,conversion\nrate,percent_of_clicks,percent_of_conv
0,consented,google,10000.0,200.0,2.00%,0.265745,0.19084
1,consented,bing,2500.0,30.0,1.20%,0.066436,0.028626
2,consented,facebook,5000.0,250.0,5.00%,0.132873,0.23855
3,consented,instagram,4000.0,40.0,1.00%,0.106298,0.038168
4,consented,tiktok,3000.0,18.0,0.60%,0.079724,0.017176
5,consented,(direct),2000.0,200.0,10.00%,0.053149,0.19084
6,unconsented,(direct),11130.0,310.0,2.78%,0.295775,0.295802
7,unconsented,google,2957.746479,59.160305,,,
8,unconsented,bing,739.43662,8.874046,,,
9,unconsented,facebook,1478.873239,73.950382,,,


#### Final numbers for clicks, conversions and conversion rate for unconsented:

In [52]:
for i in range(7, 13):
    final_df.iloc[i,4] = str(round((final_df.iloc[i,3]/final_df.iloc[i,2]) * 100, 2)) + '%'

final_df

Unnamed: 0,user\ngroup,last click\nattribution,clicks,conversions,conversion\nrate,percent_of_clicks,percent_of_conv
0,consented,google,10000.0,200.0,2.00%,0.265745,0.19084
1,consented,bing,2500.0,30.0,1.20%,0.066436,0.028626
2,consented,facebook,5000.0,250.0,5.00%,0.132873,0.23855
3,consented,instagram,4000.0,40.0,1.00%,0.106298,0.038168
4,consented,tiktok,3000.0,18.0,0.60%,0.079724,0.017176
5,consented,(direct),2000.0,200.0,10.00%,0.053149,0.19084
6,unconsented,(direct),11130.0,310.0,2.78%,0.295775,0.295802
7,unconsented,google,2957.746479,59.160305,2.0%,,
8,unconsented,bing,739.43662,8.874046,1.2%,,
9,unconsented,facebook,1478.873239,73.950382,5.0%,,


## How would you estimate consent rate (i.e. percentage of users who consented to cookie tracking) using this table?

In [54]:
# take the percentage of total consented of total users

(df.iloc[0:6,2].sum() / df.iloc[0:13,2].sum()) * 100

70.4225352112676

#### Answer: 70.4% of users consent to cookie tracking.

### What assumption(s) is/are your model making about user behavior? Do you believe the assumption(s)?

#### Answer: The model makes the assumption that percentage of users for each website (last click attribution) is similar across consented and unconsented, hence it uses that percentage to calculate number of clicks and conversions from the total unconsented.