<h1>Attribution Models</h1>

<br>

<h3><font color='grey'>Inspecting the data</font></h3>

<ul>
    <li><b><code>Libraries</code></b> - </li>
    <br>
    <li><b><code>pd.read_csv()</code></b> - </li>
</ul>

In [320]:
# libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# style
# Star import from a local notebook module; style_use, style_params and
# palette_paired used below are presumably defined there — TODO confirm.
from ipynb.fs.full.Style_sheet import *
plt.style.use(style_use)
plt.rcParams.update(style_params)
color = palette_paired

# ignore slice warnings
# NOTE(review): no category is passed, so this filter silences every warning
# for the rest of the session, not only pandas slice warnings.
import warnings
warnings.filterwarnings('ignore')

# read_csv
def read_csv(f):
    """Load the CSV at ``f`` into a DataFrame.

    ``f`` is handed straight to ``pd.read_csv`` together with
    ``parse_dates=True``; the resulting frame is returned as-is.
    """
    return pd.read_csv(f, parse_dates=True)

# attribution data
# A forward slash keeps this relative path valid on Windows, macOS and Linux;
# a backslash is only treated as a path separator on Windows.
f = 'Data/data_task.csv'
att = read_csv(f)

# show
print(att.dtypes)
att.head()

cookie                     object
date_time                  object
event                      object
conversion_value_pound    float64
channel                    object
dtype: object


Unnamed: 0,cookie,date_time,event,conversion_value_pound,channel
0,00000FkCnDfDDf0iC97iC703B,03/07/2018 13:02:11,impression,0.0,Instagram
1,00000FkCnDfDDf0iC97iC703B,17/07/2018 19:15:07,impression,0.0,Online Display
2,00000FkCnDfDDf0iC97iC703B,24/07/2018 15:51:46,impression,0.0,Online Display
3,00000FkCnDfDDf0iC97iC703B,29/07/2018 07:44:51,impression,0.0,Online Display
4,0000nACkD9nFkBBDECD3ki00E,03/07/2018 09:44:57,impression,0.0,Paid Search


<hr>

<br>

<h3><font color='grey'>Cleaning Data</font></h3>

<ul>
    <li><b><code>date_time</code></b> - </li>
    <br>
    <li><b><code>fix_timestamp</code></b> - </li>
</ul>

In [321]:
# fix_timestamp
def fix_timestamp(df, col):
    """Convert a ``DD/MM/YYYY HH:MM:SS`` text column to ``datetime64[ns]``.

    Parameters
    ----------
    df : DataFrame to update; it is modified in place.
    col : name of the column holding the timestamp strings.

    Returns
    -------
    The same ``df`` object, with ``df[col]`` replaced by parsed datetimes.
    """
    # Already parsed (e.g. the cell was re-run): nothing left to convert.
    if pd.api.types.is_datetime64_any_dtype(df[col]):
        return df

    # Only the first 19 characters (date + time) are parsed. The explicit
    # day-first format stops 03/07 from being read as 7 March, and the result
    # is written back to `col` rather than to a hard-coded column name.
    parsed = pd.to_datetime(df[col].str[:19], format='%d/%m/%Y %H:%M:%S')
    df[col] = parsed.astype('datetime64[ns]')
    return df

# parse the timestamp column, then confirm the new dtype
att = fix_timestamp(att, 'date_time')

print(att.dtypes)
att.head()

cookie                            object
date_time                 datetime64[ns]
event                             object
conversion_value_pound           float64
channel                           object
dtype: object


Unnamed: 0,cookie,date_time,event,conversion_value_pound,channel
0,00000FkCnDfDDf0iC97iC703B,2018-07-03 13:02:11,impression,0.0,Instagram
1,00000FkCnDfDDf0iC97iC703B,2018-07-17 19:15:07,impression,0.0,Online Display
2,00000FkCnDfDDf0iC97iC703B,2018-07-24 15:51:46,impression,0.0,Online Display
3,00000FkCnDfDDf0iC97iC703B,2018-07-29 07:44:51,impression,0.0,Online Display
4,0000nACkD9nFkBBDECD3ki00E,2018-07-03 09:44:57,impression,0.0,Paid Search


<hr>

<br>

<h3><font color='grey'>Data Enrichment</font></h3>

<ul>
    <li><b><code>event_total</code></b> - number of events recorded for each cookie</li>
    <br>
    <li><b><code>conv_user_ind &amp; conv_user_value</code></b> - flag and conversion value for cookies that have a conversion event</li>
    <br>
    <li><b><code>rank_event</code></b> - </li>
    <br>
    <li><b><code>decay_time</code></b> - </li>
    <br>
    <li><b><code>day_30_ind</code></b> - 1 when an event falls within 30 days of the cookie's final event; other events are dropped</li>
</ul>

In [322]:
# total events: one row per cookie with its event count, joined back onto att
clk = (
    att.groupby('cookie')['cookie']
    .count()
    .rename('event_total')
    .reset_index()
)
att = att.merge(clk, on='cookie', how='left')


# flag converting users
col = ['cookie', 'conversion_value_pound', 'conv_user_ind']
# Build exactly one row per converting cookie (conversion values summed), so
# the left merge below cannot duplicate att rows when a cookie has more than
# one conversion event.
cvr = (
    att.loc[att['event'] == 'conversion']
    .groupby(col[0], as_index=False)[col[1]]
    .sum()
    .rename(columns={col[1]: 'conv_user_value'})
)
cvr[col[2]] = 1
att = pd.merge(att, cvr, on='cookie', how='left')
# Non-converting cookies have no match in cvr; zero-fill only the two new
# columns so missing values in any other column are left untouched.
att = att.fillna({'conv_user_value': 0, col[2]: 0})


# rank clicks in journey
def rank_event(df, col, grp, asc):
    """Number each row's position within its group after sorting.

    Parameters
    ----------
    df : DataFrame to rank.
    col : column name or list of names passed to ``sort_values``.
    grp : column name(s) that define the groups.
    asc : sort direction, forwarded as ``ascending=asc``.

    Returns
    -------
    Series of 1-based positions that keeps ``df``'s index labels, so it
    aligns row-for-row when assigned back onto ``df``.
    """
    # Rank the frame that was passed in rather than the notebook-level `att`.
    ordered = df.sort_values(col, ascending=asc)
    return ordered.groupby(grp).cumcount() + 1

# journey key and the columns that order events inside a journey
col = ['cookie', 'date_time']
grp = col[0]
# position counted from the earliest event and from the latest event
att = att.assign(
    seq_first_event=rank_event(att, col, grp, True),
    seq_last_event=rank_event(att, col, grp, False),
)


# decay (days) from first/last click
def decay_time(df, c, p):
    """Days between each row's timestamp and its group's first or last one.

    Parameters
    ----------
    df : DataFrame holding the key and timestamp columns.
    c : sequence whose first item is the key column and second item is the
        timestamp column.
    p : ``1`` measures from the group's earliest timestamp; any other value
        measures from the latest. The difference is multiplied by ``p``, so
        ``-1`` gives non-negative days before the latest timestamp.

    Returns
    -------
    Series of fractional days aligned to ``df``'s index.
    """
    key, stamp = c[0], c[1]
    how = 'min' if p == 1 else 'max'
    # Take the per-group anchor from the frame that was passed in rather than
    # from the notebook-level `att`.
    anchor = df[key].map(df.groupby(key)[stamp].agg(how))
    return (df[stamp] - anchor) * p / np.timedelta64(1, 'D')

att['decay_first_event'] = decay_time(att, col, 1)
att['decay_last_event'] = decay_time(att, col, -1)


# flag events within 30 days of final event
att['day_30_ind'] = np.where(att['decay_last_event'] <= 30, 1, 0)
# .copy() so the column writes below target an independent frame
att = att.query('day_30_ind == 1').copy()

# Dropping early events leaves the columns measured from the start of the
# journey stale: the row with seq_first_event == 1 may be gone and event_total
# may exceed the rows that remain. Recompute them on the surviving rows.
# seq_last_event / decay_last_event are measured from the latest event, which
# the filter keeps (its decay_last_event is 0), so they stay valid.
att['event_total'] = att.groupby(grp)[grp].transform('count')
att['seq_first_event'] = rank_event(att, col, grp, True)
att['decay_first_event'] = decay_time(att, col, 1)


# show
att.query('cookie == "00EEFkC3oF309k7DFBkAkkEkC"')

Unnamed: 0,cookie,date_time,event,conversion_value_pound,channel,event_total,conv_user_value,conv_user_ind,seq_first_event,seq_last_event,decay_first_event,decay_last_event,day_30_ind
1234,00EEFkC3oF309k7DFBkAkkEkC,2018-07-08 16:17:05,impression,0.0,Facebook,6,5.5,1.0,1,6,0.0,4.944525,1
1235,00EEFkC3oF309k7DFBkAkkEkC,2018-07-13 13:35:48,impression,0.0,Facebook,6,5.5,1.0,2,5,4.887998,0.056528,1
1236,00EEFkC3oF309k7DFBkAkkEkC,2018-07-13 13:36:20,impression,0.0,Instagram,6,5.5,1.0,3,4,4.888368,0.056157,1
1237,00EEFkC3oF309k7DFBkAkkEkC,2018-07-13 14:09:59,impression,0.0,Instagram,6,5.5,1.0,4,3,4.911736,0.032789,1
1238,00EEFkC3oF309k7DFBkAkkEkC,2018-07-13 14:10:13,impression,0.0,Facebook,6,5.5,1.0,5,2,4.911898,0.032627,1
1239,00EEFkC3oF309k7DFBkAkkEkC,2018-07-13 14:57:12,conversion,5.5,Facebook,6,5.5,1.0,6,1,4.944525,0.0,1


<hr>

<br>

<h3><font color='grey'>Attribution Logic</font></h3>

<ul>
    <li><b><code>first_touch</code></b> - </li>
    <br>
    <li><b><code>last_touch</code></b> - </li>
    <br>
    <li><b><code>last_touch_nd</code></b> - </li>
    <br>
    <li><b><code>linear</code></b> - </li>
    <br>
    <li><b><code>decay</code></b> - </li>
</ul>

In [323]:
# Work on an independent copy: new columns are added below, and writing them
# onto a filtered slice of att is ambiguous (view vs copy).
df = att.query('cookie == "00EEFkC3oF309k7DFBkAkkEkC"').copy()
v = 'conv_user_value'

# first touch: earliest event of a converting cookie
ft = (df['conv_user_ind']==1) & (df['seq_first_event']==1)

# last touch: latest event of a converting cookie
lt = (df['conv_user_ind']==1) & (df['seq_last_event']==1)

# non-direct last touch: the only event when the journey has a single event,
# otherwise the event immediately before the latest one
nd1 = (df['conv_user_ind']==1) & (df['event_total']==1) & (df['seq_last_event']==1)
nd2 = (df['conv_user_ind']==1) & (df['event_total']>1) & (df['seq_last_event']==2)

# application: the matching row receives the cookie's conversion value, others 0
df['att_first_touch'] = np.where(ft, df[v], 0)
df['att_last_touch'] = np.where(lt, df[v], 0)
# nd1 and nd2 cannot both hold (event_total is either 1 or >1), so a single
# combined mask is equivalent to nesting the two conditions
df['att_last_touch_nd'] = np.where(nd1 | nd2, df[v], 0)


df

Unnamed: 0,cookie,date_time,event,conversion_value_pound,channel,event_total,conv_user_value,conv_user_ind,seq_first_event,seq_last_event,decay_first_event,decay_last_event,day_30_ind,att_first_touch,att_last_touch,att_last_touch_nd
1234,00EEFkC3oF309k7DFBkAkkEkC,2018-07-08 16:17:05,impression,0.0,Facebook,6,5.5,1.0,1,6,0.0,4.944525,1,5.5,0.0,0.0
1235,00EEFkC3oF309k7DFBkAkkEkC,2018-07-13 13:35:48,impression,0.0,Facebook,6,5.5,1.0,2,5,4.887998,0.056528,1,0.0,0.0,0.0
1236,00EEFkC3oF309k7DFBkAkkEkC,2018-07-13 13:36:20,impression,0.0,Instagram,6,5.5,1.0,3,4,4.888368,0.056157,1,0.0,0.0,0.0
1237,00EEFkC3oF309k7DFBkAkkEkC,2018-07-13 14:09:59,impression,0.0,Instagram,6,5.5,1.0,4,3,4.911736,0.032789,1,0.0,0.0,0.0
1238,00EEFkC3oF309k7DFBkAkkEkC,2018-07-13 14:10:13,impression,0.0,Facebook,6,5.5,1.0,5,2,4.911898,0.032627,1,0.0,0.0,5.5
1239,00EEFkC3oF309k7DFBkAkkEkC,2018-07-13 14:57:12,conversion,5.5,Facebook,6,5.5,1.0,6,1,4.944525,0.0,1,0.0,5.5,0.0
