## Installation

pip install pychattr

In [1]:

import pandas as pd
import numpy as np
from datetime import datetime
from collections import defaultdict

import warnings
warnings.filterwarnings("ignore")

from pychattr.channel_attribution import MarkovModel

In [2]:
df = pd.read_csv('../attribution data.csv')
df.head()

Unnamed: 0,cookie,time,interaction,conversion,conversion_value,channel
0,00000FkCnDfDDf0iC97iC703B,2018-07-03T13:02:11Z,impression,0,0.0,Instagram
1,00000FkCnDfDDf0iC97iC703B,2018-07-17T19:15:07Z,impression,0,0.0,Online Display
2,00000FkCnDfDDf0iC97iC703B,2018-07-24T15:51:46Z,impression,0,0.0,Online Display
3,00000FkCnDfDDf0iC97iC703B,2018-07-29T07:44:51Z,impression,0,0.0,Online Display
4,0000nACkD9nFkBBDECD3ki00E,2018-07-03T09:44:57Z,impression,0,0.0,Paid Search


## Dataset details
* Cookie: Randomly generated customer id enabling us to tie subsequent visits back to the same customer
* Timestamp: Date and time when the visit took place
* Interaction: Categorical variable indicating the type of interaction that took place
* Conversion: Boolean variable indicating whether a conversion took place
* Conversion Value: Value of the potential conversion event
* Channel: The marketing channel that brought the customer to our site

In [3]:
print("Summary Statistics of the data")
print("------------------------------")
print("Dataset size:", df.shape[0])
print("Total number of channels/marketing touchpoints:", df['channel'].unique().shape[0], df['channel'].unique())
print("Total Unique customers:", df['cookie'].unique().shape[0])
print("Total number of conversions:", df[df['conversion']==1].shape[0])

Summary Statistics of the data
------------------------------
Dataset size: 586737
Total number of channels/marketing touchpoints: 5 ['Instagram' 'Online Display' 'Paid Search' 'Facebook' 'Online Video']
Total Unique customers: 240108
Total number of conversions: 17639


In [None]:
df = df.sort_values(['cookie', 'time'],ascending=[False, True])
df['visit_order'] = df.groupby('cookie').cumcount() + 1

## Generating user journey paths
User-journey in a list of touch-points.

1. For each cookie, group the channels into a list
2. Calculate the last touch channel resulted in conversion/non conversion for each cookie (visitor)
3. Merging above two will give you cookie, channel and conversion.
4. For each path, now we will append Start, Converted & Null based on onversion column in order to generate complete user journey paths for each cookie.

In [73]:
df_paths = df.groupby('cookie')['channel'].aggregate(lambda x: x.unique().tolist()).reset_index()
df_last_interaction = df.drop_duplicates('cookie',keep ='last')[['cookie', 'conversion']]
df_paths = pd.merge(df_paths, df_last_interaction, how='left', on = ['cookie'])

print(df_paths.head())
df_paths['path'] = df_paths['channel']
df_paths.drop(['channel'], axis=1, inplace=True)
df_paths.head()

                      cookie                      channel  conversion
0  00000FkCnDfDDf0iC97iC703B  [Instagram, Online Display]           0
1  0000nACkD9nFkBBDECD3ki00E                [Paid Search]           0
2  0003EfE37E93D0BC03iBhBBhF                [Paid Search]           0
3  00073CFE3FoFCn70fBhB3kfon                  [Instagram]           0
4  00079hhBkDF3k3kDkiFi9EFAD                [Paid Search]           0


Unnamed: 0,cookie,conversion,path
0,00000FkCnDfDDf0iC97iC703B,0,"[Instagram, Online Display]"
1,0000nACkD9nFkBBDECD3ki00E,0,[Paid Search]
2,0003EfE37E93D0BC03iBhBBhF,0,[Paid Search]
3,00073CFE3FoFCn70fBhB3kfon,0,[Instagram]
4,00079hhBkDF3k3kDkiFi9EFAD,0,[Paid Search]


In [74]:
p = ['>'.join(x) for x in list(df_paths['path'])]
df_paths["path_new"] = p
df_paths.head()

Unnamed: 0,cookie,conversion,path,path_new
0,00000FkCnDfDDf0iC97iC703B,0,"[Instagram, Online Display]",Instagram>Online Display
1,0000nACkD9nFkBBDECD3ki00E,0,[Paid Search],Paid Search
2,0003EfE37E93D0BC03iBhBBhF,0,[Paid Search],Paid Search
3,00073CFE3FoFCn70fBhB3kfon,0,[Instagram],Instagram
4,00079hhBkDF3k3kDkiFi9EFAD,0,[Paid Search],Paid Search


## Markov Model

In [84]:
#Setting parameters for model.
path_feature="path_new"
conversion_feature="conversion"
null_feature="conversion"
separator=","
k_order=1
n_simulations=10000
max_steps=None
return_transition_probs=True
random_state=26

In [85]:
# instantiate the model
mm = MarkovModel(path_feature=path_feature,
                 conversion_feature=conversion_feature,
                 null_feature=null_feature,
                 #revenue_feature=revenue_feature,
                 cost_feature=conversion_feature,
                 separator=separator,
                 k_order=k_order,
                 n_simulations=n_simulations,
                 max_steps=max_steps,
                 return_transition_probs=return_transition_probs,
                 random_state=random_state)

# fit the model
mm.fit(df_paths)

<pychattr.channel_attribution.markov.MarkovModel at 0x1ea168e7278>

## Markov Model

In [86]:
# view the simulation results
print(mm.attribution_model_)

                                          channel_name  total_conversions
0                             Instagram>Online Display          24.719319
1                                          Paid Search        3669.053253
2                                            Instagram        1027.617417
3                                   Instagram>Facebook         988.772773
4                                             Facebook        3029.882282
..                                                 ...                ...
252  Online Display>Instagram>Paid Search>Online Video           0.000000
253  Instagram>Paid Search>Online Video>Online Disp...           0.000000
254  Online Video>Facebook>Online Display>Paid Sear...           0.000000
255  Paid Search>Online Display>Instagram>Facebook>...           0.000000
256  Online Display>Online Video>Paid Search>Instagram           0.000000

[257 rows x 2 columns]


In [87]:
conversions = mm.attribution_model_
conversions[conversions['total_conversions']!=0].sort_values('total_conversions', ascending=False)

Unnamed: 0,channel_name,total_conversions
1,Paid Search,3669.053253
4,Facebook,3029.882282
5,Online Video,2616.716517
8,Online Display,1490.221822
13,Facebook>Instagram,1260.685285
...,...,...
46,Paid Search>Online Video>Facebook>Instagram,3.531331
41,Paid Search>Online Display>Facebook>Instagram,3.531331
40,Facebook>Paid Search>Online Display>Instagram,3.531331
116,Paid Search>Facebook>Instagram>Online Display,3.531331


In [88]:
mm.transition_matrix_

Unnamed: 0,channel_from,channel_to,transition_probability
0,(start),Paid Search,0.212994
1,(start),Facebook,0.170134
2,(start),Online Video>Instagram,0.003742
3,(start),Online Video,0.149328
4,(start),Instagram>Facebook,0.051817
...,...,...,...
490,Online Video>Paid Search>Online Display>Instag...,(null),inf
491,Facebook>Paid Search>Online Display>Online Video,(conversion),inf
492,Facebook>Paid Search>Online Display>Online Video,(null),inf
493,Paid Search>Online Display>Online Video>Facebo...,(conversion),inf


In [89]:
# view the transition matrix
trans_mtx = mm.transition_matrix_
trans_mtx[trans_mtx['transition_probability']!= np.inf].sort_values('transition_probability', ascending=False)

Unnamed: 0,channel_from,channel_to,transition_probability
198,Facebook>Paid Search,(null),0.500000
174,Facebook,(null),0.500000
180,Online Video>Facebook>Instagram,(null),0.500000
179,Online Video>Facebook>Instagram,(conversion),0.500000
178,Online Display>Paid Search,(null),0.500000
...,...,...,...
96,(start),Online Video>Facebook>Instagram>Online Display,0.000057
138,(start),Online Video>Paid Search>Instagram,0.000057
136,(start),Online Display>Paid Search>Online Video>Facebook,0.000057
147,(start),Online Video>Facebook>Online Display>Instagram,0.000057


In [90]:
# view the removal effects
removal_effect = mm.removal_effects_
removal_effect[removal_effect['removal_effect'] !=0]

Unnamed: 0,channel_name,removal_effect
0,Instagram>Online Display,0.001401
1,Paid Search,0.208008
2,Instagram,0.058258
3,Instagram>Facebook,0.056056
4,Facebook,0.171772
...,...,...
191,Online Display>Facebook>Paid Search>Online Video,0.000200
198,Instagram>Facebook>Paid Search>Online Video>On...,0.000200
221,Online Video>Instagram>Online Display,0.000200
229,Online Video>Instagram>Paid Search>Online Display,0.000200


### References

https://github.com/jmwoloso/pychattr