In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from collections import defaultdict

import warnings
warnings.filterwarnings("ignore")

In [4]:
# Load the raw touch-point log; the preview below shows several rows per cookie,
# one for each recorded interaction.
df = pd.read_csv('../attribution data.csv')
df.head()

Unnamed: 0,cookie,time,interaction,conversion,conversion_value,channel
0,00000FkCnDfDDf0iC97iC703B,2018-07-03T13:02:11Z,impression,0,0.0,Instagram
1,00000FkCnDfDDf0iC97iC703B,2018-07-17T19:15:07Z,impression,0,0.0,Online Display
2,00000FkCnDfDDf0iC97iC703B,2018-07-24T15:51:46Z,impression,0,0.0,Online Display
3,00000FkCnDfDDf0iC97iC703B,2018-07-29T07:44:51Z,impression,0,0.0,Online Display
4,0000nACkD9nFkBBDECD3ki00E,2018-07-03T09:44:57Z,impression,0,0.0,Paid Search


## Dataset details
* Cookie: Randomly generated customer id enabling us to tie subsequent visits back to the same customer
* Timestamp: Date and time when the visit took place
* Interaction: Categorical variable indicating the type of interaction that took place
* Conversion: Boolean variable indicating whether a conversion took place
* Conversion Value: Value of the potential conversion event
* Channel: The marketing channel that brought the customer to our site

In [5]:
# Gather the headline figures first, then print the report in one place.
channel_names = df['channel'].unique()
customer_ids = df['cookie'].unique()
converted_rows = df[df['conversion'] == 1]

print("Summary Statistics of the data")
print("------------------------------")
print("Dataset size:", len(df))
print("Total number of channels/marketing touchpoints:", len(channel_names), channel_names)
print("Total Unique customers:", len(customer_ids))
print("Total number of conversions:", len(converted_rows))

Summary Statistics of the data
------------------------------
Dataset size: 586737
Total number of channels/marketing touchpoints: 5 ['Instagram' 'Online Display' 'Paid Search' 'Facebook' 'Online Video']
Total Unique customers: 240108
Total number of conversions: 17639


In [6]:
# Order every cookie's rows chronologically (cookies themselves end up in descending order).
# NOTE(review): `time` is sorted exactly as read from the CSV; the preview shows ISO-8601
# UTC strings, which order correctly as text -- confirm no other format occurs in the file.
df = df.sort_values(['cookie', 'time'],ascending=[False, True])
# 1-based position of each interaction within its cookie's history.
df['visit_order'] = df.groupby('cookie').cumcount() + 1

In [7]:
# Preview the sorted frame together with the new visit_order column.
df.head()

Unnamed: 0,cookie,time,interaction,conversion,conversion_value,channel,visit_order
586736,ooooohAFofEnonEikhAi3fF9o,2018-07-14T17:17:12Z,impression,0,0.0,Paid Search,1
586734,ooooiBh70D3k3BfAhDFfii9h7,2018-07-03T12:57:25Z,impression,0,0.0,Paid Search,1
586735,ooooiBh70D3k3BfAhDFfii9h7,2018-07-19T08:17:59Z,impression,0,0.0,Online Video,2
586731,ooooEiB0CCoEf9fiiC90Dfhfk,2018-07-06T23:30:38Z,impression,0,0.0,Online Display,1
586732,ooooEiB0CCoEf9fiiC90Dfhfk,2018-07-12T23:50:45Z,impression,0,0.0,Online Display,2


## Generating user journey paths
Each user journey is represented as an ordered list of touch-points.

1. For each cookie, group the channels into a list of distinct channels, in order of first appearance.
2. For each cookie (visitor), determine whether its last interaction resulted in a conversion.
3. Merging the two results gives one row per cookie with its channel list and conversion flag.
4. For each path, prepend Start and append Converted or Null, based on the conversion column, to generate the complete user journey for each cookie.


In [8]:
# Distinct channels per cookie, in the order they first appear in the sorted frame.
df_paths = (
    df.groupby('cookie')['channel']
    .aggregate(lambda visits: visits.unique().tolist())
    .reset_index()
)
# Conversion flag of each cookie's final (latest) interaction.
df_last_interaction = df.drop_duplicates('cookie', keep='last')[['cookie', 'conversion']]
df_paths = pd.merge(df_paths, df_last_interaction, how='left', on=['cookie'])

print(df_paths.head())
# Wrap every channel list with the Start state and its outcome state:
# 'Null' when the conversion flag is 0, 'Converted' otherwise.
df_paths['path'] = [
    ['Start'] + channels + (['Null'] if converted == 0 else ['Converted'])
    for channels, converted in zip(df_paths['channel'], df_paths['conversion'])
]

df_paths = df_paths.drop(['channel', 'conversion'], axis=1)

                      cookie                      channel  conversion
0  00000FkCnDfDDf0iC97iC703B  [Instagram, Online Display]           0
1  0000nACkD9nFkBBDECD3ki00E                [Paid Search]           0
2  0003EfE37E93D0BC03iBhBBhF                [Paid Search]           0
3  00073CFE3FoFCn70fBhB3kfon                  [Instagram]           0
4  00079hhBkDF3k3kDkiFi9EFAD                [Paid Search]           0


In [9]:
# Preview the finished journeys: one path per cookie, starting at Start and ending in an outcome state.
df_paths.head()

Unnamed: 0,cookie,path
0,00000FkCnDfDDf0iC97iC703B,"[Start, Instagram, Online Display, Null]"
1,0000nACkD9nFkBBDECD3ki00E,"[Start, Paid Search, Null]"
2,0003EfE37E93D0BC03iBhBBhF,"[Start, Paid Search, Null]"
3,00073CFE3FoFCn70fBhB3kfon,"[Start, Instagram, Null]"
4,00079hhBkDF3k3kDkiFi9EFAD,"[Start, Paid Search, Null]"


## Markov Chains model
It will be implemented in 2 steps:
1. Calculate transition probabilities between all states in state-space
2. Calculate removal effects


We’ll start by defining a list of all user journeys, the total number of conversions and the base-level conversion rate.

In [10]:
# One journey (list of states) per cookie.
list_of_paths = df_paths['path']
# Number of 'Converted' states found across all journeys.
total_conversions = sum(path.count('Converted') for path in list_of_paths)
# Share of journeys that convert; used as the baseline for removal effects.
base_conversion_rate = total_conversions / len(list_of_paths)
# Full state space: every observed channel plus the three special states.
list_of_unique_channels = df['channel'].unique().tolist() + ['Start', 'Null', 'Converted']

len(list_of_paths), total_conversions, base_conversion_rate

(240108, 17639, 0.07346277508454528)

* We will calculate all potential state transitions and use this as an input when calculating transition probabilities.
* Converting transition probabilities to transition matrix.

In [11]:
def transition_states(list_of_paths, states=None):
    """Count the observed transitions between consecutive states of every path.

    Parameters
    ----------
    list_of_paths : iterable of list of str
        User journeys, each a sequence of state names.
    states : iterable of str, optional
        The state space. Defaults to the notebook-level
        ``list_of_unique_channels`` when omitted.

    Returns
    -------
    dict
        Maps ``'origin>destination'`` to the number of times that step was
        observed. Every ordered pair of states is present, including pairs
        that were never observed (count 0).

    Raises
    ------
    KeyError
        If a counted step leads to a destination that is not in ``states``.

    Notes
    -----
    States are compared by exact equality. A substring comparison would count
    a step twice whenever one channel name is contained in another
    (for example 'Search' and 'Paid Search').
    Steps that start from 'Converted' or 'Null', or from a state that is not
    in ``states``, are not counted. A path that ends on a non-absorbing state
    simply contributes no step for that final state.
    """
    if states is None:
        # Backward-compatible default: the state list built earlier in the notebook.
        states = list_of_unique_channels
    states = list(states)

    transition_counts = {x + '>' + y: 0 for x in states for y in states}
    # Only non-absorbing states can be the origin of a step.
    emitting_states = set(states) - {'Converted', 'Null'}

    # Single pass over the data: walk each path as (current, next) pairs.
    for user_path in list_of_paths:
        for origin, destination in zip(user_path, user_path[1:]):
            if origin in emitting_states:
                transition_counts[origin + '>' + destination] += 1

    return transition_counts

def transition_prob(trans_dict, states=None, decimals=3):
    """Turn transition counts into per-origin transition probabilities.

    Parameters
    ----------
    trans_dict : dict
        Maps ``'origin>destination'`` to an observed count.
    states : iterable of str, optional
        The state space. Defaults to the notebook-level
        ``list_of_unique_channels`` when omitted.
    decimals : int or None, optional
        Number of decimals each probability is rounded to (default 3).
        Pass ``None`` to keep full precision.

    Returns
    -------
    collections.defaultdict
        Maps ``'origin>destination'`` to its probability. Only pairs with a
        positive count are included, and no entries are produced for the
        origins 'Converted' and 'Null'.

    Notes
    -----
    A key belongs to an origin only when the text before the first '>' equals
    the origin exactly. A substring test would mix the counts of 'Search' and
    'Paid Search' into each other's totals.
    """
    if states is None:
        # Backward-compatible default: the state list built earlier in the notebook.
        states = list_of_unique_channels

    # Group the positive counts by origin in one pass over the dictionary.
    outgoing_by_origin = defaultdict(list)
    for key, count in trans_dict.items():
        if count > 0:
            origin = key.split('>', 1)[0]
            outgoing_by_origin[origin].append((key, count))

    # Same container type as before so callers see an unchanged return type.
    trans_prob = defaultdict(dict)
    for state in states:
        if state in ('Converted', 'Null'):
            continue
        outgoing = outgoing_by_origin.get(state, [])
        total = sum(count for _, count in outgoing)
        for key, count in outgoing:
            state_prob = float(count) / float(total)
            if decimals is not None:
                state_prob = np.round(state_prob, decimals)
            trans_prob[key] = state_prob

    return trans_prob

def transition_matrix(list_of_paths, transition_probabilities):
    """Build the square state-transition matrix as a DataFrame.

    Parameters
    ----------
    list_of_paths : iterable of list of str
        User journeys; the states found in them define the rows and columns.
    transition_probabilities : dict
        Maps ``'origin>destination'`` to a transition probability.

    Returns
    -------
    pandas.DataFrame
        Rows are origin states, columns are destination states. The absorbing
        states 'Converted' and 'Null' get a self-transition of 1.0. Every cell
        not named in ``transition_probabilities`` is 0.0.

    Notes
    -----
    States are laid out in order of first appearance in the paths, so the
    matrix layout is the same on every run.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order; a set of
    # strings has no stable iteration order across interpreter runs.
    states = list(dict.fromkeys(state for path in list_of_paths for state in path))

    # Allocate the whole matrix at once instead of growing it cell by cell.
    trans_matrix = pd.DataFrame(0.0, index=states, columns=states)

    # Absorbing states stay where they are. A single .at access is used because
    # chained indexing (.loc[row][col] = ...) does not write back under
    # pandas Copy-on-Write.
    for absorbing_state in ('Converted', 'Null'):
        if absorbing_state in trans_matrix.index:
            trans_matrix.at[absorbing_state, absorbing_state] = 1.0

    for key, value in transition_probabilities.items():
        origin, destination = key.split('>')
        trans_matrix.at[origin, destination] = value

    return trans_matrix

## Transition probabilities

In [12]:
# Run the three steps in order: raw transition counts -> per-origin probabilities -> square matrix.
trans_states = transition_states(list_of_paths)
trans_prob = transition_prob(trans_states)
trans_matrix = transition_matrix(list_of_paths, trans_prob)

In [13]:
# Total number of counted transitions across all journeys, followed by the per-pair counts.
print(np.sum(list(trans_states.values())))
trans_states

550105


{'Paid Search>Paid Search': 0,
 'Paid Search>Online Video': 2596,
 'Paid Search>Online Display': 4269,
 'Paid Search>Instagram': 3038,
 'Paid Search>Facebook': 5725,
 'Paid Search>Start': 0,
 'Paid Search>Null': 68476,
 'Paid Search>Converted': 4736,
 'Online Video>Paid Search': 1964,
 'Online Video>Online Video': 0,
 'Online Video>Online Display': 775,
 'Online Video>Instagram': 1307,
 'Online Video>Facebook': 2430,
 'Online Video>Start': 0,
 'Online Video>Null': 31285,
 'Online Video>Converted': 3201,
 'Online Display>Paid Search': 3936,
 'Online Display>Online Video': 737,
 'Online Display>Online Display': 0,
 'Online Display>Instagram': 1251,
 'Online Display>Facebook': 2300,
 'Online Display>Start': 0,
 'Online Display>Null': 32236,
 'Online Display>Converted': 2144,
 'Instagram>Paid Search': 2266,
 'Instagram>Online Video': 1193,
 'Instagram>Online Display': 1164,
 'Instagram>Instagram': 0,
 'Instagram>Facebook': 10822,
 'Instagram>Start': 0,
 'Instagram>Null': 31153,
 'Instagram

In [14]:
# Sanity check against the counts above: Paid Search has
# 2596 + 4269 + 3038 + 5725 + 68476 + 4736 = 88840 outgoing transitions, so
# Paid Search>Online Video = 2596 / 88840 ~ 0.029 and Paid Search>Online Display = 4269 / 88840 ~ 0.048.
trans_prob

defaultdict(dict,
            {'Paid Search>Online Video': 0.029,
             'Paid Search>Online Display': 0.048,
             'Paid Search>Instagram': 0.034,
             'Paid Search>Facebook': 0.064,
             'Paid Search>Null': 0.771,
             'Paid Search>Converted': 0.053,
             'Online Video>Paid Search': 0.048,
             'Online Video>Online Display': 0.019,
             'Online Video>Instagram': 0.032,
             'Online Video>Facebook': 0.059,
             'Online Video>Null': 0.764,
             'Online Video>Converted': 0.078,
             'Online Display>Paid Search': 0.092,
             'Online Display>Online Video': 0.017,
             'Online Display>Instagram': 0.029,
             'Online Display>Facebook': 0.054,
             'Online Display>Null': 0.757,
             'Online Display>Converted': 0.05,
             'Instagram>Paid Search': 0.046,
             'Instagram>Online Video': 0.024,
             'Instagram>Online Display': 0.024,
        

In [15]:
# Rows are origin states, columns are destination states.
trans_matrix

Unnamed: 0,Converted,Online Display,Null,Online Video,Start,Paid Search,Facebook,Instagram
Converted,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Online Display,0.05,0.0,0.757,0.017,0.0,0.092,0.054,0.029
Null,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
Online Video,0.078,0.019,0.764,0.0,0.0,0.048,0.059,0.032
Start,0.0,0.143,0.0,0.142,0.0,0.317,0.278,0.119
Paid Search,0.053,0.048,0.771,0.029,0.0,0.0,0.064,0.034
Facebook,0.053,0.024,0.673,0.026,0.0,0.051,0.0,0.173
Instagram,0.058,0.024,0.63,0.024,0.0,0.046,0.219,0.0


## Calculate removal effects for each of our marketing channels

We can now iteratively go through each of our channels and assess the impact it would have on overall conversion if we were to remove a channel from our state-space. 

In [16]:
def removal_effects(df, conversion_rate):
    """Compute the removal effect of every marketing channel.

    For each channel, the channel's row and column are dropped from the
    transition matrix and the probability of reaching 'Converted' from
    'Start' is recomputed. The removal effect is the relative drop against
    ``conversion_rate``: ``1 - removal_cvr / conversion_rate``.

    Parameters
    ----------
    df : pandas.DataFrame
        Square transition matrix (rows = origins, columns = destinations)
        containing the states 'Start', 'Null' and 'Converted'.
    conversion_rate : float
        Baseline conversion rate that the reduced chain is compared with.

    Returns
    -------
    dict
        Maps each channel (every column other than 'Start', 'Null',
        'Converted') to its removal effect.

    Notes
    -----
    With Q the transitions among the remaining non-absorbing states and r the
    column of direct transitions into 'Converted', the conversion
    probabilities solve ``(I - Q) x = r``. The probability mass that used to
    flow into the removed channel is simply lost from the chain, which is
    equivalent to sending it to 'Null', so the 'Null' column never has to be
    rewritten. Earlier code rewrote it through chained indexing, which does
    not write back under pandas Copy-on-Write and had no influence on the
    result.
    """
    absorbing_states = ['Null', 'Converted']
    channels = [channel for channel in df.columns
                if channel not in ['Start'] + absorbing_states]

    removal_effects_dict = {}
    for channel in channels:
        # Chain without the channel: drop both its column and its row.
        removal_df = df.drop(channel, axis=1).drop(channel, axis=0)

        # Rows of the states that can still be left (Start and the other channels).
        transient_rows = removal_df.drop(absorbing_states, axis=0)

        # Q: transient -> transient, with columns forced into the same order as the rows.
        removal_to_non_conv = transient_rows.loc[:, transient_rows.index].to_numpy(dtype=float)
        # r: transient -> Converted.
        removal_to_conv = transient_rows['Converted'].to_numpy(dtype=float)

        # Solve (I - Q) x = r directly rather than forming the inverse explicitly.
        conversion_prob = np.linalg.solve(
            np.identity(len(removal_to_non_conv)) - removal_to_non_conv,
            removal_to_conv)

        removal_cvr = conversion_prob[transient_rows.index.get_loc('Start')]

        removal_effect = 1 - removal_cvr / conversion_rate
        removal_effects_dict[channel] = removal_effect

    return removal_effects_dict


# Removal effect of each channel, measured against the observed base conversion rate.
removal_effects_dict = removal_effects(trans_matrix, base_conversion_rate)

In [17]:
# Channel -> removal effect.
removal_effects_dict

{'Online Display': 0.15859269716978952,
 'Online Video': 0.21107903201081113,
 'Paid Search': 0.33380466585734037,
 'Facebook': 0.35808310876301885,
 'Instagram': 0.22186848407313675}

The resulting removal effects dictionary can then be used to calculate the Markov Chain attributions for each of our marketing channels

In [248]:
def markov_chain_allocations(removal_effects, total_conversions):
    """Split the total conversions across channels in proportion to their removal effects.

    Parameters
    ----------
    removal_effects : dict
        Maps each channel to its removal effect.
    total_conversions : int or float
        Number of conversions to distribute.

    Returns
    -------
    dict
        Maps each channel to its attributed share of ``total_conversions``.
    """
    effect_total = np.sum(list(removal_effects.values()))

    allocations = {}
    for channel, effect in removal_effects.items():
        # Channel weight = its removal effect relative to the sum of all effects.
        allocations[channel] = (effect / effect_total) * total_conversions
    return allocations


# Attribute the observed conversions to channels in proportion to their removal effects.
attributions = markov_chain_allocations(removal_effects_dict, total_conversions)
attributions

{'Facebook': 4921.373084541543,
 'Online Display': 2179.6443679022686,
 'Online Video': 2900.9987944909517,
 'Instagram': 3049.2853728774808,
 'Paid Search': 4587.698380187758}

## References

https://towardsdatascience.com/marketing-channel-attribution-with-markov-chains-in-python-part-2-the-complete-walkthrough-733c65b23323


https://serhiipuzyrov.com/2019/07/markov-chain-attribution-simple-explanation-of-removal-effect/


https://towardsdatascience.com/multi-channel-marketing-attribution-with-markov-6b744c0b119a


https://inside.getyourguide.com/blog/2020/10/8/selecting-and-designing-a-fractional-attribution-model