In [None]:
import datetime

import numpy as np
import pandas as pd

In [None]:
# Information to simulate (ads for a single product):
# - media platform (YouTube, Facebook, Google, email)
# - spend / budget
# - impressions
# - clicks
# - whether the product was sold
# - revenue generated
# - user info (age, location/region, gender)
# - timestamps of the ads served
# - timestamp of the sale, if any


# Basic flow:
# when a customer sees an ad, they probabilistically move to the next purchase phase based on ad suitability.




class Consumer:
    """Simulated consumer who moves through purchase phases in response to ads.

    Each consumer holds a current phase (an index into ``phases``), a
    per-phase transition propensity, a per-(phase, channel) ad suitability
    matrix and a preference distribution over the ad channels plus one extra
    "not online / no ads" slot. ``day`` simulates one day of ad exposure and
    returns one record per ad actually shown.
    """

    channels = ['facebook', 'youtube', 'email', 'search']
    phases = ['problem recognition', 'information search', 'evaluation of alternatives',
              'purchase decision', 'purchase', 'repurchase']

    # Baseline ad suitability: one row per entry of `phases`, one column per
    # entry of `channels` (it is indexed as [phase, channel] in `see_ad`).
    base_effectiveness = np.array([
        [0.05, 0.1, 0.5, 0.0],    # problem recognition
        [0.05, 0.1, 0.5, 0.1],    # information search
        [0.07, 0.08, 0.6, 0.1],   # evaluation of alternatives
        [0.08, 0.05, 0.6, 0.05],  # purchase decision
        [0.09, 0.0, 0.6, 0.1],    # purchase
        [0.1, 0.0, 0.6, 0.1],     # repurchase
    ])

    def __init__(self, uid):
        """Create a consumer with randomly drawn behaviour parameters.

        Args:
            uid: Identifier copied into every record produced by ``day``.
        """
        n_phases = len(self.phases)
        n_channels = len(self.channels)

        self.uid = uid
        self.phase = np.random.randint(0, n_phases)
        self.transitions = np.random.uniform(0, 0.1, n_phases)
        self.channel_phase_suitability = (
            np.random.uniform(0, 0.1, size=[n_phases, n_channels]) + self.base_effectiveness
        )
        # Cooldown applied after leaving each phase; `day` decrements the
        # active cooldown by one per call. The last phase (repurchase) gets a
        # longer cooldown than the others.
        self.phase_cooldowns = np.random.randint(0, 8, n_phases)
        self.phase_cooldowns[-1] = np.random.randint(8, 31)
        self.phase_cooldown = 0
        # One weight per channel plus a final "not online / no ads" slot,
        # normalised so the weights form a probability distribution.
        self.channel_preference = np.random.exponential(size=n_channels + 1)
        self.channel_preference = self.channel_preference / self.channel_preference.sum()

    def see_ad(self, ad_channel):
        """Expose the consumer to one ad and report the outcome.

        While a cooldown is active the ad has no effect. Otherwise the
        consumer advances a phase with probability
        ``channel_phase_suitability[phase, ad_channel] * transitions[phase]``.
        A click happens with probability 0.1 after a phase transition and
        0.01 otherwise.

        Args:
            ad_channel: Index into ``channels`` of the ad that was shown.

        Returns:
            Tuple ``(clickthrough, purchase)`` of booleans.
        """
        clickthrough = False
        purchase = False
        if self.phase_cooldown == 0:
            # Only the single (phase, channel) entry is needed, so multiply
            # the two scalars instead of building the whole product matrix.
            transition_probability = (
                self.channel_phase_suitability[self.phase, ad_channel]
                * self.transitions[self.phase]
            )
            if transition_probability > np.random.random():
                purchase = self.transition()
                clickthrough = bool(np.random.random() < 0.1)
            else:
                clickthrough = bool(np.random.random() < 0.01)
        return clickthrough, purchase

    def day(self, timestamp):
        """Simulate one day of ad exposure.

        Args:
            timestamp: ``datetime.datetime`` marking the start of the day's
                ad window; ads are spread evenly over the following 16 hours.

        Returns:
            List of dicts with keys ``uid``, ``timestamp``, ``channel``,
            ``clickthrough`` and ``purchase``, one per ad shown. The list is
            empty when no ad was shown.
        """
        self.phase_cooldown = max(0, self.phase_cooldown - 1)
        today_ads = max(0, int(np.random.normal(10, 10)))
        if today_ads == 0:
            return []

        no_ad = len(self.channels)  # index of the "not online / no ads" slot
        ad_channels = np.random.choice(np.arange(no_ad + 1), size=today_ads,
                                       p=self.channel_preference)
        # Keep only the first email of the day; later email slots become "no ad".
        email = self.channels.index('email')
        email_slots = np.flatnonzero(ad_channels == email)
        ad_channels[email_slots[1:]] = no_ad

        responses = []
        for idx, ad_channel in enumerate(ad_channels):
            if ad_channel == no_ad:
                continue
            shown_at = timestamp + datetime.timedelta(hours=16 * idx / today_ads)
            clickthrough, purchase = self.see_ad(ad_channel)
            responses.append({
                'uid': self.uid,
                'timestamp': shown_at.strftime('%Y-%m-%d %H:%M:%S'),
                'channel': self.channels[ad_channel],
                'clickthrough': clickthrough,
                'purchase': purchase,
            })
        return responses

    def transition(self):
        """Advance to the next phase and start the cooldown of the phase left.

        The phase index is capped at the last phase, so a consumer in the
        last phase stays there.

        Returns:
            True when the phase index after advancing is past the 'purchase'
            phase, i.e. when leaving 'purchase' or transitioning while in
            'repurchase'; False otherwise.
        """
        self.phase_cooldown = self.phase_cooldowns[self.phase]
        self.phase += 1
        purchase = bool(self.phase > self.phases.index('purchase'))
        last_phase = len(self.phases) - 1
        if self.phase > last_phase:
            self.phase = last_phase
        return purchase

In [None]:
# Draw unique 8-digit consumer ids. Sampling integers directly avoids
# materialising the whole 90-million-element id range, and topping the set up
# until it is full guarantees that no two consumers share a uid.
N_CONSUMERS = 10000
UID_LOW, UID_HIGH = 10000000, 100000000  # half-open range [UID_LOW, UID_HIGH)

unique_uids = set()
while len(unique_uids) < N_CONSUMERS:
    missing = N_CONSUMERS - len(unique_uids)
    unique_uids.update(np.random.randint(UID_LOW, UID_HIGH, size=missing).tolist())

uids = np.array(list(unique_uids))
consumers = [Consumer(uid) for uid in uids]

In [None]:

# Run the simulation: 100 consecutive days starting 2021-01-01 08:00, calling
# every consumer once per day and pooling the ad records they return.
responses = []
start = datetime.datetime(2021, 1, 1, 8, 0, 0)
for i in range(100):
    day = start + datetime.timedelta(days=i)
    print(day.strftime('%Y-%m-%d'))  # progress indicator
    for c in consumers:
        response = c.day(day)
        if not response:
            # Nothing was shown to this consumer today.
            continue
        responses += response
    


In [None]:
# (number of ad records, number of columns) of the simulated dataset.
pd.DataFrame(data=responses).shape