In [1]:
import pandas as pd

# Load the raw missions export; the first CSV column becomes the index.
df = pd.read_csv('./../data/earth_day_missions.csv', index_col=0, low_memory=False)

# Collapse the 'satisfied' / 'ignored' flags into a single outcome label.
# Boolean masks replace the row-wise apply (one Python call per row).
# astype(bool) applies Python truthiness element-wise, so the flags are
# interpreted the same way as `if x['satisfied'] ... else ... if x['ignored']`.
is_satisfied = df['satisfied'].astype(bool).to_numpy()
is_ignored = df['ignored'].astype(bool).to_numpy()
df['outcome'] = 'FAILED'
df.loc[is_ignored, 'outcome'] = 'IGNORED'
df.loc[is_satisfied, 'outcome'] = 'SATISFIED'  # assigned last: 'satisfied' takes precedence over 'ignored'

# Keep only the columns used downstream. The explicit copy makes the subset
# independent of the loaded frame before its columns are reassigned below
# (this can avoid SettingWithCopyWarning on pandas versions without copy-on-write).
df = df[['start_date', 'user', 'missionId', 'missionType', 'missionSubType', 'period', 'target', 'title', 'outcome']].copy()

df['start_date'] = pd.to_datetime(df['start_date'])
# Replace the raw user / mission identifiers with integer category codes.
df['user'] = df['user'].astype('category').cat.codes
df['missionId'] = df['missionId'].astype('category').cat.codes
df['missionType'] = df['missionType'].astype('category')
df['missionSubType'] = df['missionSubType'].astype('category')
df['period'] = df['period'].astype('category')
# Ordered categorical, so sorting on 'outcome' follows SATISFIED < FAILED < IGNORED
# rather than alphabetical order.
df['outcome'] = pd.Categorical(df['outcome'], categories=['SATISFIED', 'FAILED', 'IGNORED'], ordered=True)

# Group rows per user, then by outcome, then chronologically; the index is reset to 0..n-1.
df = df.sort_values(by=['user', 'outcome', 'start_date'], ignore_index=True)

df

Unnamed: 0,start_date,user,missionId,missionType,missionSubType,period,target,title,outcome
0,2024-03-25,0,14,MEASURE,TYPEFORM_BASE,CUSTOM,1,calculate_carbon_footprint,SATISFIED
1,2024-03-25,0,30,TICKET,TICKET,CUSTOM,1,ticket,SATISFIED
2,2024-04-12,0,28,LEARN,DAILYEPISODE,DAILY,1,watch_the_daily_episode,SATISFIED
3,2024-04-12,0,18,ACT,ALL,DAILY,3,log_3_action,SATISFIED
4,2024-04-15,0,1,SURVEY,SURVEY,WEEKLY,1,fill_survey,SATISFIED
...,...,...,...,...,...,...,...,...,...
267891,2024-04-01,23232,29,ACT,FIXED,WEEKLY,2,log_2_action_vegetarian,IGNORED
267892,2024-04-08,23232,27,LEARN,DAILYEPISODE,WEEKLY,3,watch_the_daily_episode_3_consecutive,IGNORED
267893,2024-04-08,23232,33,ACT,ALL,WEEKLY,2,log_2_action2_consecutive,IGNORED
267894,2024-04-15,23232,1,SURVEY,SURVEY,WEEKLY,1,fill_survey,IGNORED


In [1]:
import numpy as np

# Thompson sampling on a simulated Bernoulli multi-armed bandit.
SEED = 42
rng = np.random.default_rng(SEED)  # seeded so Restart & Run All reproduces the same run

n_arms = 5
true_probs = rng.random(n_arms)  # hidden success probability of each arm, drawn from [0, 1)

n_rounds = 100

# Per-arm counters: how often each arm was pulled and how often it paid out.
n_pulls = np.zeros(n_arms)
n_successes = np.zeros(n_arms)

for i in range(n_rounds):
    # Draw one sample per arm from Beta(successes + 1, failures + 1)
    # and play the arm whose sample is largest.
    sample_probs = rng.beta(n_successes + 1, n_pulls - n_successes + 1)
    arm = np.argmax(sample_probs)
    reward = rng.binomial(1, true_probs[arm])

    n_pulls[arm] += 1
    n_successes[arm] += reward

    print(f"Round {i}: arm {arm} was pulled, reward {reward}")

# Empirical success rate per arm. An arm that was never pulled has no estimate:
# it is reported as NaN without evaluating 0/0 (which raises a RuntimeWarning).
estimated_probs = np.divide(
    n_successes,
    n_pulls,
    out=np.full(n_arms, np.nan),
    where=n_pulls > 0,
)
print(f"Estimated probabilities: {estimated_probs}")
print(f"True probabilities: {true_probs}")

Round 0: arm 3 was pulled, reward 1
Round 1: arm 1 was pulled, reward 1
Round 2: arm 0 was pulled, reward 0
Round 3: arm 4 was pulled, reward 1
Round 4: arm 1 was pulled, reward 1
Round 5: arm 2 was pulled, reward 0
Round 6: arm 1 was pulled, reward 0
Round 7: arm 4 was pulled, reward 1
Round 8: arm 4 was pulled, reward 1
Round 9: arm 4 was pulled, reward 0
Round 10: arm 1 was pulled, reward 0
Round 11: arm 3 was pulled, reward 1
Round 12: arm 1 was pulled, reward 1
Round 13: arm 3 was pulled, reward 1
Round 14: arm 3 was pulled, reward 0
Round 15: arm 4 was pulled, reward 0
Round 16: arm 3 was pulled, reward 0
Round 17: arm 3 was pulled, reward 1
Round 18: arm 4 was pulled, reward 0
Round 19: arm 3 was pulled, reward 0
Round 20: arm 4 was pulled, reward 0
Round 21: arm 1 was pulled, reward 0
Round 22: arm 3 was pulled, reward 0
Round 23: arm 4 was pulled, reward 0
Round 24: arm 1 was pulled, reward 0
Round 25: arm 4 was pulled, reward 1
Round 26: arm 1 was pulled, reward 0
Round 27: a