In [1]:
'''
   Trying MAB
'''

'\n   Trying MAB\n'

In [5]:
# lets get the data first
import numpy as np
import pandas as pd
import sqlalchemy

def get_connection():
    engine = sqlalchemy.create_engine('sqlite:////Users/nikhillondhe/projects/didactic-happiness/instance/tasks.db')
    return engine
    
def load_data():
    ''' Load data from sqlite db '''
    query = '''
         SELECT T.*, CAST(strftime('%H', R.rec_ts) as Integer) as rec_ts,
         CASE
             WHEN 
                 W.id IS NOT NULL 
                 AND cast((julianday(end_ts) - julianday(start_ts)) * 24 * 60 * 60 as Integer) > 60
             THEN 1
             ELSE 0
         END AS was_picked
         FROM task T
         INNER JOIN recommendation R
         ON T.id = R.task_id
         LEFT JOIN
         work_log W
         ON R.id = W.rec_id
    '''
    engine = get_connection()
    df = pd.read_sql(query, engine)
    engine.dispose()
    return df

In [6]:
col = 'priority'
rewards = {'low': 1, 'medium': 3, 'high': 7}
reward_col = 'was_picked'

df = load_data()

In [7]:
def compute_qvalues(df: pd.DataFrame, col: str, rewards: dict, reward_col: str) -> dict:
    values = df[col].unique()
    k = len(values)
    arm_to_idx = {v: k for k,v in enumerate(values)}
    Qvalues = np.zeros(k)
    counts = np.zeros(k)

    for idx, row in df.iterrows():
        arm_name = row[col]
        reward = rewards[arm_name] * row[reward_col]
        arm_idx = arm_to_idx[arm_name]
        counts[arm_idx] += 1
        Qvalues[arm_idx] += (reward - Qvalues[arm_idx]) / counts[arm_idx]

    
    return {k : Qvalues[v] for k, v in arm_to_idx.items()}

q_values = compute_qvalues(df, col, rewards, reward_col)
q_values

{'medium': np.float64(0.09221311475409828),
 'low': np.float64(0.08333333333333334),
 'high': np.float64(0.5752688172043007)}

In [8]:
# trying to do thomson sampling for alpha, beta comps

def compute_alpha_beta(df: pd.DataFrame, col: str, rewards: dict, reward_col: str) -> dict:
    #alpha += reward, beta += (exp reward - reward) but min(beta) = 1
    values = df[col].unique()
    distributions = {val : [1.0, 1.0] for val in values}
    exp_reward = sum(rewards.values()) * 1.0 / len(rewards)
    for _, row in df.iterrows():
        arm_name = row[col]
        distribution = distributions[arm_name]
        rewarded = row[reward_col]
        reward_value = rewards[arm_name]
        
        if rewarded:
            distribution[0] += reward_value
        else:
            distribution[1] += (exp_reward - reward_value)
            if distribution[1] < 1:
                distribution[1] = 1

    return distributions

alpha_beta = compute_alpha_beta(df, col, rewards, reward_col)
alpha_beta

{'medium': [91.0, 631.6666666666657],
 'low': [79.0, 2289.0000000000095],
 'high': [750.0, 1]}

In [24]:
def compute_ucb_values(df: pd.DataFrame, col: str, rewards: dict, reward_col: str) -> dict:
    '''Compute ucb values from historical data'''
    # avg reward + c * sqrt(ln(t) / num choiced)
    lnt = np.log(len(df))
    data = df.groupby([col, reward_col]).size().unstack()
    ucb = {}
    
    for k, v in rewards.items():
        n = data.loc[k].sum()
        reward = data.loc[k, 1] * v * 1.0
        avg_reward = reward / n
        ucb[k] = avg_reward + np.sqrt(lnt / n)
    return ucb

ucb = compute_ucb_values(df, col, rewards, reward_col)
ucb

{'low': np.float64(0.17621727632512502),
 'medium': np.float64(0.18317378646135832),
 'high': np.float64(0.6540228995664729)}

In [15]:
def compute_metrics(q_values):
    mean = np.mean(q_values)
    std_dev = np.std(q_values)
    cv = std_dev / mean if mean > 0 else 0
    entropy = -np.sum((q_values / np.sum(q_values)) * np.log(q_values / np.sum(q_values)))
    gini = np.sum(np.abs(np.subtract.outer(q_values, q_values))) / (2 * len(q_values) * np.sum(q_values))
    top_k_concentration = np.sum(sorted(q_values, reverse=True)[:2]) / np.sum(q_values)  # Top-2 concentration

    return {
        "mean": mean,
        "std_dev": std_dev,
        "cv": cv,
        "entropy": entropy,
        "gini": gini,
        "top_2_concentration": top_k_concentration,
    }

arm_types = {
    'complexity': [0.12806324, 0.08574673, 0],
    'priority': [0.08396125, 0.08361582, 0.57198444],
    'type': [0.36653895, 0.12583893, 0.72687225, 0.17578125]
}

# Evaluate each arm type
results = {arm_type: compute_metrics(q_values) for arm_type, q_values in arm_types.items()}

# Display results
for arm_type, metrics in results.items():
    print(f"{arm_type}: {metrics}")


complexity: {'mean': np.float64(0.07126998999999999), 'std_dev': np.float64(0.05327432377543676), 'cv': np.float64(0.7475000877008229), 'entropy': np.float64(nan), 'gini': np.float64(0.3993054829638362), 'top_2_concentration': np.float64(1.0)}
priority: {'mean': np.float64(0.24652050333333334), 'std_dev': np.float64(0.2301377998553537), 'cv': np.float64(0.9335442559281663), 'entropy': np.float64(0.6921823464455177), 'gini': np.float64(0.44023259133645287), 'top_2_concentration': np.float64(0.8869386536895356)}
type: {'mean': np.float64(0.34875784499999996), 'std_dev': np.float64(0.23606165298820578), 'cv': np.float64(0.6768640659200248), 'entropy': np.float64(1.1688721800791742), 'gini': np.float64(0.35731412364358434), 'top_2_concentration': np.float64(0.7837896807740627)}


  entropy = -np.sum((q_values / np.sum(q_values)) * np.log(q_values / np.sum(q_values)))
  entropy = -np.sum((q_values / np.sum(q_values)) * np.log(q_values / np.sum(q_values)))


In [None]:
from typing import Dict, List
import random


class Recommender:
    def __init__(self, tasks: List):
        self.tasks = tasks

    def recommend(self, num_recs = 5) -> List:
        return random.sample(self.tasks, num_recs)

class MABRecommender(Recommender):
    def __init__(self, tasks: List):
        super.__init__(self, tasks)
        self.arms = {}
        for task in tasks:
            priority = task.get('priority')
            task_list = self.arms.get(priority, [])
            task_list.append(task)
            self.arms[priority] = task_list
        self.num_arms = len(self.arms)

    @abstractmethod
    def pull_arm(self) -> str:
        
    def recommend(self, num_recs = 5) -> List:
        for tasks in self.arms.values():
            random.shuffle(tasks)

        recs = {}
        recs_provided = 0
        arm_rec_count = {k : 0 for k in self.arms.keys()}
        max_recs = np.ceil(num_recs * 0.5)
        
        while recs_provided < num_recs:
            arm = self.pull_arm()

            if arm_rec_count[arm] == max_recs:
                
            task_list = self.arms[arm]
            for task in task_list:
                if task not in recs:
                    recs.add(task)
                    recs_provided += 1
                    arm_rec_count[arm] += 1
                    break
        return recs
                    
class EpsilonRecommender(MABRecommender):
    def __init__(self, tasks: List, qvalues: Dict): 
        super__init__(self, tasks)
        self.idx_to_arm = []
        self.qvalues = []
        for k, v in qvalues.items():
            self.qvalues.append(v)
            self.idx_to_arm.append(k)
        self.epsilon = 0.2

    def pull_arm(self):
        if random.random() < self.epsilon:
            idx = random.randint(0, self.num_arms-1)
        else:
            idx = np.argmax(self.qvalues)
        return self.idx_to_arm[idx]

class ThompsonSamplingRecommender(MABRecommender):
    def __init__(self, tasks: List, ts_values: Dict):
        super.__init__(self, tasks)
        self.ts_values = ts_values

    def pull_arm(self):
        for 

In [25]:
df

Unnamed: 0,id,name,complexity,type,due_date,priority,repeatable,status,rec_ts,was_picked
0,6,nlp scratch: seq 2 seq learn,medium,learning,,medium,0,pending,16,0
1,7,attention in PyTorch,medium,learning,,low,0,pending,16,0
2,15,read ml book,medium,learning,,medium,0,done,16,0
3,8,tidy living room,simple,chores,,low,0,done,16,0
4,16,RL: gaussian LL,medium,learning,,medium,0,pending,16,0
...,...,...,...,...,...,...,...,...,...,...
3209,32,happiness: mab,medium,constructive,,medium,0,in_progress,18,0
3210,47,boil eggs,simple,chores,,low,1,pending,18,0
3211,39,face pack,simple,chores,,low,1,pending,18,0
3212,4,vector search,hard,learning,,medium,0,pending,18,0


In [32]:
ts = {'medium': [91.0, 631.6666666666657],
 'low': [79.0, 2289.0000000000095],
 'high': [750.0, 1]}

for k,v in ts.items():
    b = np.random.beta(v[0], v[1])
    print (f'{k} :: {b}')

medium :: 0.1089622191730933
low :: 0.03182924638764335
high :: 0.9977455966923572
