In [1]:
'''
   Trying MAB
'''

'\n   Trying MAB\n'

In [1]:
# lets get the data first
import numpy as np
import pandas as pd
import sqlalchemy

def get_connection():
    engine = sqlalchemy.create_engine('sqlite:////Users/nikhillondhe/projects/didactic-happiness/instance/tasks.db')
    return engine
    
def load_data():
    ''' Load data from sqlite db '''
    query = '''
         SELECT T.*, CAST(strftime('%H', R.rec_ts) as Integer) as rec_ts,
         CASE
             WHEN 
                 W.id IS NOT NULL 
                 AND cast((julianday(end_ts) - julianday(start_ts)) * 24 * 60 * 60 as Integer) > 60
             THEN 1
             ELSE 0
         END AS was_picked
         FROM task T
         INNER JOIN recommendation R
         ON T.id = R.task_id
         LEFT JOIN
         work_log W
         ON R.id = W.rec_id
    '''
    engine = get_connection()
    df = pd.read_sql(query, engine)
    engine.dispose()
    return df

In [2]:
from typing import Dict, List


class HashEncoder:
    def __init__(self, fields: List):
        self.fields = fields
        self.field_lookups = {f: {} for f in fields}

    def get_hash(self, input: Dict) -> int:
        encodes = []
        for f in self.fields:
            val = input.get(f, None)
            lookup = self.field_lookups[f]
            encoded = lookup.get(val, len(lookup))
            encodes.append(encoded)
            if val not in lookup:
                lookup[val] = encoded
        value = 0
        for e in encodes:
            value = value << 3
            value += e
        return value
enc = HashEncoder(['complexity', 'type', 'priority', 'repeatable'])

In [3]:
def get_pending_tasks() -> pd.DataFrame:
    query = '''
       SELECT * FROM task
       WHERE task.status != 'done'
    '''
    engine = get_connection()
    df = pd.read_sql(query, engine)
    engine.dispose()
    return df



def get_hash(row):
    return enc.get_hash(row.to_dict())
    
tasks = get_pending_tasks()
tasks['hash'] = tasks.apply(get_hash, axis=1)
tasks

Unnamed: 0,id,name,complexity,type,due_date,priority,repeatable,status,hash
0,2,make food,simple,chores,,low,1,pending,0
1,3,transformer paper,medium,learning,,medium,1,pending,584
2,4,vector search,hard,learning,,medium,0,pending,1097
3,5,lang generation: debug model output,medium,learning,,medium,0,pending,585
4,6,nlp scratch: seq 2 seq learn,medium,learning,,medium,0,pending,585
5,7,attention in PyTorch,medium,learning,,low,0,pending,577
6,13,laundry,simple,chores,,low,1,pending,0
7,16,RL: gaussian LL,medium,learning,,medium,0,pending,585
8,17,blank apply,simple,constructive,,high,1,pending,144
9,19,wa: stitch messages,medium,constructive,,medium,0,pending,649


In [40]:
tasks['hash'].value_counts()

hash
0       3
649     3
585     3
16      3
593     2
1169    2
144     2
9       2
192     2
65      2
584     1
1097    1
145     1
1       1
577     1
705     1
72      1
73      1
136     1
Name: count, dtype: int64

In [6]:
reward_col = 'was_picked'

df = load_data()

In [7]:
from collections import defaultdict

def compute_qvalues(df: pd.DataFrame, col: str, rewards: dict, reward_col: str) -> dict:
    values = df[col].unique()
    k = len(values)
    arm_to_idx = {v: k for k,v in enumerate(values)}
    Qvalues = np.zeros(k)
    counts = np.zeros(k)

    for idx, row in df.iterrows():
        arm_name = row[col]
        reward = rewards[arm_name] * row[reward_col]
        arm_idx = arm_to_idx[arm_name]
        counts[arm_idx] += 1
        Qvalues[arm_idx] += (reward - Qvalues[arm_idx]) / counts[arm_idx]

    
    return {k : Qvalues[v] for k, v in arm_to_idx.items()}

def compute_qvalues_v2(df: pd.DataFrame, reward_col: str, encoder: HashEncoder) -> dict:
    '''Compute qvalues using arm hashing'''
    Qvalues = defaultdict(float)
    counts = defaultdict(int)

    for _, row in df.iterrows():
        hash_code = encoder.get_hash(row.to_dict())
        reward = row[reward_col]
        counts[hash_code] += 1
        Qvalues[hash_code] += (reward - Qvalues[hash_code]) * 1.0 / counts[hash_code]

    return Qvalues, counts

In [8]:
qvalues, counts = compute_qvalues_v2(df, reward_col, enc)
qvalues

defaultdict(float,
            {585: 0.005736137667304019,
             577: 0.0,
             1: 0.08227848101265826,
             145: 0.15736040609137056,
             649: 0.04793028322440085,
             0: 0.14849624060150388,
             593: 0.07888040712468189,
             192: 0.05740181268882174,
             144: 0.11158798283261807,
             584: 0.012820512820512822,
             1097: 0.015384615384615394,
             193: 0.38095238095238104,
             129: 0.04803493449781659,
             705: 0.012658227848101266,
             16: 0.4385964912280703,
             17: 0.10638297872340426,
             73: 0.008130081300813007,
             137: 0.10256410256410256,
             81: 0.07102803738317759,
             136: 0.13253012048192778,
             72: 0.14285714285714285,
             9: 0.038647342995169066,
             8: 0.04494382022471911,
             65: 0.04724409448818898,
             1169: 0.049180327868852444,
             641: 0.13513513

In [9]:
counts

defaultdict(int,
            {585: 523,
             577: 174,
             1: 316,
             145: 197,
             649: 459,
             0: 532,
             593: 786,
             192: 331,
             144: 233,
             584: 156,
             1097: 195,
             193: 21,
             129: 229,
             705: 158,
             16: 114,
             17: 94,
             73: 369,
             137: 39,
             81: 535,
             136: 83,
             72: 77,
             9: 207,
             8: 89,
             65: 127,
             1169: 122,
             641: 37,
             713: 16})

In [11]:
import pickle

with open('/Users/nikhillondhe/projects/didactic-happiness/models/eps-mab.pkl', 'wb') as f:
    obj = {'qvalues': qvalues, 'counts': counts}
    pickle.dump(obj, f)


In [43]:
# trying to do thomson sampling for alpha, beta comps

def compute_alpha_beta(df: pd.DataFrame, col: str, rewards: dict, reward_col: str) -> dict:
    #alpha += reward, beta += (exp reward - reward) but min(beta) = 1
    values = df[col].unique()
    distributions = {val : [1.0, 1.0] for val in values}
    exp_reward = sum(rewards.values()) * 1.0 / len(rewards)
    for _, row in df.iterrows():
        arm_name = row[col]
        distribution = distributions[arm_name]
        rewarded = row[reward_col]
        reward_value = rewards[arm_name]
        
        if rewarded:
            distribution[0] += reward_value
        else:
            distribution[1] += (exp_reward - reward_value)
            if distribution[1] < 1:
                distribution[1] = 1

    return distributions

def compute_alpha_beta_v2(df: pd.DataFrame, reward_col: str, encoder: HashEncoder) -> dict:
    '''Compute alpha and beta for thomspon sampling using hash encoded arms'''
    distributions = {}
    
    for _, row in df.iterrows():
        hash_code = encoder.get_hash(row.to_dict())
        reward = row[reward_col]
        distribution = distributions.get(hash_code, [0, 0])
        if reward:
            distribution[0] += 1
        else:
            distribution[1] += 1
        distributions[hash_code] = distribution

    return distributions

In [44]:
distributions = compute_alpha_beta_v2(df, reward_col, enc)
distributions

{585: [2, 333],
 577: [0, 81],
 1: [16, 166],
 145: [3, 133],
 649: [15, 290],
 0: [51, 268],
 593: [43, 482],
 192: [10, 194],
 144: [12, 132],
 584: [1, 99],
 1097: [0, 91],
 193: [8, 13],
 129: [7, 133],
 705: [0, 96],
 16: [25, 35],
 17: [4, 64],
 73: [0, 80],
 137: [2, 15],
 81: [32, 410],
 136: [7, 42],
 72: [4, 39],
 9: [9, 81],
 65: [1, 28],
 1169: [1, 16]}

In [60]:
enc.field_lookups

{'complexity': {'simple': 0, 'medium': 1, 'hard': 2},
 'type': {'chores': 0, 'learning': 1, 'constructive': 2, 'creative': 3},
 'priority': {'low': 0, 'medium': 1, 'high': 2},
 'repeatable': {1: 0, 0: 1}}

In [55]:
def compute_ucb_values(df: pd.DataFrame, col: str, rewards: dict, reward_col: str) -> dict:
    '''Compute ucb values from historical data'''
    # avg reward + c * sqrt(ln(t) / num choiced)
    lnt = np.log(len(df))
    data = df.groupby([col, reward_col]).size().unstack()
    ucb = {}
    
    for k, v in rewards.items():
        n = data.loc[k].sum()
        reward = data.loc[k, 1] * v * 1.0
        avg_reward = reward / n
        ucb[k] = avg_reward + np.sqrt(lnt / n)
    return ucb

def compute_ucb_values_v2(df: pd.DataFrame, reward_col: str, encoder: HashEncoder) -> dict:
    lnt = np.log(len(df))
    data = df.copy(deep=True)
    data['hash'] = data.apply(lambda row: encoder.get_hash(row.to_dict()), axis=1)
    data = data.groupby(['hash', reward_col]).size().unstack()
    data.fillna(0, inplace=True)
    ucb = {}

    for _, row in data.iterrows():
        n = row.sum()
        reward = row.loc[1]
        avg_reward = reward * 1.0 / n
        ucb[row.name]= avg_reward + np.sqrt(lnt / n)
    return ucb

In [56]:
ucb = compute_ucb_values_v2(df, reward_col, enc)
ucb

{0: np.float64(0.32002188260858166),
 1: np.float64(0.29993324453472536),
 9: np.float64(0.4015043890544616),
 16: np.float64(0.785932620863173),
 17: np.float64(0.4056884949946473),
 65: np.float64(0.5656311791963122),
 72: np.float64(0.529218238237122),
 73: np.float64(0.31979369708687533),
 81: np.float64(0.20844982297515718),
 129: np.float64(0.29174131238222656),
 136: np.float64(0.5514745402641312),
 137: np.float64(0.8113769899892946),
 144: np.float64(0.32169348182074325),
 145: np.float64(0.26732939284910645),
 192: np.float64(0.2492821890948648),
 193: np.float64(1.0051257655835064),
 577: np.float64(0.31781353131654655),
 584: np.float64(0.29603217818489186),
 585: np.float64(0.16224622238073994),
 593: np.float64(0.206739438830987),
 649: np.float64(0.21296187665768912),
 705: np.float64(0.2919303694040929),
 1097: np.float64(0.299843195205764),
 1169: np.float64(0.7525534605775299)}

In [15]:
def compute_metrics(q_values):
    mean = np.mean(q_values)
    std_dev = np.std(q_values)
    cv = std_dev / mean if mean > 0 else 0
    entropy = -np.sum((q_values / np.sum(q_values)) * np.log(q_values / np.sum(q_values)))
    gini = np.sum(np.abs(np.subtract.outer(q_values, q_values))) / (2 * len(q_values) * np.sum(q_values))
    top_k_concentration = np.sum(sorted(q_values, reverse=True)[:2]) / np.sum(q_values)  # Top-2 concentration

    return {
        "mean": mean,
        "std_dev": std_dev,
        "cv": cv,
        "entropy": entropy,
        "gini": gini,
        "top_2_concentration": top_k_concentration,
    }

arm_types = {
    'complexity': [0.12806324, 0.08574673, 0],
    'priority': [0.08396125, 0.08361582, 0.57198444],
    'type': [0.36653895, 0.12583893, 0.72687225, 0.17578125]
}

# Evaluate each arm type
results = {arm_type: compute_metrics(q_values) for arm_type, q_values in arm_types.items()}

# Display results
for arm_type, metrics in results.items():
    print(f"{arm_type}: {metrics}")


complexity: {'mean': np.float64(0.07126998999999999), 'std_dev': np.float64(0.05327432377543676), 'cv': np.float64(0.7475000877008229), 'entropy': np.float64(nan), 'gini': np.float64(0.3993054829638362), 'top_2_concentration': np.float64(1.0)}
priority: {'mean': np.float64(0.24652050333333334), 'std_dev': np.float64(0.2301377998553537), 'cv': np.float64(0.9335442559281663), 'entropy': np.float64(0.6921823464455177), 'gini': np.float64(0.44023259133645287), 'top_2_concentration': np.float64(0.8869386536895356)}
type: {'mean': np.float64(0.34875784499999996), 'std_dev': np.float64(0.23606165298820578), 'cv': np.float64(0.6768640659200248), 'entropy': np.float64(1.1688721800791742), 'gini': np.float64(0.35731412364358434), 'top_2_concentration': np.float64(0.7837896807740627)}


  entropy = -np.sum((q_values / np.sum(q_values)) * np.log(q_values / np.sum(q_values)))
  entropy = -np.sum((q_values / np.sum(q_values)) * np.log(q_values / np.sum(q_values)))


In [91]:
from collections import defaultdict
from typing import Dict, List
import abc
import random


class Recommender:
    def __init__(self, tasks: List):
        self.tasks = tasks

    def recommend(self, num_recs = 5) -> List:
        return random.sample(self.tasks, num_recs)

In [108]:
class MABRecommender(Recommender):
    def __init__(self, tasks: List):
        super().__init__(tasks)
        self.arms = defaultdict(list)
        self._add_tasks(tasks)

        self.num_arms = len(self.arms)
        self.ctr = 0 
        self.cache = {}

    def _add_tasks(self, tasks):
        for task in tasks:
            task_hash = task.get('hash')
            self.arms[task_hash].append(task)
    
    @abc.abstractmethod
    def get_arms(self, num_recs) -> list:
        pass

    @abc.abstractmethod
    def reset_arm_history(self):
        pass

    @abc.abstractmethod
    def pull_arm(self) -> int:
        pass

    def replenish_arms(self):
        if (self.ctr - 3) in self.cache:
            tasks = self.cache.pop(self.ctr - 3)
            self._add_tasks(tasks)
                
    def recommend(self, num_recs = 5) -> List:
        arms = self.get_arms(num_recs)
        return [self.arms[arm][0] for arm in arms]

    def recommend_v2(self, num_recs = 5) -> List:
        self.reset_arm_history()
        self.replenish_arms()
        recs = []

        while len(recs) < num_recs: # could become infinite loop
            arm = self.pull_arm()
            if self.arms[arm]:
                task = self.arms[arm].pop(0)
                recs.append(task)
        self.cache[self.ctr] = recs
        self.ctr += 1
        return recs

In [109]:
class EpsilonRecommender(MABRecommender):
    def __init__(self, tasks: List, qvalues: Dict): 
        super().__init__(tasks)
        self.qvalues = self.init_qvalues(qvalues)
        self.epsilon = 0.2
        self.arm_history = set()
        self.arm_keys = set(self.arms.keys())

    def init_qvalues(self, qvalues: Dict) -> Dict:
        return sorted((qv for qv in qvalues.items() if qv[0] in self.arms.keys()), key=lambda x: x[1], reverse=True)
        
    def get_arms(self, num_recs):
        recs = set()
        
        while (len(recs) < num_recs):
            if random.random() < self.epsilon:
                # explore
                available_arms = list(self.arm_keys - recs)
                rec = random.choice(available_arms)
            else:
                # exploit
                for arm, _ in self.qvalues:
                    if arm not in recs:
                        rec = arm
                        break
            recs.add(rec)
        return recs

    def reset_arm_history(self):
        self.arm_history.clear()
        
    def pull_arm(self) -> int:
        if random.random() < self.epsilon:
            # explore
            available_arms = list(self.arm_keys - self.arm_history)
            rec = random.choice(available_arms)
        else:
            for arm, _ in self.qvalues:
                if arm not in self.arm_history:
                    rec = arm
                    break
        self.arm_history.add(rec)
        return rec

In [112]:
class ThompsonSamplingRecommender(MABRecommender):
    def __init__(self, tasks: List, ts_values: Dict):
        super().__init__(tasks)
        epsilon = 1e-6
        sampled_values = {arm : random.betavariate(max(row[0], epsilon), max(row[1], epsilon)) for arm, row in ts_values.items() if arm in self.arms}
        self.sorted_values = sorted(sampled_values.items(), key = lambda x: x[1], reverse=True)
        self.arm_history = set()

    def get_arms(self, num_recs):
        recs = set()

        for arm, _ in self.sorted_values:
            if arm not in recs:
                recs.add(arm)
                if len(recs) == num_recs:
                    break
        return recs

    def reset_arm_history(self):
        self.arm_history.clear()

    def pull_arm(self) -> int:
        for arm, _ in self.sorted_values:
            if arm not in self.arm_history:
                rec = arm
                break
        self.arm_history.add(rec)
        return rec

In [116]:
class UCBSamplingRecommender(MABRecommender):
    def __init__(self, tasks: List, ucb_values: Dict):
        super().__init__(tasks)
        self.ucb_values = sorted((val for val in ucb_values.items() if val[0] in self.arms), key = lambda x: x[1], reverse=True)
        self.arm_history = set()

    def get_arms(self, num_recs):
        recs = set()

        for arm, _ in self.ucb_values:
            if arm not in recs:
                recs.add(arm)
                if len(recs) == num_recs:
                    break
        return recs

    def reset_arm_history(self):
        self.arm_history.clear()

    def pull_arm(self) -> int:
        for arm, _ in self.ucb_values:
            if arm not in self.arm_history:
                rec = arm
                break
        self.arm_history.add(rec)
        return rec

In [25]:
df

Unnamed: 0,id,name,complexity,type,due_date,priority,repeatable,status,rec_ts,was_picked
0,6,nlp scratch: seq 2 seq learn,medium,learning,,medium,0,pending,16,0
1,7,attention in PyTorch,medium,learning,,low,0,pending,16,0
2,15,read ml book,medium,learning,,medium,0,done,16,0
3,8,tidy living room,simple,chores,,low,0,done,16,0
4,16,RL: gaussian LL,medium,learning,,medium,0,pending,16,0
...,...,...,...,...,...,...,...,...,...,...
3209,32,happiness: mab,medium,constructive,,medium,0,in_progress,18,0
3210,47,boil eggs,simple,chores,,low,1,pending,18,0
3211,39,face pack,simple,chores,,low,1,pending,18,0
3212,4,vector search,hard,learning,,medium,0,pending,18,0


In [32]:
ts = {'medium': [91.0, 631.6666666666657],
 'low': [79.0, 2289.0000000000095],
 'high': [750.0, 1]}

for k,v in ts.items():
    b = np.random.beta(v[0], v[1])
    print (f'{k} :: {b}')

medium :: 0.1089622191730933
low :: 0.03182924638764335
high :: 0.9977455966923572


In [75]:
tasklist = tasks.to_dict('records')

In [94]:
r1 = Recommender(tasklist)
r1.recommend()

[{'id': 67,
  'name': 'tldr ai',
  'complexity': 'simple',
  'type': 'learning',
  'due_date': None,
  'priority': 'medium',
  'repeatable': 1,
  'status': 'pending',
  'hash': 72},
 {'id': 69,
  'name': 'bmo',
  'complexity': 'simple',
  'type': 'chores',
  'due_date': None,
  'priority': 'medium',
  'repeatable': 0,
  'status': 'pending',
  'hash': 9},
 {'id': 127,
  'name': 'fix hue',
  'complexity': 'simple',
  'type': 'chores',
  'due_date': None,
  'priority': 'low',
  'repeatable': 0,
  'status': 'pending',
  'hash': 1},
 {'id': 14,
  'name': 'ironing',
  'complexity': 'simple',
  'type': 'chores',
  'due_date': None,
  'priority': 'low',
  'repeatable': 1,
  'status': 'pending',
  'hash': 0},
 {'id': 35,
  'name': 'mellow daily',
  'complexity': 'simple',
  'type': 'constructive',
  'due_date': None,
  'priority': 'high',
  'repeatable': 1,
  'status': 'pending',
  'hash': 144}]

In [111]:
qvalues = {585: 0.00597014925373134,
             577: 0.0,
             1: 0.08791208791208793,
             145: 0.02205882352941177,
             649: 0.04918032786885249,
             0: 0.15987460815047033,
             593: 0.08190476190476194,
             192: 0.04901960784313723,
             144: 0.08333333333333336,
             584: 0.010000000000000002,
             1097: 0.0,
             193: 0.38095238095238104,
             129: 0.050000000000000024,
             705: 0.0,
             16: 0.4166666666666668,
             17: 0.05882352941176471,
             73: 0.0,
             137: 0.11764705882352941,
             81: 0.07239819004524886,
             136: 0.14285714285714288,
             72: 0.0930232558139535,
             9: 0.1,
             65: 0.03448275862068967,
             1169: 0.05882352941176472}
r2 = EpsilonRecommender(tasklist, qvalues)
print(r2.recommend())
print ('--------')
for _ in range(5):
    print(r2.recommend_v2())
    print ('=========================================')

[{'id': 2, 'name': 'make food', 'complexity': 'simple', 'type': 'chores', 'due_date': None, 'priority': 'low', 'repeatable': 1, 'status': 'pending', 'hash': 0}, {'id': 119, 'name': 'claude styles', 'complexity': 'simple', 'type': 'learning', 'due_date': None, 'priority': 'low', 'repeatable': 0, 'status': 'pending', 'hash': 65}, {'id': 67, 'name': 'tldr ai', 'complexity': 'simple', 'type': 'learning', 'due_date': None, 'priority': 'medium', 'repeatable': 1, 'status': 'pending', 'hash': 72}, {'id': 65, 'name': 'Journal', 'complexity': 'simple', 'type': 'constructive', 'due_date': None, 'priority': 'medium', 'repeatable': 1, 'status': 'pending', 'hash': 136}, {'id': 45, 'name': 'take out trash', 'complexity': 'simple', 'type': 'chores', 'due_date': None, 'priority': 'high', 'repeatable': 1, 'status': 'pending', 'hash': 16}]
--------
[{'id': 45, 'name': 'take out trash', 'complexity': 'simple', 'type': 'chores', 'due_date': None, 'priority': 'high', 'repeatable': 1, 'status': 'pending', 'h

In [113]:
alpha_beta = {585: [2, 333],
 577: [0, 81],
 1: [16, 166],
 145: [3, 133],
 649: [15, 290],
 0: [51, 268],
 593: [43, 482],
 192: [10, 194],
 144: [12, 132],
 584: [1, 99],
 1097: [0, 91],
 193: [8, 13],
 129: [7, 133],
 705: [0, 96],
 16: [25, 35],
 17: [4, 64],
 73: [0, 80],
 137: [2, 15],
 81: [32, 410],
 136: [7, 42],
 72: [4, 39],
 9: [9, 81],
 65: [1, 28],
 1169: [1, 16]}
r3 = ThompsonSamplingRecommender(tasklist, alpha_beta)
print(r3.recommend())
print ('--------')
for _ in range(5):
    print(r3.recommend_v2())
    print ('=========================================')

[{'id': 2, 'name': 'make food', 'complexity': 'simple', 'type': 'chores', 'due_date': None, 'priority': 'low', 'repeatable': 1, 'status': 'pending', 'hash': 0}, {'id': 67, 'name': 'tldr ai', 'complexity': 'simple', 'type': 'learning', 'due_date': None, 'priority': 'medium', 'repeatable': 1, 'status': 'pending', 'hash': 72}, {'id': 65, 'name': 'Journal', 'complexity': 'simple', 'type': 'constructive', 'due_date': None, 'priority': 'medium', 'repeatable': 1, 'status': 'pending', 'hash': 136}, {'id': 69, 'name': 'bmo', 'complexity': 'simple', 'type': 'chores', 'due_date': None, 'priority': 'medium', 'repeatable': 0, 'status': 'pending', 'hash': 9}, {'id': 45, 'name': 'take out trash', 'complexity': 'simple', 'type': 'chores', 'due_date': None, 'priority': 'high', 'repeatable': 1, 'status': 'pending', 'hash': 16}]
--------
[{'id': 45, 'name': 'take out trash', 'complexity': 'simple', 'type': 'chores', 'due_date': None, 'priority': 'high', 'repeatable': 1, 'status': 'pending', 'hash': 16}, 

In [117]:
ucb_values = {0: np.float64(0.32002188260858166),
 1: np.float64(0.29993324453472536),
 9: np.float64(0.4015043890544616),
 16: np.float64(0.785932620863173),
 17: np.float64(0.4056884949946473),
 65: np.float64(0.5656311791963122),
 72: np.float64(0.529218238237122),
 73: np.float64(0.31979369708687533),
 81: np.float64(0.20844982297515718),
 129: np.float64(0.29174131238222656),
 136: np.float64(0.5514745402641312),
 137: np.float64(0.8113769899892946),
 144: np.float64(0.32169348182074325),
 145: np.float64(0.26732939284910645),
 192: np.float64(0.2492821890948648),
 193: np.float64(1.0051257655835064),
 577: np.float64(0.31781353131654655),
 584: np.float64(0.29603217818489186),
 585: np.float64(0.16224622238073994),
 593: np.float64(0.206739438830987),
 649: np.float64(0.21296187665768912),
 705: np.float64(0.2919303694040929),
 1097: np.float64(0.299843195205764),
 1169: np.float64(0.7525534605775299)}
r4 = UCBSamplingRecommender(tasklist, ucb_values)
print(r4.recommend())
print ('--------')
for _ in range(5):
    print(r4.recommend_v2())
    print ('=========================================')

[{'id': 119, 'name': 'claude styles', 'complexity': 'simple', 'type': 'learning', 'due_date': None, 'priority': 'low', 'repeatable': 0, 'status': 'pending', 'hash': 65}, {'id': 67, 'name': 'tldr ai', 'complexity': 'simple', 'type': 'learning', 'due_date': None, 'priority': 'medium', 'repeatable': 1, 'status': 'pending', 'hash': 72}, {'id': 65, 'name': 'Journal', 'complexity': 'simple', 'type': 'constructive', 'due_date': None, 'priority': 'medium', 'repeatable': 1, 'status': 'pending', 'hash': 136}, {'id': 45, 'name': 'take out trash', 'complexity': 'simple', 'type': 'chores', 'due_date': None, 'priority': 'high', 'repeatable': 1, 'status': 'pending', 'hash': 16}, {'id': 123, 'name': 'Business plan for t-shirt idea', 'complexity': 'hard', 'type': 'constructive', 'due_date': None, 'priority': 'high', 'repeatable': 0, 'status': 'pending', 'hash': 1169}]
--------
[{'id': 45, 'name': 'take out trash', 'complexity': 'simple', 'type': 'chores', 'due_date': None, 'priority': 'high', 'repeatab

In [119]:
len(r4.ucb_values)

18