In [None]:
import copy
import itertools
import os
import random
from collections import defaultdict

import numpy as np
import pandas as pd
from scipy.special import logsumexp

from Scripts import OrderedCategorySystem as OCS
from Scripts import generate_plots as plots
from Scripts import order_analyses as analyses

In [2]:
# Base presentation sequences over item indices 12-20 (the unshifted location).
# generate_possible_sets maps order 'f' -> F, 'b' -> B and 'm' -> M1 / M2.
F = [12, 13, 15, 14, 16, 18, 17, 19, 20]
B = F[::-1]  # B is exactly F read back to front
M1 = [16, 17, 15, 18, 14, 19, 13, 20, 12]
M2 = [16, 15, 17, 14, 18, 13, 19, 12, 20]

# Offset subtracted ('L') or added ('R') to a base sequence, and the distractor item indices.
SHIFT = 3
DISTRACTORS = [1, 3, 29, 31]

# Item indices 9-23, and the same indices followed by the distractors.
NEW = list(range(9, 24))
ALL = NEW + DISTRACTORS

# Column labels 'I09' ... 'I23', one per index in NEW.
ITEMS = [f'I{idx:02d}' for idx in NEW]

# Three overlapping nine-item windows over ITEMS.
LEFT, CENTRE, RIGHT = ITEMS[:9], ITEMS[3:12], ITEMS[6:]

LOCS = list(zip('LCR', (LEFT, CENTRE, RIGHT)))

# (label, singled-out position, the other eight positions of a nine-item sequence)
ORDERS = [
    (label, anchor, [pos for pos in range(9) if pos != anchor])
    for label, anchor in (('f', 0), ('m', 4), ('b', 8))
]

item_space = list(range(1, 32))

### Load Participant Data

In [3]:
# Load the per-participant summary table and keep only rows with fewer than 3 attempts
# and fewer than 4 total errors.
# NOTE(review): presumably these are the study's exclusion thresholds — confirm.
allParticipants = pd.read_csv('../Analysis/Results/participant_data.csv')
allParticipants =  allParticipants[(allParticipants['ATTEMPTS'] < 3) & (allParticipants['TOTAL_ERRORS'] < 4)]
participants = allParticipants['P_ID'].tolist()

# Load the trial-level table and restrict it to the participants retained above.
participant_df = pd.read_csv('../Analysis/Results/trial_data.csv')    
participant_df = participant_df[participant_df['P_ID'].isin(participants)]

# Per-item columns: the 15 ITEMS labels plus I01/I03/I29/I31 (same indices as DISTRACTORS).
# `others` is every remaining column of the trial table.
cat_assigns = ITEMS + ['I01', 'I03', 'I29', 'I31']
others = participant_df.columns.difference(cat_assigns)

# Collapse the per-item columns into a single dict-valued ITEMS column
# ({column label: value}, skipping NaN cells) and keep the other columns as they are.
participant_df = (
    participant_df[others]
      .assign(ITEMS = participant_df[cat_assigns].agg(
            lambda row: {k: v for k, v in row.items() if not pd.isna(v)},
            axis=1
        )
      )
)
# One plain dict per trial row (column name -> value), in row order.
participant_trials = list(participant_df.to_dict('index').values())

In [None]:
def generate_possible_sets(order, loc):
    """Enumerate every 13-item presentation sequence for one order/location condition.

    Each base sequence (F for 'f', B for 'b', M1 and M2 for 'm') is shifted by
    -SHIFT for loc 'L', +SHIFT for loc 'R' and left unshifted for any other loc.
    The four DISTRACTORS are then inserted, in every possible permutation, at
    positions 1, 5, 7 and 11 (0-based) of the resulting sequence.

    Args:
        order: 'f', 'b' or 'm'.
        loc: 'L', 'R', or anything else (treated as the unshifted location).

    Returns:
        A list of 13-element lists of item indices: 24 sequences per base
        sequence, so 24 for 'f'/'b' and 48 for 'm'.

    Raises:
        ValueError: if ``order`` is not one of 'f', 'b', 'm'.
    """
    if order == 'f':
        item_orders = [F]
    elif order == 'b':
        item_orders = [B]
    elif order == 'm':
        item_orders = [M1, M2]
    else:
        # Previously fell through with `item_orders` unbound and crashed later
        # with an opaque UnboundLocalError.
        raise ValueError(f"order must be 'f', 'b' or 'm', got {order!r}")

    # The location shift does not depend on the distractor permutation, so work it out once.
    if loc == 'L':
        offset = -SHIFT
    elif loc == 'R':
        offset = SHIFT
    else:
        offset = 0

    distract_orders = list(itertools.permutations(DISTRACTORS))
    possible_orders = []
    for i_ord in item_orders:
        loc_ord = [i + offset for i in i_ord]
        for d_ord in distract_orders:
            final_ord = (
                loc_ord[0:1] + [d_ord[0]]
                + loc_ord[1:4] + [d_ord[1]]
                + loc_ord[4:5] + [d_ord[2]]
                + loc_ord[5:8] + [d_ord[3]]
                + loc_ord[8:9]
            )
            possible_orders.append(final_ord)
    return possible_orders
            
    
def analyze_participant(cat_assigns, orders, syst, D, lookupTree=None, temp=1):
    """Score one trial's category assignments under every candidate presentation order.

    For each sequence in ``orders`` a deep copy of ``syst`` is handed to
    ``OCS.model_likelihood`` so the caller's category system is never the
    object passed to the model.

    Args:
        cat_assigns: the trial's item -> category assignments, passed through to the model.
        orders: candidate presentation sequences.
        syst: starting category system (copied per sequence).
        D: distance matrix passed through to the model.
        lookupTree: cache shared across calls; a fresh ``defaultdict`` returning
            ``None`` for missing keys is used when omitted.
        temp: temperature argument passed through to the model.

    Returns:
        ``(marginal, best, worst)``: the log of the mean likelihood across
        ``orders`` (uniform weight per sequence), and the largest and smallest
        per-sequence log-likelihoods.
    """
    cache = defaultdict(lambda: None) if lookupTree is None else lookupTree
    per_order = [
        OCS.model_likelihood(copy.deepcopy(syst), cat_assigns, sequence, D, cache, temp)
        for sequence in orders
    ]
    # log(mean(exp(ll))) == logsumexp(ll) - log(n)
    marginal_like = logsumexp(per_order) - np.log(len(orders))
    return marginal_like, max(per_order), min(per_order)

def generate_all_orderings():
    """Build the lookup of candidate sequences for every location/order condition.

    Returns:
        A dict keyed by location letter followed by order letter ('Lf', 'Lb',
        'Lm', 'Cf', ..., 'Rm'), each mapping to the list returned by
        ``generate_possible_sets(order, loc)``.
    """
    return {
        f'{loc}{order}': generate_possible_sets(order, loc)
        for loc, order in itertools.product(['L', 'C', 'R'], ['f', 'b', 'm'])
    }

def estimate_model_likelihood(trials, temp=1, taxonomy_dir=None):
    """Sum per-trial model log-likelihoods over a collection of trials.

    For each trial a category system is loaded from ``tree2D.json`` when the
    trial's DEPTH equals 2 and from ``tree3D.json`` otherwise. The trial is then
    scored with ``analyze_participant`` against every candidate sequence for its
    LOC/ORDER condition.

    Args:
        trials: iterable of dicts with keys 'DEPTH', 'LOC', 'ORDER' and 'ITEMS'.
            'LOC' + 'ORDER' must form a key produced by ``generate_all_orderings``.
        temp: temperature argument forwarded to ``analyze_participant``.
        taxonomy_dir: directory holding ``tree2D.json`` and ``tree3D.json``.
            Defaults to the original relative location, now built with
            ``os.path.join`` so it resolves on POSIX as well as Windows.

    Returns:
        ``(log_like, worst_like, best_like)``: the sums across trials of the
        marginal, minimum and maximum per-trial log-likelihoods, in that order.
    """
    if taxonomy_dir is None:
        # The separators used to be hard-coded as '\\', which only resolves on Windows.
        taxonomy_dir = os.path.join(
            '..', '..', 'Katie2025_AlienTaxonomist',
            'static_98863bd139ec98cf6bc52549beaaf679', 'taxonomies',
        )
    tree_2d_path = os.path.join(taxonomy_dir, 'tree2D.json')
    tree_3d_path = os.path.join(taxonomy_dir, 'tree3D.json')

    log_like = 0
    worst_like = 0
    best_like = 0
    order_dic = generate_all_orderings()
    D, item_hash = OCS.get_distance_mat(item_space)
    # One cache shared by every trial; only valid if the items are the same across trees.
    lookupTree = defaultdict(lambda: None)
    for p, t in enumerate(trials):
        d, l, o = t['DEPTH'], t['LOC'], t['ORDER']
        cat_assigns = t['ITEMS']
        # Any depth other than 2 falls back to the 3-level tree, as before.
        syst = OCS.CategorySystem(item_hash, tree_2d_path if d == 2 else tree_3d_path)
        orders = order_dic[f'{l}{o}']
        marginal_like, max_like, min_like = analyze_participant(cat_assigns, orders, syst, D, lookupTree, temp)
        log_like += marginal_like
        best_like += max_like
        worst_like += min_like
        if p % 50 == 0:
            print(f'{p+1} trials processed')
    return log_like, worst_like, best_like

In [None]:
# Seed both the stdlib and NumPy generators before the model run.
random.seed(13)
np.random.seed(13)

# Keep every trial whose ORDER is not 'a'; generate_all_orderings only builds
# sequences for the 'f', 'b' and 'm' orders.
ordered_trials = [trial for trial in participant_trials if not trial['ORDER'] == 'a']
log_like_ckmm, worst_like_ckmm, best_like_ckmm = estimate_model_likelihood(
    trials=ordered_trials,
    temp=2,
)

1 trials processed
51 trials processed
101 trials processed
151 trials processed
201 trials processed
251 trials processed
301 trials processed
351 trials processed
401 trials processed
451 trials processed
501 trials processed
551 trials processed
601 trials processed
651 trials processed


In [6]:
# Same order as returned by estimate_model_likelihood:
# summed marginal, summed per-trial minimum, summed per-trial maximum.
print(log_like_ckmm, worst_like_ckmm, best_like_ckmm)

-8210.012968818644 -10397.30350644622 -7711.700189885906


### Random assignment

In [None]:
# Number of ordered trials at each taxonomy depth.
num_3 = sum(1 for trial in ordered_trials if trial['DEPTH'] == 3)
num_2 = sum(1 for trial in ordered_trials if trial['DEPTH'] == 2)

# Per-trial log-likelihood of a uniform random assignment.
# NOTE(review): 13 matches the length of the sequences built by generate_possible_sets
# (9 items + 4 distractors); 1/3 and 1/7 are presumably the chance of any one category
# in the 2-level and 3-level trees — confirm against the taxonomy files.
random_2level = 13 * np.log(1/3)
random_3level = 13 * np.log(1/7)
print(random_2level, random_3level)

# Baseline over the whole data set: per-trial value times the trial count at each depth.
random_likelihood = (num_2 * random_2level) + (num_3 * random_3level)
print(random_likelihood)

-14.281959752685427 -25.296831937719077
-13125.876881461609
