# Define RDMs based on student data
Natalia Vélez, April 2022

In [8]:
%matplotlib inline

import sys
import pandas as pd
import numpy as np
from ast import literal_eval
import matplotlib.pyplot as plt
import seaborn as sns
import rsatoolbox as rsa
from scipy.spatial import distance

# Add the parent directory to the import path so `utils` can be imported
sys.path.append('..')
from utils import upper_tri, gsearch

# Add the behavioral analysis directory to the import path so that
# `teaching_models` can be imported (presumably it lives there -- confirm)
sys.path.append('../2_behavioral')
from teaching_models import human_df as teach_raw

# Seaborn defaults applied to every figure drawn after this point
sns.set_context('talk')
sns.set_style('white')

## Data wrangling

Load student data:

In [2]:
def _normalize_bets(bets):
    """Divide one bet vector by its own total and return it as a plain list."""
    return (np.asarray(bets) / np.sum(bets)).tolist()


# Read the student belief table from disk
student_df = pd.read_csv('../3_student/outputs/student_beliefs.csv')

# The bet vectors are stored as strings in the CSV; parse them back into lists
student_df['bets_sorted'] = student_df['bets_sorted'].map(literal_eval)

# Rescale every bet vector by the sum of its entries
student_df['bets_rescaled'] = student_df['bets_sorted'].map(_normalize_bets)

print(student_df.shape)
student_df.head()

(15726, 15)


Unnamed: 0,time_elapsed,teacher,problem,num_trial,num_hint,bets,bonus,worker,trial_order,batch,order,bets_sorted,belief_in_true,teacher_rating,bets_rescaled
0,181304,4,34,0.0,1,"[0, 35, 41, 48]",0.013777,ARI1EMAHUA9PO,0,2022-02-02-1303,"['B', 'D', 'A', 'C']","[41, 0, 48, 35]",0.330645,2,"[0.33064516129032256, 0.0, 0.3870967741935484,..."
1,1103234,4,34,33.0,1,"[0, 43, 41, 48]",0.012942,AW0MG225VXWCN,47,2022-02-15-2034,"['B', 'D', 'A', 'C']","[41, 0, 48, 43]",0.310606,2,"[0.3106060606060606, 0.0, 0.36363636363636365,..."
2,1035089,4,34,6.0,1,"[0, 54, 44, 42]",0.013095,A24HOBENULGZA2,78,2022-02-15-2034,"['B', 'D', 'A', 'C']","[44, 0, 42, 54]",0.314286,2,"[0.3142857142857143, 0.0, 0.3, 0.3857142857142..."
3,429957,4,34,10.0,1,"[0, 25, 25, 25]",0.013889,A12IHTKQFQIPD1,102,2022-02-16-1639,"['B', 'D', 'A', 'C']","[25, 0, 25, 25]",0.333333,2,"[0.3333333333333333, 0.0, 0.3333333333333333, ..."
4,1011954,4,34,34.0,1,"[0, 2, 99, 4]",0.039286,A2XQ3CFB5HT2ZQ,113,2022-02-16-1639,"['B', 'D', 'A', 'C']","[99, 0, 4, 2]",0.942857,2,"[0.9428571428571428, 0.0, 0.0380952380952381, ..."


Get problem orders from teacher data

In [3]:
# Columns of the teacher data that are carried into the timing table
timing_cols = ['subject', 'run', 'block_idx', 'ex_idx', 'problem']

# Work on an independent copy, with a temporary column of ones to count rows
teach_timing = teach_raw[timing_cols].copy().assign(hint_presented=1)

# Running count of rows within each (subject, problem) pair: 1 for the first
# row of the pair, 2 for the second, and so on
hint_counter = teach_timing.groupby(['subject', 'problem'])['hint_presented'].cumsum()

teach_timing = (
    teach_timing
    .drop(columns=['hint_presented'])
    .assign(num_hint=hint_counter)
    # 1-based trial number; the factor 3 presumably reflects three example
    # slots per block (ex_idx 0-2) -- confirm against the task design
    .assign(trial=lambda timing: timing['block_idx'] * 3 + timing['ex_idx'] + 1)
)

print(teach_timing.shape)
teach_timing.head()

(3213, 7)


Unnamed: 0,subject,run,block_idx,ex_idx,problem,num_hint,trial
0,1,1,0,0,22,1,1
1,1,1,0,1,22,2,2
3,1,1,1,0,18,1,4
4,1,1,1,1,18,2,5
5,1,1,1,2,18,3,6


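Average students' rescaled bets for each teacher, problem, and hint number, then merge the averages with the teacher timing table: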

In [4]:
# One record per (teacher, problem, num_hint) group: the three group keys
# followed by the element-wise mean of that group's rescaled bet vectors
avg_list = [
    (*group_key, np.array(list(group_rows['bets_rescaled'])).mean(axis=0))
    for group_key, group_rows in student_df.groupby(['teacher', 'problem', 'num_hint'])
]

# The teacher id is stored under 'subject' so the key columns share their
# names with teach_timing
avg_student = pd.DataFrame(avg_list, columns=['subject', 'problem', 'num_hint', 'bet'])

# Inner join on the shared key columns; timing rows with no matching student
# average are dropped
avg_student = teach_timing.merge(
    avg_student,
    how='inner',
    on=['subject', 'problem', 'num_hint'],
)
print(avg_student.shape)
avg_student.head()

(3122, 8)


Unnamed: 0,subject,run,block_idx,ex_idx,problem,num_hint,trial,bet
0,1,1,0,0,22,1,1,"[0.29026393202295653, 0.35708278351162803, 0.0..."
1,1,1,0,1,22,2,2,"[0.4325754714726199, 0.13024876969614882, 0.00..."
2,1,1,1,0,18,1,4,"[0.37872148531494354, 0.42886368466488933, 0.0..."
3,1,1,1,2,18,3,6,"[0.40182066942982697, 0.3910725882427214, 0.09..."
4,1,1,2,0,1,1,7,"[0.25761351287502954, 0.5630531537916372, 0.17..."


## Build teacher-specific RDMs

In [11]:
sub = 1
sub_id = f'sub-{sub:02}'

# Select the averaged student beliefs for this teacher's examples, keeping the
# row order of avg_student and renumbering the index from 0
teacher_bets = avg_student[avg_student.subject == sub].reset_index(drop=True)

n_obs = teacher_bets.shape[0]
if n_obs == 0:
    raise ValueError(f'No averaged student beliefs found for {sub_id}')

# Stack the per-example belief vectors into a 2-D array (one row per example)
# and compute every pairwise cosine distance in a single vectorized call.
# This replaces a nested iterrows() loop that made n_obs**2 Python-level calls
# to distance.cosine; values agree up to floating-point rounding.
bet_mtx = np.stack(teacher_bets['bet'].tolist())
student_dist = distance.cdist(bet_mtx, bet_mtx, metric='cosine')

# Define the behavioral model RDM from the vectorized distance matrix
student_model = rsa.model.ModelFixed('Student Beliefs', upper_tri(student_dist))

# Find this subject's neural RDM files so the model can be lined up with betas
beta_files = gsearch('outputs', 'roi_rsa', sub_id, '*RDM.h5')
if len(beta_files) == 0:
    raise FileNotFoundError(f'No RDM files found for {sub_id} under outputs/roi_rsa')
f = beta_files[0]

In [12]:
# Load the RDM(s) stored in the file selected as `f`
beta_rdm = rsa.rdm.load_rdm(f)

In [13]:
# Show the loaded RDMs object (dissimilarities and descriptors) for inspection
beta_rdm

rsatoolbox.rdm.RDMs(
dissimilarity_measure = 
correlation
dissimilarities = 
[[0.468593   1.17439055 0.9595331  ... 0.4052614  0.28849727 0.59779489]]
descriptors = 
{}
rdm_descriptors = 
{'index': [0], 'roi': ['DMPFC'], 'session': [1], 'subject': ['sub-01']}
pattern_descriptors = 
{'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107], 'pattern': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 6