In [72]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from make_df import make_df_from_spreadsheet
from rectangle_model import *

In [16]:
# Load the cleaned exp1 responses. Per the preview below, each row is one
# example placed by a worker for a problem, with its grid position in row/col.
df = pd.read_csv("exp1_data_clean.csv")

In [17]:
# Preview the first 15 rows of the raw data (values are still 1-based here).
df.head(15)

Unnamed: 0,worker,problem,example,row,col
0,A28AXX4NCWPH1F,37,1,1,6
1,A28AXX4NCWPH1F,37,2,1,1
2,A28AXX4NCWPH1F,37,3,6,1
3,A28AXX4NCWPH1F,37,4,6,6
4,A28AXX4NCWPH1F,24,1,1,3
5,A28AXX4NCWPH1F,24,2,6,3
6,A28AXX4NCWPH1F,24,3,3,2
7,A28AXX4NCWPH1F,24,4,3,5
8,A28AXX4NCWPH1F,11,1,2,3
9,A28AXX4NCWPH1F,11,2,6,1


Change to zero indexing

In [18]:
# Zero-index every 1-based integer column. The columns are named explicitly
# (rather than sliced by position with df.columns[1:]) so that columns added
# later, such as 'coords', are never swept in if this cell is re-run.
cols_to_change = ['problem', 'example', 'row', 'col']

# Guard against running this cell twice on the same frame: 1-based data has
# no value below 1, so anything smaller means the shift already happened.
if (df[cols_to_change].min() < 1).any():
    raise ValueError(
        "df already looks zero-indexed; reload it before re-running this cell"
    )

df[cols_to_change] = df[cols_to_change] - 1

In [34]:
# Pair each row's zero-based (row, col) into a single tuple per example.
df['coords'] = [(r, c) for r, c in zip(df['row'], df['col'])]

In [40]:
# Sanity check: problem/example/row/col should now be 0-based and `coords`
# should hold the matching (row, col) tuple.
df

Unnamed: 0,worker,problem,example,row,col,coords
0,A28AXX4NCWPH1F,36,0,0,5,"(0, 5)"
1,A28AXX4NCWPH1F,36,1,0,0,"(0, 0)"
2,A28AXX4NCWPH1F,36,2,5,0,"(5, 0)"
3,A28AXX4NCWPH1F,36,3,5,5,"(5, 5)"
4,A28AXX4NCWPH1F,23,0,0,2,"(0, 2)"
...,...,...,...,...,...,...
2582,A3HNEYFOIJWPH1,21,1,4,3,"(4, 3)"
2583,A3HNEYFOIJWPH1,0,0,2,0,"(2, 0)"
2584,A3HNEYFOIJWPH1,0,1,3,0,"(3, 0)"
2585,A3HNEYFOIJWPH1,0,2,2,5,"(2, 5)"


In [37]:
# Convert to new coords system

def flatten_coords(coords, n_cols=6):
    """Map a zero-based (row, col) pair to its row-major flat index.

    Parameters
    ----------
    coords : tuple
        ``(row, col)``, both zero-based.
    n_cols : int, optional
        Number of columns in the grid. Defaults to 6; with rows and columns
        running 0-5 this yields indices 0 through 35.

    Returns
    -------
    int
        ``n_cols * row + col``.
    """
    return n_cols * coords[0] + coords[1]

In [43]:
# Collapse each (row, col) tuple to a single flat cell index.
df['coords_idx'] = df['coords'].map(flatten_coords)
df

In [45]:
# Sanity check: coords_idx should equal 6 * row + col for every row.
df

Unnamed: 0,worker,problem,example,row,col,coords,coords_idx
0,A28AXX4NCWPH1F,36,0,0,5,"(0, 5)",5
1,A28AXX4NCWPH1F,36,1,0,0,"(0, 0)",0
2,A28AXX4NCWPH1F,36,2,5,0,"(5, 0)",30
3,A28AXX4NCWPH1F,36,3,5,5,"(5, 5)",35
4,A28AXX4NCWPH1F,23,0,0,2,"(0, 2)",2
...,...,...,...,...,...,...,...
2582,A3HNEYFOIJWPH1,21,1,4,3,"(4, 3)",27
2583,A3HNEYFOIJWPH1,0,0,2,0,"(2, 0)",12
2584,A3HNEYFOIJWPH1,0,1,3,0,"(3, 0)",18
2585,A3HNEYFOIJWPH1,0,2,2,5,"(2, 5)",17


In [65]:
# One row per (worker, problem): gather that pair's flat cell indices into a
# tuple, then turn the group keys back into ordinary columns.
new_df = (
    df.groupby(['worker', 'problem'])['coords_idx']
    .apply(tuple)
    .reset_index()
)

new_df.head(20)

Add model predictions

In [73]:
# Build the table of all problems from the exported stimuli spreadsheet.
# make_df_from_spreadsheet is a project helper (imported from make_df); the
# schema of what it returns is not shown in this notebook.
filename = 'teaching_stimuli - all_examples (9).csv'
all_problems = make_df_from_spreadsheet(filename)

In [75]:
# Calculate model predictions for our problems

# Lookup from the zero-based problem id used in this notebook to the old
# problem index: position i holds the old index for problem i
# (40 entries: 43, 47, 55, then 57 through 93).
old_prob_indices = [43, 47, 55] + list(range(57, 94))
new_df['old_prob_idx'] = new_df['problem'].apply(lambda prob: old_prob_indices[prob])

In [76]:
# Sanity check: every row should now carry the old_prob_idx for its problem.
new_df

Unnamed: 0,worker,problem,example,row,col,coords,coords_idx,old_prob_idx
0,A28AXX4NCWPH1F,36,0,0,5,"(0, 5)",5,90
1,A28AXX4NCWPH1F,36,1,0,0,"(0, 0)",0,90
2,A28AXX4NCWPH1F,36,2,5,0,"(5, 0)",30,90
3,A28AXX4NCWPH1F,36,3,5,5,"(5, 5)",35,90
4,A28AXX4NCWPH1F,23,0,0,2,"(0, 2)",2,77
...,...,...,...,...,...,...,...,...
2582,A3HNEYFOIJWPH1,21,1,4,3,"(4, 3)",27,75
2583,A3HNEYFOIJWPH1,0,0,2,0,"(2, 0)",12,43
2584,A3HNEYFOIJWPH1,0,1,3,0,"(3, 0)",18,43
2585,A3HNEYFOIJWPH1,0,2,2,5,"(2, 5)",17,43


In [77]:
# Teacher-model predictions for every old problem index, keyed first by that
# index and then by model variant: 'lit' passes 0 as the first argument and
# 'prag' passes 250 (the saved output reports the 0 under 'n_iter').
# NOTE(review): find_teacher_probs presumably comes from the rectangle_model
# star import - confirm. The saved run of this cell was interrupted, so it
# appears to be slow.
model_preds = {}

for idx in old_prob_indices:
    model_preds[idx] = {}
    for variant, n_iter in (('lit', 0), ('prag', 250)):
        model_preds[idx][variant] = find_teacher_probs(n_iter, idx, all_problems)



KeyboardInterrupt: 

In [78]:
# Inspect whatever predictions were computed (partial if the loop above was
# interrupted).
model_preds

{43: {'lit': {'n_iter': 0,
   'problem_index': 43,
   1: {'d':          h_1       h_2       h_3       h_4
    0   0.000000  0.153846  0.000000  0.000000
    1   0.000000  0.153846  0.000000  0.000000
    2   0.045455  0.076923  0.000000  0.000000
    3   0.045455  0.000000  0.076923  0.000000
    4   0.000000  0.000000  0.153846  0.000000
    5   0.000000  0.000000  0.153846  0.000000
    6   0.000000  0.153846  0.000000  0.000000
    7   0.045455  0.076923  0.000000  0.000000
    8   0.045455  0.076923  0.000000  0.000000
    9   0.045455  0.000000  0.076923  0.000000
    10  0.045455  0.000000  0.076923  0.000000
    11  0.000000  0.000000  0.153846  0.000000
    12  0.045455  0.076923  0.000000  0.000000
    13  0.045455  0.076923  0.000000  0.000000
    14  0.022727  0.038462  0.038462  0.020833
    15  0.022727  0.038462  0.038462  0.020833
    16  0.045455  0.000000  0.076923  0.000000
    17  0.045455  0.000000  0.076923  0.000000
    18  0.045455  0.000000  0.000000  0.041667
 

### Scratch / exploratory cells

In [60]:
# Scratch: pull every row for one (worker, problem) pair out of temp_df.
# NOTE(review): temp_df is not defined in any visible cell - presumably df
# indexed by ['worker', 'problem']; confirm or delete this cell.
#temp_df[temp_df.index[0]]
temp_df.loc[('A28AXX4NCWPH1F', 36)][:]

  temp_df.loc[('A28AXX4NCWPH1F', 36)][:]


Unnamed: 0_level_0,Unnamed: 1_level_0,example,row,col,coords,coords_idx
worker,problem,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A28AXX4NCWPH1F,36,0,0,5,"(0, 5)",5
A28AXX4NCWPH1F,36,1,0,0,"(0, 0)",0
A28AXX4NCWPH1F,36,2,5,0,"(5, 0)",30
A28AXX4NCWPH1F,36,3,5,5,"(5, 5)",35


In [30]:
# Scratch: view df keyed by problem. The result is only displayed, not
# assigned, so df itself is left unchanged.
df.set_index('problem')

Unnamed: 0_level_0,worker,example,row,col
problem,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
36,A28AXX4NCWPH1F,0,0,5
36,A28AXX4NCWPH1F,1,0,0
36,A28AXX4NCWPH1F,2,5,0
36,A28AXX4NCWPH1F,3,5,5
23,A28AXX4NCWPH1F,0,0,2
...,...,...,...,...
21,A3HNEYFOIJWPH1,1,4,3
0,A3HNEYFOIJWPH1,0,2,0
0,A3HNEYFOIJWPH1,1,3,0
0,A3HNEYFOIJWPH1,2,2,5


loop over unique problems