# TAC Discrimination Analysis: 

# Within participants who saw both discrimination images at the same duration in the Verbal Judgement experiment

In [10]:
import os 
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import scipy 
import scipy.stats as stats
import json

In [8]:
# Read the Target-at-Center (TAC) Verbal Judgement CSVs, one file per
# presentation duration (250/500/750/1000), in raw and normalized form.

TAC_data_path = '/Users/pmahableshwarkar/Documents/Depth_Project/depth_duration_analysis/TAC_data'

# Raw data, unpacked in the same order the files are listed.
raw_TAC_250, raw_TAC_500, raw_TAC_750, raw_TAC_1000 = (
    pd.read_csv(TAC_data_path + csv_name)
    for csv_name in (
        '/raw_250_data.csv',
        '/raw_500_data.csv',
        '/raw_750_data.csv',
        '/raw_1000_data.csv',
    )
)

# Normalized data, same ordering.
n_TAC_250, n_TAC_500, n_TAC_750, n_TAC_1000 = (
    pd.read_csv(TAC_data_path + csv_name)
    for csv_name in (
        '/normalized_250_data.csv',
        '/normalized_500_data.csv',
        '/normalized_750_data.csv',
        '/normalized_1000_data.csv',
    )
)

In [64]:
# Read a single TAC discrimination sequence file.
# Every sequence contains the same image pairings; only duration and trial
# order are counterbalanced, so one file is enough to list the pairings.

discrim_json0_path = '/Users/pmahableshwarkar/Documents/Depth_Project/depth_discrimination/TAC_depth_discrimination_MTurk/discrim_jsons/v2_shuffled_g0_dr.json'

with open(discrim_json0_path) as f:
    discrim_json0 = json.load(f)

# One (image_0, image_1) tuple per trial. The first 29 characters of each
# stored path are dropped -- presumably a fixed directory prefix; verify
# against the JSON contents.
discrim_image_combos = [
    (entry['image_path_target_0'][29:], entry['image_path_target_1'][29:])
    for entry in discrim_json0
]

In [110]:
# Map the first image of each discrimination pair to the list of subjects
# whose 250 ms Verbal Judgement data contains BOTH images of that pair
# exactly once.

discrim_trial_subjects_250 = {pair[0]: [] for pair in discrim_image_combos}

subjs_250 = raw_TAC_250.subjID.unique()

for subj in subjs_250:
    # Stimulus column restricted to this subject's rows
    subj_stimuli = raw_TAC_250.loc[raw_TAC_250['subjID'] == subj, 'stimulus']

    for first_img, second_img in discrim_image_combos:
        first_count = int((subj_stimuli == 'depth_duration_stimuli/' + first_img).sum())
        second_count = int((subj_stimuli == 'depth_duration_stimuli/' + second_img).sum())

        # Keep only subjects with exactly one row for each image of the pair
        if first_count != 1 or second_count != 1:
            continue

        qualifying = discrim_trial_subjects_250[first_img]
        if subj not in qualifying:
            qualifying.append(subj)

        
        
        
        

In [111]:
# Number of qualifying subjects for each discrimination pair (250 ms)
for qualifying_subjects in discrim_trial_subjects_250.values():
    print(len(qualifying_subjects))

10
17
14
11
0
11
15
11
0
13
13
12
0
10
13
7
12
5
15
0
9
12
15
11
16
0
11
14
7
0
10
16
13
8
0
11
14
0
10
10


In [112]:
# Map the first image of each discrimination pair to the list of subjects
# whose 1000 ms Verbal Judgement data contains BOTH images of that pair
# exactly once.

discrim_trial_subjects_1000 = {pair[0]: [] for pair in discrim_image_combos}

subjs_1000 = raw_TAC_1000.subjID.unique()

for subj in subjs_1000:
    # Stimulus column restricted to this subject's rows
    subj_stimuli = raw_TAC_1000.loc[raw_TAC_1000['subjID'] == subj, 'stimulus']

    for first_img, second_img in discrim_image_combos:
        first_count = int((subj_stimuli == 'depth_duration_stimuli/' + first_img).sum())
        second_count = int((subj_stimuli == 'depth_duration_stimuli/' + second_img).sum())

        # Keep only subjects with exactly one row for each image of the pair
        if first_count != 1 or second_count != 1:
            continue

        qualifying = discrim_trial_subjects_1000[first_img]
        if subj not in qualifying:
            qualifying.append(subj)


In [113]:
# Number of qualifying subjects for each discrimination pair (1000 ms)
for qualifying_subjects in discrim_trial_subjects_1000.values():
    print(len(qualifying_subjects))

10
17
12
11
0
10
15
12
0
13
13
14
0
11
14
8
12
5
15
0
9
12
15
12
17
0
11
14
7
0
9
16
13
8
0
11
13
0
8
9


In [114]:
# because of outlier cleaning, the numbers are not even across all discrimination trials

In [118]:
# Display the raw 250 ms data frame to inspect its columns (output follows)
raw_TAC_250

Unnamed: 0.1,Unnamed: 0,subjID,experimentName,versionName,sequenceName,url,selected_row,windowWidth,windowHeight,screenWidth,...,duration,actual_depth,depth_estimate,trial_RT,log_sceneDuration,unitSelection,experimentTime,totalTime,age,gender
0,0,710814,DepthScenes,duration_manipulation_targetAtFixation,jsons/ad_3.json,http://100.27.21.195/DepthDuration/v2_depth_du...,8,1600,789,1600,...,250,3.8960,1.5240,3114,249,feet,750381,902117,44,Man
1,3,710814,DepthScenes,duration_manipulation_targetAtFixation,jsons/ad_3.json,http://100.27.21.195/DepthDuration/v2_depth_du...,8,1600,789,1600,...,250,2.1950,1.2192,2458,250,feet,750381,902117,44,Man
2,8,710814,DepthScenes,duration_manipulation_targetAtFixation,jsons/ad_3.json,http://100.27.21.195/DepthDuration/v2_depth_du...,8,1600,789,1600,...,250,4.1050,2.1336,3315,250,feet,750381,902117,44,Man
3,11,710814,DepthScenes,duration_manipulation_targetAtFixation,jsons/ad_3.json,http://100.27.21.195/DepthDuration/v2_depth_du...,8,1600,789,1600,...,250,1.5510,1.2192,2564,251,feet,750381,902117,44,Man
4,17,710814,DepthScenes,duration_manipulation_targetAtFixation,jsons/ad_3.json,http://100.27.21.195/DepthDuration/v2_depth_du...,8,1600,789,1600,...,250,3.2725,1.8288,3131,250,feet,750381,902117,44,Man
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8983,55862,107287,DepthScenes,duration_manipulation_targetAtFixation,jsons/k_1.json,http://34.238.49.31/DepthDuration/v2_depth_dur...,9,1280,609,1280,...,250,4.1050,4.0000,3095,245,meters,1957042,2186724,25,Man
8984,55866,107287,DepthScenes,duration_manipulation_targetAtFixation,jsons/k_1.json,http://34.238.49.31/DepthDuration/v2_depth_dur...,9,1280,609,1280,...,250,4.4670,4.0000,2441,248,meters,1957042,2186724,25,Man
8985,55867,107287,DepthScenes,duration_manipulation_targetAtFixation,jsons/k_1.json,http://34.238.49.31/DepthDuration/v2_depth_dur...,9,1280,609,1280,...,250,1.9680,5.0000,2199,251,meters,1957042,2186724,25,Man
8986,55869,107287,DepthScenes,duration_manipulation_targetAtFixation,jsons/k_1.json,http://34.238.49.31/DepthDuration/v2_depth_dur...,9,1280,609,1280,...,250,2.8000,4.0000,2394,253,meters,1957042,2186724,25,Man


In [116]:
def _infer_duration_label(frame, position, positional_labels=(250, 500, 750, 1000)):
    """Pick the duration value used to tag one summary table.

    Prefers the frame's own 'duration' column when it holds exactly one
    distinct non-null value; otherwise falls back to the label implied by the
    frame's position in the input list. Returns None when neither applies.
    """
    if 'duration' in frame.columns:
        observed = frame['duration'].dropna().unique()
        if len(observed) == 1:
            return observed[0]
    if position < len(positional_labels):
        return positional_labels[position]
    return None


def trial_pivotTable(data):
    """
    Generate per-stimulus summary tables from data after cleaning and outlier removal.

    For each data frame in `data`, the following is calculated for every
    individual target (stimulus):
    - average depth estimate
    - standard deviation of the depth estimate (N-1 normalisation, ddof=1)
    - standard error of the depth estimate
    - mean actual depth

    Args:
        data: list of DataFrames, each containing the columns 'stimulus',
            'depth_estimate' and 'actual_depth'. The conventional ordering is
            [250, 500, 750, 1000].

    Returns:
        List with one DataFrame per input frame. Each is indexed by stimulus,
        sorted by 'Actual Depth', with columns 'Average Estimated Depth',
        'Standard Deviation', 'Standard Error', 'Actual Depth' and 'Duration'.
        'Duration' is taken from the frame's own 'duration' column when that
        column holds a single value, so a lone 1000 ms frame is labelled 1000
        and not 250. If the column is missing or mixed, the positional labels
        250/500/750/1000 are used; beyond four frames no 'Duration' column is
        added.
    """
    # (source column, aggregation, row label) in the output column order.
    # String aggregation names keep pandas' own implementations: 'std' is
    # normalised by N-1, and no NumPy-callable deprecation warning is raised.
    summaries = (
        ('depth_estimate', 'mean', 'Average Estimated Depth'),
        ('depth_estimate', 'std', 'Standard Deviation'),
        ('depth_estimate', 'sem', 'Standard Error'),
        ('actual_depth', 'mean', 'Actual Depth'),
    )

    result_tables = []
    for cond, duration in enumerate(data):
        frames = []
        for column, aggfunc, label in summaries:
            # With no index given, the pivot has one row (named after the
            # value column) and one column per stimulus.
            pivot = pd.pivot_table(duration, values=[column], columns=['stimulus'], aggfunc=aggfunc)
            frames.append(pivot.rename(index={column: label}))

        # Stack the four one-row pivots, then transpose to one row per stimulus
        result = pd.concat(frames).T
        result = result.sort_values(by=['Actual Depth'])

        duration_label = _infer_duration_label(duration, cond)
        if duration_label is not None:
            result["Duration"] = duration_label

        result_tables.append(result)

    return result_tables

In [129]:
# Summarise the raw 250 ms and 1000 ms data per stimulus. Each call passes a
# one-element list, so each result is a one-element list of summary tables.
raw_250_trial_pivot = trial_pivotTable([raw_TAC_250])
raw_1000_trial_pivot = trial_pivotTable([raw_TAC_1000])


In [130]:
def trial_getxy(data):
    """
    Extract the summary columns of each table into plain lists for plotting.

    Args:
        data: iterable of summary DataFrames (e.g. [250, 500, 750, 1000]),
            each indexed by target (stimulus path) and containing the columns
            'Average Estimated Depth', 'Standard Error', 'Actual Depth' and
            'Standard Deviation'. These data frames are POST all outlier
            cleaning.

    Returns:
        Tuple (xs, ys, stes, actualdepths, stds). Each element is a list with
        one inner list per input table, in row order:
            xs           = index values (individual targets)
            ys           = average estimated depths
            stes         = standard errors
            actualdepths = actual depths
            stds         = standard deviations
    """
    xs, ys, stes, actualdepths, stds = [], [], [], [], []

    for table in data:
        # Whole-column extraction avoids building a Series per row
        xs.append(table.index.tolist())
        ys.append(table["Average Estimated Depth"].tolist())
        stes.append(table["Standard Error"].tolist())
        actualdepths.append(table["Actual Depth"].tolist())
        stds.append(table["Standard Deviation"].tolist())

    return xs, ys, stes, actualdepths, stds

In [131]:
# Convert each list of summary tables into the
# (xs, ys, stes, actualdepths, stds) tuple of per-table lists from trial_getxy.
raw_250_final = trial_getxy(raw_250_trial_pivot)
raw_1000_final = trial_getxy(raw_1000_trial_pivot)


In [135]:
trial_plot_data_250 = raw_250_final
trial_plot_data_1000 = raw_1000_final

## Execute this cell to prep for plotting
# Each trial_plot_data_* is the (xs, ys, stes, actualdepths, stds) tuple from
# trial_getxy; only the first (and only) table's list is taken from each
# field. Note that stim_* therefore receives the actual depths (field 3),
# while final_x_* receives the stimulus paths (field 0).
final_x_250, final_y_250, ste_250, stim_250, std_250 = (
    trial_plot_data_250[field][0] for field in range(5)
)

final_x_1000, final_y_1000, ste_1000, stim_1000, std_1000 = (
    trial_plot_data_1000[field][0] for field in range(5)
)

In [None]:
def discrim_PC(subjects, VE_data, discrim_data):
    """
    Build the Verbal-Estimate answer key for the discrimination trials.

    For each discrimination trial, the average depth estimates of its two
    target images are looked up, and the image with the smaller average
    estimate is recorded as the answer ("Which target is CLOSER to you?").

    NOTE(review): the parameters `subjects`, `VE_data` and `discrim_data` are
    not used yet. The body reads the module-level names
    `all_discrim_performance`, `TAC_final_stim_250`, `n_TAC_final_y_250` and
    `n_TAC_std_250`, none of which are defined in the visible part of this
    file -- confirm where they come from before wiring in the parameters.

    Returns:
        dict keyed like `all_discrim_performance`; each value holds
        'stimulus_1', 'stimulus_0_avg_estim', 'stimulus_1_avg_estim',
        'answer', 'std0' and 'std1'. 'answer' is None when neither estimate
        is smaller than the other.

    Raises:
        IndexError: if a stimulus path is not found in `TAC_final_stim_250`.
    """
    n_VE_answerkey_250 = {}  # corresponding answer key for discrimination trials

    for key, performance in all_discrim_performance.items():
        # Rebuild the depth_duration_stimuli path of each target image.
        # The folder name is the file name minus its last 11 characters --
        # presumably a fixed filename suffix; verify against the stimulus set.
        targetimg0 = key.split('/')[-1]
        folder0 = targetimg0[:-11]
        depth_dur_path0 = 'depth_duration_stimuli/' + folder0 + '/' + targetimg0
        # assumes TAC_final_stim_250 is a NumPy array of stimulus paths -- TODO confirm
        idx0 = np.where(TAC_final_stim_250 == depth_dur_path0)[0][0]
        avg_estim_stim0 = n_TAC_final_y_250[idx0]
        std0 = n_TAC_std_250[idx0]

        targetimg1 = performance['stimulus_1'].split('/')[-1]
        folder1 = targetimg1[:-11]
        depth_dur_path1 = 'depth_duration_stimuli/' + folder1 + '/' + targetimg1
        idx1 = np.where(TAC_final_stim_250 == depth_dur_path1)[0][0]
        avg_estim_stim1 = n_TAC_final_y_250[idx1]
        std1 = n_TAC_std_250[idx1]

        # Reset per trial so a tie never reuses the previous trial's answer
        # (or raises NameError on the first trial).
        answer = None
        if avg_estim_stim0 < avg_estim_stim1:
            # Which target is CLOSER to you?
            answer = targetimg0
        elif avg_estim_stim0 == avg_estim_stim1:
            # Tie: report the pair and leave the answer undefined
            print(targetimg0, targetimg1)
        elif avg_estim_stim0 > avg_estim_stim1:
            answer = targetimg1

        n_VE_answerkey_250[key] = {'stimulus_1': targetimg1,
                                   'stimulus_0_avg_estim': avg_estim_stim0,
                                   'stimulus_1_avg_estim': avg_estim_stim1,
                                   'answer': answer,
                                   'std0': std0,
                                   'std1': std1}

    return n_VE_answerkey_250