In [2]:
# Behavioral analyses IntMem - MvK 2017

# Read in necessary stuff
#%pylab inline
from ipykernel import kernelapp as app
import pandas as pd
import numpy as np
import statsmodels.api as sm
#import matplotlib.pyplot as plt
from scipy import stats
#import pylab as pl
import seaborn as sns
import shutil

In [1]:
# Read in the recall logfile of every subject and build the per-subject table.
# The loop below runs once per subject; `s` is the subject number used in all file names.
subjects = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,22,23,24,25,26,27,28,29,30] #fMRI batch

# Full sample, kept for reference (the fMRI batch above leaves out 19 and 21):
#subjects = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
for s in subjects:

    #print('Running subject s%i' %s)
    # read in recall: tab-separated, no header row, 4 leading and 3 trailing lines skipped
    names = ['Object', 'Congruency', 'Empty', 'Recognition', 'Confidence', 'RT', 'Pic desc', 'RT.1', 'Pic rec', 'Confidence2', 'RT2']
    df = pd.read_csv('C:/Users/mkn556/Documents/Research/Projects/IntMem/VU/logfiles/s%i/s%i_recall1.txt' % (s,s), sep = '\t',
                     header = None, names = names, skiprows = 4, skipfooter = 3, engine = 'python')

    # for s4: missed items (because this participant had a technical issue)
    if s == 4:
        s4_missed_items = pd.read_excel('C:/Users/mkn556/Documents/Research/Projects/IntMem/VU/logfiles/s4/missed_items.xlsx',
                     header = None, names = names)
        # Add the missed items as extra rows. The previous default (inner) merge on all
        # shared columns kept only rows present in BOTH frames, which drops the logged
        # trials instead of extending them. The rows are removed again further down,
        # after the position-based joins with the other logfiles.
        df = pd.concat([df, s4_missed_items], ignore_index=True)

    # Drop 'Empty' (an accidental extra separator in the logfile) and 'RT.1' (not useful).
    columns = ['Empty', 'RT.1']
    df = df.drop(columns=columns)

    # Keep only the learned items ('con' / 'inc'), ordered by object name, with a fresh 0..n-1 index.
    is_learned = df.Congruency.isin(['con', 'inc'])
    all_data = df[is_learned].sort_values(by = ['Object']).reset_index(drop=True)

    # Every trial that is not a recognition hit (misses and no responses), reduced to the
    # layout of the picture-description Excel sheets (which do not contain misses) and
    # scored with zeros.
    columns = ['Congruency', 'Recognition', 'Confidence', 'RT', 'Pic rec', 'Confidence2', 'RT2']
    miss_nr = (
        all_data[all_data.Recognition != 'Hit']
        .drop(columns=columns)
        .assign(Points=0, FM_con=0, FM_inc=0)
    )

    # Read the picture-description ratings of both raters, average them, add the
    # zero-scored non-hit trials (miss_nr) and copy the result into all_data.
    # Each rater workbook is read as two sheets (index 0 and 1); judging by the variable
    # names these hold the congruent and incongruent items -- TODO confirm.
    # The outer merge joins on all columns the two sheets share, keeping the rows of both.
    xls = pd.ExcelFile('C:/Users/mkn556/Documents/Research/Projects/IntMem/VU/logfiles/s%i/s%i_picture_recall1_rater1.xlsx' % (s,s))
    con = xls.parse(0)
    inc = xls.parse(1)
    picdesc_rater1 = pd.merge(con,inc, how = 'outer')
    xls = pd.ExcelFile('C:/Users/mkn556/Documents/Research/Projects/IntMem/VU/logfiles/s%i/s%i_picture_recall1_rater2.xlsx' % (s,s))
    con = xls.parse(0)
    inc = xls.parse(1)
    picdesc_rater2 = pd.merge(con,inc, how = 'outer')

    # NOTE: this is an alias, not a copy -- the columns added below also appear on picdesc_rater1.
    picdesc = picdesc_rater1
    # Average the two raters. The additions align on the row index, so both rater frames
    # must list the items in the same order.
    picdesc['Points'] = (picdesc_rater1['Punten'] + picdesc_rater2['Punten'])/2
    picdesc['FM_con'] = (picdesc_rater1['FM con'] + picdesc_rater2['FM con'])/2
    picdesc['FM_inc'] = (picdesc_rater1['FM inc'] + picdesc_rater2['FM incon'])/2 # note that rater 2 used a different column name
    # Remove rater 1's raw score columns now that the averages exist.
    columns = ['Punten', 'FM con', 'FM inc']
    picdesc = picdesc.drop(columns, axis = 1)
    # Add the non-hit trials so picdesc can be lined up with all_data, then bring it into
    # the same order and index as all_data (sorted by Object, 0..n-1).
    picdesc = pd.merge(picdesc, miss_nr, how = 'outer')
    picdesc = picdesc.sort_values(by = ['Object'])
    picdesc = picdesc.reset_index(drop=True)

    # add ratings to recall_learned
    # The assignment aligns on the row index, i.e. it relies on both frames being sorted
    # by Object and holding the same items.
    all_data[['Points', 'FM_con', 'FM_inc']] = picdesc[['Points','FM_con','FM_inc']]
    # NOTE(review): astype(int) truncates fractional rater averages (e.g. 1.5 -> 1) and
    # raises if a value is missing -- confirm the truncation is intended.
    all_data.Points = all_data.Points.astype(int)
    all_data.FM_con = all_data.FM_con.astype(int)
    all_data.FM_inc = all_data.FM_inc.astype(int)
    all_data = all_data[['Object','Congruency','Recognition','Confidence','RT','Pic desc','Points','FM_con','FM_inc',
                                   'Pic rec', 'Confidence2','RT2']] # reorder columns

    # Read the subject's congruency ratings (tab-separated; header on the second line,
    # last two lines skipped).
    con_ratings = pd.read_csv('C:/Users/mkn556/Documents/Research/Projects/IntMem/VU/logfiles/s%i/s%i_con.txt' % (s,s), sep = '\t',
                     header = 1, skipfooter = 2, engine = 'python')
    # Note that the column names in the logfiles are inverted (i.e. object = scene and
    # v.v.), so sorting by 'Scene' puts the ratings in the same order as all_data.
    con_ratings = con_ratings.sort_values(by = ['Scene']).reset_index(drop=True)

    # The assignment aligns on the row index: row i of the sorted ratings goes to row i of all_data.
    all_data['Con_rating'] = con_ratings['Answer']
    all_data.Con_rating = all_data.Con_rating.astype(int)
    # The former placeholder column ('Con_subj' = 'test') and the bare expression after it
    # were leftovers with no effect: 'Con_subj' is created from Con_rating right below.

    # add row with subjective "con" and "inc (or N/A in case of a "3" answer) for further analyses
    def congruency(row):
        """Map a trial's numeric congruency rating to a subjective label.

        Parameters
        ----------
        row : mapping with a 'Con_rating' entry (e.g. one DataFrame row).

        Returns
        -------
        str
            'inc' for ratings below 3, 'con' for ratings strictly between 3 and 6,
            and 'N/A' otherwise (a 3 = no congruency, or a 6 = Enter).
        """
        rating = row['Con_rating']
        if rating < 3:
            return 'inc'
        return 'con' if 3 < rating < 6 else 'N/A'

    # Label every trial with the subject's own congruency judgement ('con' / 'inc' / 'N/A').
    all_data['Con_subj'] = all_data.apply(congruency, axis=1)

    # read in AC-encoding (tab-separated; header on the fifth line, last three lines skipped)
    ACencoding = pd.read_csv('C:/Users/mkn556/Documents/Research/Projects/IntMem/VU/logfiles/s%i/s%i_ACencoding.txt' % (s,s), sep = '\t',
                         header = 4, skipfooter = 3, engine = 'python')

    # delete some rows with header information (fixed positions in the logfile)
    ACencoding = ACencoding.drop(ACencoding.index[[0,65,66,67,68,69,70,127]])

    # assign Run numbers: the first 64 remaining rows are run 1, the next 56 are run 2.
    # `np.int` was removed in NumPy 1.24; the builtin `int` gives the same dtype.
    run = np.concatenate((np.ones(64, dtype=int), np.ones(56, dtype=int)*2), axis = 0)
    ACencoding['Run'] = run  # raises if the logfile does not hold exactly 120 trials
    ACencoding = ACencoding.sort_values(by = ['Object']).reset_index(drop=True)

    # move columns Reactivation, RT, Onset and Run to the recall df (aligned on the row
    # index, so both frames must be sorted by Object)
    all_data[['Reactivation','RT_enc','Onset','Run']] = ACencoding[['Reactivation','RT','Onset','Run']]
    all_data = all_data.astype({'Reactivation': int, 'RT_enc': int, 'Onset': int, 'Run': int})

    # change Reactivation "4" scores (no answer) to NaN. NaN cannot be stored in an
    # integer column and recent pandas no longer upcasts silently on assignment, so the
    # two affected columns are converted to float explicitly first.
    no_answer = all_data.Reactivation == 4
    all_data = all_data.astype({'Reactivation': float, 'RT_enc': float})
    all_data.loc[no_answer, ['Reactivation', 'RT_enc']] = np.nan

    # Set the 'No response' trials aside (they are put back after the analyses) and
    # continue with the remaining trials on a fresh 0..n-1 index.
    is_no_response = all_data.Recognition == 'No response'
    NRs = all_data[is_no_response]
    all_data = all_data[~is_no_response].reset_index(drop=True)

    # Memory score per trial = recognition part + picture-recognition part + 3 * Points.
    memory_scores = []
    for _, trial in all_data.iterrows():
        score = 0
        if trial['Recognition'] == 'Hit':
            # Recognition score: lower confidence value = higher score
            score = score + (4 - trial['Confidence'])
        if trial['Pic rec'] == 'Hit':
            # Picture recognition score; the confidence scale direction was randomized
            confidence2 = trial['Confidence2']
            if confidence2 > 3.0:
                score = score + (confidence2 - 3)  # 6 is highest
            else:
                score = score + (4 - confidence2)  # 1 is highest
        memory_scores.append(score + trial['Points'] * 3)

    # Store the scores on all_data as integers.
    all_data['Memory_score'] = memory_scores
    all_data['Memory_score'] = all_data['Memory_score'].astype(int)

    # delete rows for s4 again (those that were not answered during recall)
    if s == 4:
        all_data = all_data[~all_data['Object'].isin(s4_missed_items['Object'])]
        all_data = all_data.reset_index(drop=True)

    # The per-subject table is complete here. Restrict it to recognition hits
    # (FOR FA ANALYSES) before computing the summary statistics.
    all_data = all_data[all_data.Recognition == 'Hit'].reset_index(drop=True)

    # Means per subjective congruency label. Several columns must be selected with a
    # list: the former tuple form groupby(...)['a', 'b'] is rejected by pandas >= 2.0.
    means = all_data.groupby('Con_subj')[['Memory_score', 'Reactivation', 'RT_enc', 'FM_con', 'FM_inc']].mean()
    median_MS = all_data.groupby('Con_subj')['Memory_score'].median() # calculate median for median split

    # Trial counts at/above ("hit") and below ("miss") the mean memory score of the
    # congruent and of the incongruent trials. Each result holds one count per Con_subj
    # label; only the matching label is used afterwards.
    # NOTE(review): the split uses the group mean although median_MS is computed for a
    # median split -- confirm which one is intended.
    mean_score_con = means['Memory_score'].con
    mean_score_inc = means['Memory_score'].inc
    scores_by_label = all_data.groupby('Con_subj')['Memory_score']
    trials_hit_con = scores_by_label.apply(lambda column: sum(column >= mean_score_con))
    trials_miss_con = scores_by_label.apply(lambda column: sum(column < mean_score_con))
    trials_hit_inc = scores_by_label.apply(lambda column: sum(column >= mean_score_inc))
    trials_miss_inc = scores_by_label.apply(lambda column: sum(column < mean_score_inc))

    print(means)
    #test_df = all_data[all_data['Memory_score']>0]
    #trials_reac = test_df.groupby('Con_subj')['Reactivation'].apply(lambda column: sum(column > 0)) # calculate amount of trials based on reactivation score
        
    # Start the group-level summary on the first subject of the list (not on a hard-coded
    # subject 1), so the loop also works when the list starts elsewhere and the cell can
    # be re-run from scratch.
    is_first_subject = (s == subjects[0])
    if is_first_subject:
        # Column order must match the value list below. The second 'miss' column used to
        # be a duplicate 'trials_miss_con'; it holds the incongruent count.
        final_df = pd.DataFrame(columns = ['MS_con', 'MS_inc', 'MR_con', 'MR_inc','sum_con', 'sum_inc', 'trials_hit_con',
                                           'trials_hit_inc', 'trials_miss_con', 'trials_miss_inc', 'RT_con', 'RT_inc',
                                           'FM_con_con','FM_con_inc','FM_inc_con','FM_inc_inc'])

    # fill dfs with subject-specific information (one row per subject, labelled 's<nr>')
    final_df.loc['s%i' %s] = [means['Memory_score'].con,means['Memory_score'].inc,means['Reactivation'].con,
                              means['Reactivation'].inc, sum(all_data.Con_subj == 'con'), sum(all_data.Con_subj == 'inc'),
                              trials_hit_con.con,trials_hit_inc.inc, trials_miss_con.con, trials_miss_inc.inc, means['RT_enc'].con,
                              means['RT_enc'].inc, means['FM_con'].con, means['FM_inc'].con, means['FM_con'].inc, means['FM_inc'].inc]

    # Mean Points per (subjective congruency x reactivation) cell for this subject,
    # stacked below the cells of the previous subjects.
    graph_df2 = all_data.groupby(['Con_subj','Reactivation'])['Points'].mean().to_frame().reset_index()
    if is_first_subject:
        graph_df = graph_df2
    else:
        # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
        graph_df = pd.concat([graph_df, graph_df2], ignore_index=True)

    # add not responses back to all_data DF and save for further analyses
    all_data = pd.concat([all_data, NRs])
    #all_data.to_excel('Z:/IntMem/data/s%i/logfiles/DF_s%i.xlsx' %(s,s))
    print(final_df)

NameError: name 'pd' is not defined

In [None]:
# final group outcomes
# Prepare the per-subject cell means for plotting. The rename comes first and every step
# is a no-op on already-prepared data, so this cell can be re-run safely.
graph_df = (
    graph_df
    .rename(columns={'Con_subj': 'Congruency'})
    .loc[lambda d: d['Congruency'] != 'N/A']
    .assign(Reactivation=lambda d: d['Reactivation'].astype(int))
    .replace({'con': 'Congruent', 'inc': 'Incongruent'})
)
#graph_df['Points'] = graph_df['Points']*100
#graph_df.to_excel('C:/Users/mkn556/Dropbox/Research/Running projects/IntMem/VU/analyses/congruencyxreactivation_assmemory.xlsx')

# make graph and save
# graph_df holds the columns Congruency, Reactivation and Points; the plots previously
# asked for a 'Memory_score' column that this frame does not contain.
# NOTE(review): the y-limits and the "(%)" label look like they expect Points scaled by
# 100 (see the commented-out line above) -- confirm before publishing the figure.
# NOTE(review): seaborn 0.9 renamed factorplot(size=...) to catplot(kind="point", height=...);
# switch if the installed seaborn no longer provides factorplot.
sns.set(style="ticks", font_scale = 2)
g2 = sns.factorplot(x="Reactivation", y="Points", hue="Congruency", data=graph_df, ci = 48.47,
       capsize=0, palette="hot", size=9, aspect=1.2, dodge = .3, legend = False, legend_out = False)
sns.stripplot(x="Reactivation", y="Points", hue="Congruency", data=graph_df, jitter = True,
       palette="hot")
g2.set(ylim=(-5, 85))
g2.set_ylabels("Associative memory (%)")
g2.set_xticklabels(["Strong", "A bit", "None"])
g2.add_legend()

#savefig("C:/Users/mkn556/Dropbox/Research/Conferences & Workshops/NVP 2017/fMRI_fig.png")