# Model HALVES
## Compares periods of learning and nonlearning between early (run1) and late (run2)

### Important notes for newcomers:
#### 1. The entire script logic is anchored on 80 conditional trials per stimulus set - 40 per run (2 runs).
#### 2. Certain conditional trials are considered "bad" (e.g. if they fall on the first trial or within the last 3 trials of a run), and must be excluded.
#### 3. The indexing in this script is intricate... pay very close attention to what the values of each array represent (see comments). Failing to do so *will* result in major problems!
#### 4. If you get lost, use print statements to walk yourself through the logic. It is sound... just complicated.

In [None]:
%matplotlib inline
import shutil 
import os
from os.path import join, split, basename
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
from glob import glob
from pylab import *

In [None]:
def onset_sort(x):
    """Return a slice of the last underscore-delimited field of *x*.

    The text after the final '_' is taken, then its first 5 and last 8
    characters are dropped. The result is an empty string when that field
    is too short to leave anything between the two cuts.
    """
    last_field = x.rsplit('_', 1)[-1]
    return last_field[5:-8]

# Subjects to process. Only WMAZE_001 is active; the longer subject list
# below is kept for reference but is disabled.
subs = [
    'WMAZE_001',
]
# subs = ['WMAZE_002', 'WMAZE_004', 'WMAZE_005', 'WMAZE_006',
#         'WMAZE_007', 'WMAZE_008', 'WMAZE_009', 'WMAZE_010', 'WMAZE_012',
#         'WMAZE_017', 'WMAZE_018', 'WMAZE_019', 'WMAZE_020', 'WMAZE_021',
#         'WMAZE_022', 'WMAZE_023', 'WMAZE_024', 'WMAZE_026', 'WMAZE_027']

# Labels for the stimulus sets and for the runs.
sets = [
    'set1',
    'set2',
    'set3',
]
runs = [
    'run1',
    'run2',
    'run3',
    'run4',
    'run5',
    'run6',
]

# Model HALVES driver.
#
# For every subject and stimulus set this writes four tab-delimited,
# 3-column text files (run1/run2 x learn/nonlearn). Each row holds the
# StimOnset of the trial immediately preceding a usable B trial, followed by
# the constants 3.0 and 1.0 (presumably duration and weight of an FSL-style
# 3-column event file -- confirm against the model specification).

# Trial-count constants that anchor all of the indexing below.
TRIALS_PER_RUN = 160  # rows taken from each behavioral run file
EDGE_TAIL = 3         # B trials in the last 3 positions of a run are excluded
EV_DURATION = 3.0     # constant written to the second column of every row


def stack_runs(first_run_values, second_run_values, run_length=TRIALS_PER_RUN):
    """Return one object array holding two runs back to back.

    Positions ``[0, run_length)`` hold the first run and positions
    ``[run_length, 2 * run_length)`` hold the second run. Assigning values
    whose length does not fit raises ValueError, which keeps the fixed
    trial-count assumption loud.
    """
    stacked = np.empty(2 * run_length, dtype=object)
    stacked[:run_length] = first_run_values
    stacked[run_length:] = second_run_values
    return stacked


def flag_bad_b_trials(behav_type, behav_resp, run_length=TRIALS_PER_RUN,
                      edge_tail=EDGE_TAIL):
    """Locate the B trials and flag the ones that must be excluded.

    A B trial is bad when it is immediately preceded by a 'BL' trial, when
    its response is 'NR', or when it falls on the first trial or within the
    last ``edge_tail`` trials of a run.

    Returns a tuple ``(b_indices, bad_Bs, bad_B_ind)``:
        b_indices -- index of every B trial among ALL trials
        bad_Bs    -- sorted indices of the bad B trials among ALL trials
        bad_B_ind -- ascending positions of the bad B trials among ONLY the
                     B trials (used to drop rows from per-B arrays)
    """
    b_indices = np.where(behav_type == 'B')[0]
    # trial type of the previous trial; -1 pads the very first position
    trial_shift = np.insert(behav_type[:-1], 0, -1)
    b_BL_indices = np.where((behav_type == 'B') & (trial_shift == 'BL'))[0]

    # first trial and last `edge_tail` trials of every run
    edge_trials = set()
    for run_start in range(0, len(behav_type), run_length):
        run_stop = run_start + run_length
        edge_trials.add(run_start)
        edge_trials.update(range(run_stop - edge_tail, run_stop))

    bad = set(int(k) for k in b_BL_indices)
    for curr_B in b_indices:
        if behav_resp[curr_B] == 'NR' or int(curr_B) in edge_trials:
            bad.add(int(curr_B))

    bad_Bs = sorted(bad)
    bad_B_ind = [j for j, f in enumerate(b_indices) if int(f) in bad]
    return b_indices, bad_Bs, bad_B_ind


def drop_positions(values, positions):
    """Return *values* as an array with the entries at *positions* removed.

    Removal is along the first axis. Passing the positions as an integer
    array keeps an empty list of positions valid.
    """
    return np.delete(np.asarray(values), np.asarray(positions, dtype=int), axis=0)


def split_by_run(positions, trial_indices, run_length=TRIALS_PER_RUN):
    """Split *positions* by the run their trial belongs to.

    positions     -- positions into *trial_indices* (index among good Bs)
    trial_indices -- index of each good B trial among ALL trials

    Returns ``(first_run_positions, second_run_positions)``. Run membership
    is decided by the trial index (``< run_length`` is the first run) rather
    than by a fixed position among the B trials, because the number of B
    trials left in each run changes once the bad ones are removed.
    """
    positions = np.asarray(positions, dtype=int)
    in_first_run = np.asarray(trial_indices)[positions] < run_length
    return positions[in_first_run], positions[~in_first_run]


def ev_matrix(onsets, duration=EV_DURATION):
    """Return an (n, 3) matrix with rows ``[onset, duration, 1.0]``."""
    n_events = len(onsets)
    return np.vstack((onsets,
                      np.ones(n_events) * duration,
                      np.ones(n_events))).T


# The guard is always true in a notebook or when the file is run as a script,
# so the loop executes exactly as before; it only keeps an import of the
# helpers above from touching the file system.
if __name__ == '__main__':
    #collect all files for each subject
    for sub in subs:
        sub_dir = '/home/data/madlab/data/mri/wmaze/' #base directory
        save_dir = '/home/data/madlab/data/mri/wmaze/scanner_behav/{0}/MRthesis/model_HALVES/'.format(sub)

        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        #grab the derivative files created from the set-based learning analysis
        frst_deriv_files = glob(join(sub_dir, 'scanner_behav/{0}/Bprime_pmode_set*.txt'.format(sub)))
        frst_deriv_files.sort()
        #grab the learning curve file for B trials
        learning_files = glob(join(sub_dir, 'scanner_behav/{0}/B_pmode_set*.txt'.format(sub)))
        learning_files.sort()
        #grab the behavioral files for all runs
        behav_runs = glob(join(sub_dir, 'scanner_behav/{0}/{0}_wmazebl_2015*.txt'.format(sub)))
        behav_runs.sort()

        for i, curr_set in enumerate(sets):
            #load derivative and learning curve files
            deriv_file = np.loadtxt(frst_deriv_files[i])
            learning_curve = np.loadtxt(learning_files[i])

            #load behavioral files into Pandas (two consecutive runs per set)
            behav_run1 = pd.read_table(behav_runs[i*2])
            behav_run2 = pd.read_table(behav_runs[i*2+1])

            #onset time, subject response and trial type across both runs
            behav_os = stack_runs(behav_run1['StimOnset'].values,
                                  behav_run2['StimOnset'].values)
            behav_resp = stack_runs(behav_run1['Resp'].values,
                                    behav_run2['Resp'].values)
            behav_type = stack_runs(behav_run1['TrialType'].values,
                                    behav_run2['TrialType'].values)

            #b_indices / bad_Bs: index among all trials
            #bad_B_ind: position among only B trials
            b_indices, bad_Bs, bad_B_ind = flag_bad_b_trials(behav_type, behav_resp)

            #LEARNING CURVE WITH BAD B'S REMOVED
            new_learning = drop_positions(learning_curve, bad_B_ind)

            #B-LIST WITH BAD B'S REMOVED (index among all trials)
            good_Bs = drop_positions(b_indices, bad_B_ind)

            #DERIV FILE WITH BAD B'S REMOVED
            # NOTE(review): the first remaining value is dropped AFTER the bad
            # Bs are removed, as in the original script -- confirm that
            # new_deriv[k] is really meant to line up with good_Bs[k].
            new_deriv = drop_positions(deriv_file, bad_B_ind)[1:]
            print(len(new_deriv))

            #positions (among good Bs) with positive / non-positive derivative
            learning = np.where(new_deriv > 0)[0]
            print('{0} {1}'.format(len(learning), learning))
            nonlearning = np.where(new_deriv <= 0)[0]

            #keep the np.where VALUES (positions among good Bs), not their
            #enumeration counters, so good_Bs is indexed by the right trials
            run1_learn, run2_learn = split_by_run(learning, good_Bs)
            run1_nonlearn, run2_nonlearn = split_by_run(nonlearning, good_Bs)

            #index among all trials of each selected B trial ...
            run1_learn_ind = good_Bs[run1_learn]
            run2_learn_ind = good_Bs[run2_learn]
            run1_nonlearn_ind = good_Bs[run1_nonlearn]
            run2_nonlearn_ind = good_Bs[run2_nonlearn]

            #... and of the trial immediately preceding it
            run1_fixed_learn = run1_learn_ind - 1
            run2_fixed_learn = run2_learn_ind - 1
            run1_fixed_nonlearn = run1_nonlearn_ind - 1
            run2_fixed_nonlearn = run2_nonlearn_ind - 1

            #onsets of the preceding trials
            run1_learn_os = behav_os[run1_fixed_learn]
            run2_learn_os = behav_os[run2_fixed_learn]
            run1_nonlearn_os = behav_os[run1_fixed_nonlearn]
            run2_nonlearn_os = behav_os[run2_fixed_nonlearn]

            run1_learn_mtrx = ev_matrix(run1_learn_os)
            run2_learn_mtrx = ev_matrix(run2_learn_os)
            run1_nonlearn_mtrx = ev_matrix(run1_nonlearn_os)
            run2_nonlearn_mtrx = ev_matrix(run2_nonlearn_os)

            np.savetxt(save_dir + '{0}_run1_learn.txt'.format(curr_set), run1_learn_mtrx, delimiter='\t', fmt='%.4f')
            np.savetxt(save_dir + '{0}_run2_learn.txt'.format(curr_set), run2_learn_mtrx, delimiter='\t', fmt='%.4f')
            np.savetxt(save_dir + '{0}_run1_nonlearn.txt'.format(curr_set), run1_nonlearn_mtrx, delimiter='\t', fmt='%.4f')
            np.savetxt(save_dir + '{0}_run2_nonlearn.txt'.format(curr_set), run2_nonlearn_mtrx, delimiter='\t', fmt='%.4f')