# Creating a feature matrix from a DB Query

In [1]:
import sys
sys.path.append('/Users/Sabatini Lab/GitHub/mouse_bandit/data_preprocessing_code')
sys.path.append('/Users/Sabatini Lab/GitHub/mouse_bandit')
import support_functions as sf
import numpy as np
import pandas as pd
import scipy as sp
import bandit_preprocessing as bp
from sklearn import preprocessing
import sys
import os
%matplotlib inline 

## Retrieve names of sessions you want from the DB

In [2]:
# Load the session record database; the CSV's first column is used as the index.
session_record_path = '/Users/Sabatini Lab/GitHub/mouse_bandit/session_record.csv'
db = pd.read_csv(session_record_path, index_col=0)

### Query 80-20 sessions for mouse `harry` where performance > 0.85 and block range min was 50

In [3]:
# Select the sessions to analyse. A session is kept only when all of these hold:
#   - either port's reward probability is 0.8
#   - 'p(high Port)' is above 0.85
#   - 'Block Range Min' equals 50
#   - the mouse is 'harry'
is_8020 = (db['Left Reward Prob'] == 0.8) | (db['Right Reward Prob'] == 0.8)
is_high_performance = db['p(high Port)'] > 0.85
is_block_50 = db['Block Range Min'] == 50
is_harry = db['Mouse ID'] == 'harry'

r = db[is_8020 & is_high_performance & is_block_50 & is_harry]

# Session IDs of the selected rows; used below to locate each session's trial file.
session_names = r['Session ID'].values

In [4]:
# Display the session records that passed the filters.
r

Unnamed: 0,Owner,Session ID,Mouse ID,Date,Phase,Laser Stim Prob,Left Reward Prob,Right Reward Prob,Block Range Min,Block Range Max,No. Trials,No. Blocks,No. Rewards,p(high Port),Decision Window Duration,Min Inter-trial-interval,Left Solenoid Duration,Right Solenoid Duration
167,shay,07252016_harry,harry,7/25/2016 0:00,2,0.0,0.2,0.8,50,50,579,8,419,0.88,2,1.0,45,40
475,shay,07082016_harry,harry,7/8/2016 0:00,2,0.0,0.8,0.2,50,50,538,8,401,0.88,2,1.0,45,40
496,shay,07132016_harry,harry,7/13/2016 0:00,2,0.0,0.8,0.2,50,50,653,8,445,0.89,2,1.0,45,40
517,shay,07182016_harry,harry,7/18/2016 0:00,2,0.0,0.8,0.2,50,50,520,7,360,0.88,2,1.0,45,40
524,shay,07192016_harry,harry,7/19/2016 0:00,2,0.0,0.8,0.2,50,50,800,11,574,0.87,2,1.0,45,40


In [5]:
# (rows, columns) of the filtered session table; rows = number of selected sessions.
r.shape

(5, 18)

## Load in CSV files (from running exportTrials.m)

In [6]:
# Load the per-session trial data: one DataFrame per selected session.

# The trial CSVs are read with these names supplied explicitly as column labels.
columns = [
    'Elapsed Time (s)',
    'Since last trial (s)',
    'Trial Duration (s)',
    'Port Poked',
    'Right Reward Prob',
    'Left Reward Prob',
    'Reward Given',
]

root_dir = '/Users/Sabatini Lab/GitHub/mouse_bandit/data/trial_data'

# Each session's file is named '<Session ID>_trials.csv' inside root_dir.
# The list keeps the same order as session_names.
trial_df = [
    pd.read_csv(os.path.join(root_dir, session + '_trials.csv'), names=columns)
    for session in session_names
]

# Mouse ID for each selected session, in the same row order as session_names.
mouse_ids = r['Mouse ID'].values

In [7]:
# Number of per-session trial DataFrames that were loaded.
len(trial_df)

5

## Convert into one feature matrix

In [8]:
# Build one feature matrix per session, then stack them with a single concat.
#
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# re-copies the accumulated frame on every iteration. pd.concat with its
# default arguments keeps each session's original row index, exactly as
# append did.
#
# NOTE(review): the positional 10 is presumably the number of previous trials
# used as history (the output has 10_* ... 1_* columns) — confirm against
# bp.create_feature_matrix.
session_matrices = [
    bp.create_feature_matrix(
        df, 10, mouse_ids[i], session_names[i], feature_names='Default'
    )
    for i, df in enumerate(trial_df)
]

# Raises ValueError if no sessions were selected (nothing to concatenate).
master_matrix = pd.concat(session_matrices)
    

In [9]:
# (rows, columns) of the combined feature matrix across all loaded sessions.
master_matrix.shape

(3040, 53)

In [10]:
# Mean of the 'Higher p port' column over every row of the combined matrix.
# NOTE(review): presumably the fraction of trials on which the higher-probability
# port was chosen — confirm against bp.create_feature_matrix.
master_matrix['Higher p port'].mean()

0.88092105263157894

In [11]:
# Display the combined feature matrix for a visual sanity check.
master_matrix

Unnamed: 0,Mouse ID,Session ID,Trial,Block Trial,Block Reward,Port Streak,Reward Streak,10_Port,10_Reward,10_ITI,...,1_Port,1_Reward,1_ITI,1_trialDuration,0_ITI,0_trialDuration,Decision,Switch,Higher p port,Reward
0,harry,07252016_harry,11.0,11.0,3.0,1.0,-1.0,0.304,1.0,0.0,...,0.319,1.0,0.0,1.304,0.258,2.335,0.0,1.0,1.0,1.0
1,harry,07252016_harry,12.0,12.0,4.0,1.0,1.0,0.261,1.0,0.0,...,0.258,0.0,1.0,2.335,0.304,1.793,0.0,0.0,1.0,0.0
2,harry,07252016_harry,13.0,13.0,4.0,2.0,-1.0,0.319,1.0,0.0,...,0.304,0.0,0.0,1.793,0.261,1.613,0.0,0.0,1.0,0.0
3,harry,07252016_harry,14.0,14.0,4.0,3.0,-2.0,0.428,1.0,0.0,...,0.261,0.0,0.0,1.613,0.319,1.331,1.0,1.0,0.0,0.0
4,harry,07252016_harry,15.0,15.0,4.0,1.0,-3.0,0.302,1.0,0.0,...,0.319,1.0,0.0,1.331,0.428,1.545,1.0,0.0,0.0,0.0
5,harry,07252016_harry,16.0,16.0,4.0,2.0,-4.0,0.474,1.0,0.0,...,0.428,1.0,0.0,1.545,0.302,1.428,1.0,0.0,0.0,0.0
6,harry,07252016_harry,17.0,17.0,4.0,3.0,-5.0,0.243,0.0,1.0,...,0.302,1.0,0.0,1.428,0.474,2.241,0.0,1.0,1.0,1.0
7,harry,07252016_harry,18.0,18.0,5.0,1.0,1.0,0.228,0.0,1.0,...,0.474,0.0,1.0,2.241,0.243,2.532,0.0,0.0,1.0,1.0
8,harry,07252016_harry,19.0,19.0,6.0,2.0,2.0,0.320,0.0,1.0,...,0.243,0.0,1.0,2.532,0.228,1.348,0.0,0.0,1.0,1.0
9,harry,07252016_harry,20.0,20.0,7.0,3.0,3.0,0.262,1.0,0.0,...,0.228,0.0,1.0,1.348,0.320,2.285,0.0,0.0,1.0,1.0


## Save combined feature matrix  

In [13]:
# Write the combined feature matrix to a CSV file inside root_dir.
output_path = os.path.join(root_dir, 'harry_8020_high.csv')
master_matrix.to_csv(output_path)