# Creating a feature matrix from a DB Query

In [13]:
import sys
sys.path.append('/Users/Sabatini Lab/GitHub/mouse_bandit/data_preprocessing_code')
sys.path.append('/Users/Sabatini Lab/GitHub/mouse_bandit')
import support_functions as sf
import numpy as np
import pandas as pd
import scipy as sp
import bandit_preprocessing as bp
from sklearn import preprocessing
import sys
import os
%matplotlib inline 

## Retrieve names of sessions you want from the DB

In [14]:
# Read the session-record database; the CSV's first column becomes the index.
db = pd.read_csv(
    '/Users/Sabatini Lab/GitHub/mouse_bandit/mike_session_record.csv',
    index_col=0,
)

### Query all 90-10 sessions where performance (p(high port)) was > 0.7, the block range minimum was 50, and there were no laser trials

In [16]:
# One boolean mask per selection criterion; all must hold for a session to be kept.
is_9010 = (db['Left Reward Prob'] == 0.9) | (db['Right Reward Prob'] == 0.9)
is_high_performance = db['p(high Port)'] > 0.7
has_block_min_50 = db['Block Range Min'] == 50
has_no_laser = db['Laser Stim Prob'] == 0

# Independent copy of the matching rows, in their original table order.
r = db[is_9010 & is_high_performance & has_block_min_50 & has_no_laser].copy()

# Session IDs of the selected rows (used later to locate the per-session trial files).
session_names = r['Session ID'].values

In [17]:
# Display the last 10 rows of the filtered session table as a sanity check.
r.tail(10)

Unnamed: 0,Owner,Session ID,Mouse ID,Date,Phase,Laser Stim Prob,Left Reward Prob,Right Reward Prob,Block Range Min,Block Range Max,No. Trials,No. Blocks,No. Rewards,p(high Port),Decision Window Duration,Min Inter-trial-interval,Left Solenoid Duration,Right Solenoid Duration
1133,mike,08102017_SOM45,SOM45,2017-08-10 00:00:00,2,0.0,0.1,0.9,50,50,537,8,434,0.87,2,1.0,40,40
1143,mike,08142017_SOM41,SOM41,2017-08-14 00:00:00,2,0.0,0.9,0.1,50,50,475,7,383,0.89,2,1.0,40,40
1144,mike,08142017_SOM44,SOM44,2017-08-14 00:00:00,2,0.0,0.1,0.9,50,50,366,5,291,0.87,2,1.0,40,40
1145,mike,08142017_SOM45,SOM45,2017-08-14 00:00:00,2,0.0,0.9,0.1,50,50,604,9,481,0.87,2,1.0,40,40
1154,mike,08162017_SOM41,SOM41,2017-08-16 00:00:00,2,0.0,0.1,0.9,50,50,546,8,432,0.87,2,1.0,40,40
1155,mike,08162017_SOM44,SOM44,2017-08-16 00:00:00,2,0.0,0.9,0.1,50,50,296,4,216,0.76,2,1.0,40,40
1156,mike,08162017_SOM45,SOM45,2017-08-16 00:00:00,2,0.0,0.9,0.1,50,50,576,9,447,0.82,2,1.0,40,40
1166,mike,08182017_SOM41,SOM41,2017-08-18 00:00:00,2,0.0,0.9,0.1,50,50,485,7,397,0.88,2,1.0,40,40
1167,mike,08182017_SOM44,SOM44,2017-08-18 00:00:00,2,0.0,0.9,0.1,50,50,293,4,235,0.87,2,1.0,40,40
1168,mike,08182017_SOM45,SOM45,2017-08-18 00:00:00,2,0.0,0.9,0.1,50,50,636,9,490,0.84,2,1.0,40,40


In [18]:
# (number of selected sessions, number of columns) in the filtered table.
r.shape

(118, 18)

## Load in CSV files (from running exportTrials.m). A 'Laser Given' column was added to the trial columns for the new data format following the opto changes

In [19]:
'''
load in trial data
'''
# Column labels assigned, in order, to each trial CSV when it is read
# (passed to read_csv via names=).
columns = [
    'Elapsed Time (s)',
    'Since last trial (s)',
    'Trial Duration (s)',
    'Port Poked',
    'Right Reward Prob',
    'Left Reward Prob',
    'Reward Given',
    'Laser Given',
]

# Directory holding one '<session>_trials.csv' file per session.
root_dir = '/Users/Sabatini Lab/GitHub/mouse_bandit/data/mike_data/trial_data'

# One DataFrame per selected session, in the same order as session_names.
trial_df = [
    pd.read_csv(os.path.join(root_dir, session + '_trials.csv'), names=columns)
    for session in session_names
]

# Mouse IDs aligned row-for-row with session_names (both come from r).
mouse_ids = r['Mouse ID'].values

In [20]:
# Number of per-session trial tables loaded; should match the number of rows in r.
len(trial_df)

118

## convert into 1 feature matrix

In [21]:
# Build one feature matrix per session, then stack them into a single frame.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# appending inside a loop re-copies the accumulated frame on every iteration.
# Collecting the pieces and concatenating once gives the same result (original
# per-session row indices are kept) without either problem.
#
# NOTE(review): the positional 10 is presumably the number of previous trials
# used as history features (the output has 10_* ... 1_* columns) -- confirm
# against bp.create_feature_matrix.
feature_matrices = [
    bp.create_feature_matrix(
        df, 10, mouse_ids[i], session_names[i], feature_names='Default'
    )
    for i, df in enumerate(trial_df)
]

# Rows from all sessions, stacked in session order.
master_matrix = pd.concat(feature_matrices)
    

In [22]:
# (total rows across all sessions, number of feature columns) in the combined matrix.
master_matrix.shape

(50425, 53)

In [23]:
# Mean of the 'Higher p port' column over every row of the combined matrix.
# NOTE(review): presumably the fraction of trials on which the higher-probability
# port was chosen -- confirm against bp.create_feature_matrix.
master_matrix['Higher p port'].mean()

0.85035200793257315

## Stephenson et al. acquired ~6500 trials per animal

In [24]:
# List the distinct session IDs present in the combined matrix.
master_matrix['Session ID'].unique()

array(['06142017_SOM42', '06152017_SOM42', '06162017_SOM42',
       '06192017_SOM42', '06192017_SOM45', '06202017_SOM42',
       '06202017_SOM44', '06212017_SOM42', '06212017_SOM45',
       '06222017_SOM42', '06222017_SOM44', '06222017_SOM45',
       '06232017_SOM42', '06232017_SOM44', '06232017_SOM45',
       '06262017_PV30', '06262017_SOM41', '06262017_SOM42',
       '06262017_SOM44', '06262017_SOM45', '06272017_PV30',
       '06272017_SOM41', '06272017_SOM42', '06272017_SOM44',
       '06272017_SOM45', '06282017_PV30', '06282017_SOM41',
       '06282017_SOM42', '06282017_SOM44', '06282017_SOM45',
       '06292017_PV30', '06292017_SOM41', '06292017_SOM42',
       '06292017_SOM44', '06292017_SOM45', '06302017_PV30',
       '06302017_SOM41', '06302017_SOM42', '06302017_SOM44',
       '06302017_SOM45', '07032017_PV30', '07032017_SOM42',
       '07032017_SOM44', '07032017_SOM45', '07042017_PV30',
       '07042017_SOM41', '07042017_SOM42', '07042017_SOM44',
       '07052017_PV30', '070520

In [25]:
# Display the last 10 rows of the combined feature matrix as a sanity check.
master_matrix.tail(10)

Unnamed: 0,Mouse ID,Session ID,Trial,Block Trial,Block Reward,Port Streak,Reward Streak,10_Port,10_Reward,10_ITI,...,1_Port,1_Reward,1_ITI,1_trialDuration,0_ITI,0_trialDuration,Decision,Switch,Higher p port,Reward
616,SOM45,08182017_SOM45,627.0,49.0,36.0,42.0,2.0,0.622,1.0,1.0,...,0.728,1.0,1.0,1.705,0.687,1.349,1.0,0.0,1.0,1.0
617,SOM45,08182017_SOM45,628.0,50.0,37.0,43.0,3.0,0.657,1.0,1.0,...,0.687,1.0,1.0,1.349,0.622,3.488,1.0,0.0,1.0,1.0
618,SOM45,08182017_SOM45,629.0,51.0,38.0,44.0,4.0,0.654,1.0,1.0,...,0.622,1.0,1.0,3.488,0.657,2.005,1.0,0.0,1.0,1.0
619,SOM45,08182017_SOM45,630.0,52.0,39.0,45.0,5.0,0.69,1.0,1.0,...,0.657,1.0,1.0,2.005,0.654,1.53,1.0,0.0,1.0,1.0
620,SOM45,08182017_SOM45,631.0,53.0,40.0,46.0,6.0,0.676,1.0,1.0,...,0.654,1.0,1.0,1.53,0.69,2.21,1.0,0.0,1.0,1.0
621,SOM45,08182017_SOM45,632.0,54.0,41.0,47.0,7.0,0.667,1.0,1.0,...,0.69,1.0,1.0,2.21,0.676,1.668,1.0,0.0,1.0,1.0
622,SOM45,08182017_SOM45,633.0,55.0,42.0,48.0,8.0,0.374,1.0,1.0,...,0.676,1.0,1.0,1.668,0.667,3.233,1.0,0.0,1.0,1.0
623,SOM45,08182017_SOM45,634.0,56.0,43.0,49.0,9.0,0.712,1.0,0.0,...,0.667,1.0,1.0,3.233,0.374,2.411,1.0,0.0,1.0,1.0
624,SOM45,08182017_SOM45,635.0,57.0,44.0,50.0,10.0,0.68,1.0,1.0,...,0.374,1.0,1.0,2.411,0.712,26.857,1.0,0.0,1.0,1.0
625,SOM45,08182017_SOM45,636.0,58.0,45.0,51.0,11.0,0.678,1.0,1.0,...,0.712,1.0,1.0,26.857,0.68,1.253,1.0,0.0,1.0,1.0


## Save combined feature matrix  

In [26]:
# Write the combined feature matrix to a CSV inside root_dir.
combined_csv_path = os.path.join(root_dir, 'all_9010_high.csv')
master_matrix.to_csv(combined_csv_path)

In [124]:
#master_matrix.to_csv(os.path.join(root_dir,'all_8020_high.csv'))