# Ingest acquisition session paths into bl_new_acquisition.Acquisitions

In [1]:
# Look for the project's local configuration file before touching the database.
# NOTE(review): presumably this is what makes dj.config usable in the cells below —
# confirm in scripts.conf_file_finding.
from scripts.conf_file_finding import try_find_conf_file
try_find_conf_file()

Local configuration file found !!, no need to run the configuration (unless configuration has changed)


## Connection to DB

In [2]:
import datajoint as dj
import pandas as pd
import utility.path_utility as pu
import os
import pathlib
import numpy as np
import bl_pipeline.acquisition as acq

# Virtual modules for the schemas this notebook reads from:
# new_lab.Contacts, new_subject.Rats and new_acquisition.AcquisitionSessions.
# Each pair is (module name, schema name), created in this order.
new_lab, new_subject, new_acquisition = (
    dj.create_virtual_module(module_name, schema_name)
    for module_name, schema_name in (
        ('new_lab', 'bl_new_lab'),
        ('new_subject', 'bl_new_subject'),
        ('new_acquisition', 'bl_new_acquisition'),
    )
)

# Further schemas, left disabled:
#bdata          = dj.create_virtual_module('bdata', 'bl_bdata')
#shadow_acquisition = dj.create_virtual_module('shadow_acquisition', 'bl_shadow_acquisition')
#ratinfo        = dj.create_virtual_module('ratinfo', 'bl_ratinfo')

Connecting alvaros@datajoint01.pni.princeton.edu:3306


### 1. Get all directories with raw acquisitions from the raw-data root (`ephys_root_data_dir[0]`)

In [27]:
# Root of the raw ephys data: first entry of the configured 'ephys_root_data_dir' list.
root_dir = pathlib.Path(dj.config['custom']['ephys_root_data_dir'][0])
posix_root_dir = pathlib.Path(root_dir).as_posix()

# Start from an empty frame that has the columns of acq.Acquisitions, without
# acquisition_id / user_id / acquisition_sessid (user_id and acquisition_sessid
# are merged in by later steps; acquisition_id is presumably assigned by the
# database — TODO confirm against the table definition).
fields_t_acq = pd.DataFrame.from_dict(acq.Acquisitions.heading.attributes, orient='index')
acquisitions_found = 0
acquisition_df = pd.DataFrame(columns=fields_t_acq.index.to_list())
acquisition_df = acquisition_df.drop(columns=['acquisition_id', 'user_id', 'acquisition_sessid'])

# Visit every directory below root_dir and record the ones that
# pu.check_file_pattern_dir accepts for the 'raw_np_files' pattern.
for root, dirs, files in os.walk(root_dir):
    for dirname in dirs:
        aux_dir = pathlib.Path(os.path.join(root, dirname))
        # Path relative to root_dir, with POSIX separators and a leading '/'.
        # relative_to() strips only the leading root; the previous
        # str.replace(root, "") also removed any later occurrence of the root
        # string inside the path.
        rel_dir = '/' + aux_dir.relative_to(root_dir).as_posix()
        status_dir = pu.check_file_pattern_dir(str(aux_dir), pu.file_pattern_ephys_session['raw_np_files'], search_childs=False)

        if status_dir:
            acquisitions_found += 1
            # Append one row; the remaining columns stay empty until the later steps.
            acquisition_df.loc[len(acquisition_df.index), 'acquisition_raw_rel_path'] = rel_dir

acquisition_df

Unnamed: 0,acquisition_rat,acquisition_type,acquisition_raw_rel_path
0,,,/Adrian/A230/2019-07-04_g0/2019-07-04_g0_imec0
1,,,/Adrian/A230/2019-07-04_g0/2019-07-04_g0_imec1
2,,,/Adrian/A230/2019-07-04_g0/2019-07-04_g0_imec2
3,,,/Adrian/A230/2019-07-06_g0/2019-07-06_g0_imec0
4,,,/Adrian/A230/2019-07-06_g0/2019-07-06_g0_imec1
...,...,...,...
1043,,,/VGAT-ReaChR acute 2020_01_31/500um distance/V...
1044,,,/VGAT-ReaChR acute 2020_01_31/500um distance/V...
1045,,,/VGAT-ReaChR acute 2020_01_31/500um distance/V...
1046,,,/VGAT-ReaChR acute 2020_01_31/500um lateral 40...


In [28]:
# Checkpoint of the directory scan result (an independent deep copy), so the
# later steps can be re-run without walking the filesystem again.
copy_acquisition_df = acquisition_df.copy(deep=True)

In [55]:
# Reset acquisition_df to the checkpointed scan result before (re-)running steps 2-5,
# which overwrite acquisition_df in place.
acquisition_df = copy_acquisition_df.copy(deep=True)

### 1a. Get all directories with sorted results from the sorted-data root (`ephys_root_data_dir[1]`)

In [3]:
# Root of the sorted (post-processed) data: second entry of the configured
# 'ephys_root_data_dir' list.
sorted_dir = pathlib.Path(dj.config['custom']['ephys_root_data_dir'][1])
posix_sorted_dir = pathlib.Path(sorted_dir).as_posix()

# Empty frame with the columns of acq.Sortings, without acquisition_id / sorting_id
# (acquisition_id is merged in by a later step; sorting_id is presumably assigned
# by the database — TODO confirm against the table definition).
fields_t_sort = pd.DataFrame.from_dict(acq.Sortings.heading.attributes, orient='index')
sorted_found = 0
sorted_df = pd.DataFrame(columns=fields_t_sort.index.to_list())
sorted_df = sorted_df.drop(columns=['acquisition_id', 'sorting_id'])

# Visit every directory below sorted_dir and record the ones that
# pu.check_file_pattern_dir accepts for the 'sorted_np_files' pattern.
for root, dirs, files in os.walk(sorted_dir):
    for dirname in dirs:
        aux_dir = pathlib.Path(os.path.join(root, dirname))
        # Path relative to sorted_dir, with POSIX separators and a leading '/'.
        # relative_to() strips only the leading root; the previous
        # str.replace(root, "") also removed any later occurrence of the root
        # string inside the path.
        rel_dir = '/' + aux_dir.relative_to(sorted_dir).as_posix()
        status_dir_sorted = pu.check_file_pattern_dir(str(aux_dir), pu.file_pattern_ephys_session['sorted_np_files'], search_childs=False)

        if status_dir_sorted:
            sorted_found += 1
            # Append one row holding the relative path of the sorted directory.
            sorted_df.loc[len(sorted_df.index), 'acquisition_post_rel_path'] = rel_dir

sorted_df


Unnamed: 0,acquisition_post_rel_path
0,/Adrian/A230/2019-07-04/2019-07-04_g0_imec0
1,/Adrian/A230/2019-07-04/2019-07-04_g0_imec1
2,/Adrian/A230/2019-07-04/2019-07-04_g0_imec2
3,/Adrian/A230/2019-07-04 (bank 0)/2019-07-04_g0...
4,/Adrian/A230/2019-07-04 (bank 0)/2019-07-04_g0...
...,...
747,/Thomas/T304/T304_2020_10_04_2.8mm_ML/spikesor...
748,/Thomas/T305/T305_2020_10_17/spikesort_2020_10...
749,/Thomas/T305/T305_2020_10_17b/spikesort_2020_1...
750,/Thomas/T305/T305_2020_10_17c/spikesort_2020_1...


### 2. Infer experimenter and rat from path

In [56]:
# Relative paths look like '/<experimenter>/<rat>/...'; splitting on '/' leaves an
# empty first element, so the experimenter is element 1 and the rat element 2.
path_parts = acquisition_df['acquisition_raw_rel_path'].str.split('/')

acquisition_df['acquisition_type'] = 'ephys'
acquisition_df['experimenter'] = path_parts.str[1]
acquisition_df['acquisition_rat'] = path_parts.str[2]

### 3. Get experimenter user_id and merge with corresponding acquisition

In [57]:
# (user_id, experimenter) pairs known to the lab schema.
contact_df = pd.DataFrame(new_lab.Contacts.fetch('user_id', 'experimenter', as_dict=True))

# Diagnostic view: acquisitions whose experimenter has no matching contact.
# Built from the frame as it is BEFORE the inner merge below filters it.
acquisition_df_nouser = (
    acquisition_df
    .merge(contact_df, on='experimenter', how='left')
    .loc[lambda frame: frame['user_id'].isna()]
)

# Keep only acquisitions with a known experimenter and attach their user_id.
acquisition_df = acquisition_df.merge(contact_df, on='experimenter', how='inner')

acquisition_df_nouser

Unnamed: 0,acquisition_rat,acquisition_type,acquisition_raw_rel_path,experimenter,user_id
1035,1500um distance,ephys,/VGAT-ReaChR acute 2020_01_31/1500um distance/...,VGAT-ReaChR acute 2020_01_31,
1036,1500um distance,ephys,/VGAT-ReaChR acute 2020_01_31/1500um distance/...,VGAT-ReaChR acute 2020_01_31,
1037,1500um distance,ephys,/VGAT-ReaChR acute 2020_01_31/1500um distance/...,VGAT-ReaChR acute 2020_01_31,
1038,1500um distance,ephys,/VGAT-ReaChR acute 2020_01_31/1500um distance/...,VGAT-ReaChR acute 2020_01_31,
1039,1mm distance,ephys,/VGAT-ReaChR acute 2020_01_31/1mm distance/VGA...,VGAT-ReaChR acute 2020_01_31,
1040,500um distance,ephys,/VGAT-ReaChR acute 2020_01_31/500um distance/V...,VGAT-ReaChR acute 2020_01_31,
1041,500um distance,ephys,/VGAT-ReaChR acute 2020_01_31/500um distance/V...,VGAT-ReaChR acute 2020_01_31,
1042,500um distance,ephys,/VGAT-ReaChR acute 2020_01_31/500um distance/V...,VGAT-ReaChR acute 2020_01_31,
1043,500um distance,ephys,/VGAT-ReaChR acute 2020_01_31/500um distance/V...,VGAT-ReaChR acute 2020_01_31,
1044,500um distance,ephys,/VGAT-ReaChR acute 2020_01_31/500um distance/V...,VGAT-ReaChR acute 2020_01_31,


### 4. Check corresponding ratname and filter out non-matching rows

In [58]:
# Rat names known to the subject schema.
subject_df = pd.DataFrame(new_subject.Rats.fetch('ratname', as_dict=True))

# Snapshot of the acquisitions before the inner merge below filters them.
acquisition_df_norat = acquisition_df.copy()

# Diagnostic view: acquisitions whose inferred rat is not a known ratname.
# (Name kept as in the original cell: 'acquisition_dfnorat', without the second underscore.)
acquisition_dfnorat = (
    acquisition_df_norat
    .merge(subject_df, left_on='acquisition_rat', right_on='ratname', how='left')
    .loc[lambda frame: frame['ratname'].isna()]
)

# Keep only acquisitions that belong to a known rat.
acquisition_df = acquisition_df.merge(subject_df, left_on='acquisition_rat', right_on='ratname', how='inner')

acquisition_dfnorat

Unnamed: 0,acquisition_rat,acquisition_type,acquisition_raw_rel_path,experimenter,user_id,ratname
281,Unimplanted,ephys,/Adrian/Unimplanted/trying to break a probe wi...,Adrian,abondy,
282,Unimplanted,ephys,/Adrian/Unimplanted/trying to break a probe wi...,Adrian,abondy,
283,Unimplanted,ephys,/Adrian/Unimplanted/trying to break a probe wi...,Adrian,abondy,
284,Unimplanted,ephys,/Adrian/Unimplanted/trying to break a probe wi...,Adrian,abondy,
285,Unimplanted,ephys,/Adrian/Unimplanted/trying to break a probe wi...,Adrian,abondy,
...,...,...,...,...,...,...
1030,Unimplanted,ephys,/Thomas/Unimplanted/gain_noise_18194819542_202...,Thomas,zhihaol,
1031,Unimplanted,ephys,/Thomas/Unimplanted/gain_noise_18194819542_202...,Thomas,zhihaol,
1032,Unimplanted,ephys,/Thomas/Unimplanted/gain_noise_18194819542_202...,Thomas,zhihaol,
1033,Unimplanted,ephys,/Thomas/Unimplanted/gain_noise_18194819542_202...,Thomas,zhihaol,


### 5. Check corresponding session and add it

In [59]:
# (sessid, raw path) pairs already registered as acquisition sessions.
# Passing columns= explicitly keeps both columns present even when the fetch
# returns no rows; without it an empty result gives a column-less frame and the
# merge below fails with a KeyError on 'acquisition_raw_rel_path'.
session_rows = new_acquisition.AcquisitionSessions.fetch('sessid', 'acquisition_raw_rel_path', as_dict=True)
ephys_session_df = pd.DataFrame(session_rows, columns=['sessid', 'acquisition_raw_rel_path'])
ephys_session_df = ephys_session_df.rename(columns={'sessid': 'acquisition_sessid'})

# Left merge: every acquisition is kept; acquisition_sessid stays null where no
# session is registered for that raw path.
acquisition_df = acquisition_df.merge(ephys_session_df, on='acquisition_raw_rel_path', how='left')
#acquisition_df['acquisition_sessid'] = acquisition_df['acquisition_sessid'].astype('Int32')

# Diagnostic view: acquisitions without a matching session. Filtering the merged
# frame gives the same rows as repeating the identical left merge on a copy.
acquisition_df_nosession = acquisition_df.loc[acquisition_df['acquisition_sessid'].isnull(), :]
acquisition_df_nosession

Unnamed: 0,acquisition_rat,acquisition_type,acquisition_raw_rel_path,experimenter,user_id,ratname,acquisition_sessid
0,A230,ephys,/Adrian/A230/2019-07-04_g0/2019-07-04_g0_imec0,Adrian,abondy,A230,
1,A230,ephys,/Adrian/A230/2019-07-04_g0/2019-07-04_g0_imec1,Adrian,abondy,A230,
2,A230,ephys,/Adrian/A230/2019-07-04_g0/2019-07-04_g0_imec2,Adrian,abondy,A230,
3,A230,ephys,/Adrian/A230/2019-07-06_g0/2019-07-06_g0_imec0,Adrian,abondy,A230,
4,A230,ephys,/Adrian/A230/2019-07-06_g0/2019-07-06_g0_imec1,Adrian,abondy,A230,
...,...,...,...,...,...,...,...
930,T304,ephys,/Thomas/T304/T304_2020_10_04_2.8mm_ML_g0,Thomas,zhihaol,T304,
931,T305,ephys,/Thomas/T305/T305_2020_10_17b_g0,Thomas,zhihaol,T305,
932,T305,ephys,/Thomas/T305/T305_2020_10_17c_g0,Thomas,zhihaol,T305,
933,T305,ephys,/Thomas/T305/T305_2020_10_17_g0,Thomas,zhihaol,T305,


### 6. Insert all acquisitions found

In [60]:
# 'experimenter' and 'ratname' were only needed for the merges above; drop them
# so the frame holds just the columns prepared for acq.Acquisitions.
acquisition_df = acquisition_df.drop(['experimenter', 'ratname'], axis='columns')
# NOTE(review): the insert is commented out in the source — presumably it was
# already run once; confirm before re-enabling.
#acq.Acquisitions.insert(acquisition_df, skip_duplicates=True)

In [4]:
# Read back what is stored in acq.Acquisitions (one dict per row) as a DataFrame.
acquisition_rows = acq.Acquisitions.fetch(as_dict=True)
acquisition_db_df = pd.DataFrame(acquisition_rows)
acquisition_db_df

Unnamed: 0,acquisition_id,acquisition_sessid,acquisition_rat,user_id,acquisition_type,acquisition_raw_rel_path
0,1,,A230,abondy,ephys,/Adrian/A230/2019-07-04_g0/2019-07-04_g0_imec0
1,2,,A230,abondy,ephys,/Adrian/A230/2019-07-04_g0/2019-07-04_g0_imec1
2,3,,A230,abondy,ephys,/Adrian/A230/2019-07-04_g0/2019-07-04_g0_imec2
3,4,,A230,abondy,ephys,/Adrian/A230/2019-07-06_g0/2019-07-06_g0_imec0
4,5,,A230,abondy,ephys,/Adrian/A230/2019-07-06_g0/2019-07-06_g0_imec1
...,...,...,...,...,...,...
930,931,,T304,zhihaol,ephys,/Thomas/T304/T304_2020_10_04_2.8mm_ML_g0
931,932,,T305,zhihaol,ephys,/Thomas/T305/T305_2020_10_17b_g0
932,933,,T305,zhihaol,ephys,/Thomas/T305/T305_2020_10_17c_g0
933,934,,T305,zhihaol,ephys,/Thomas/T305/T305_2020_10_17_g0


### 7. Merge acquisitions and sortings

In [5]:
# Only the raw path (merge key) and the acquisition id are needed from the DB frame.
acquisition_db_df = acquisition_db_df[['acquisition_raw_rel_path', 'acquisition_id']].copy()

# Derive the candidate raw path of each sorting by passing its sorted path to
# pu.get_parent_dir. Series.map works on the single column that is read and
# always yields a Series, whereas the row-wise DataFrame.apply(axis=1) used
# before returns a DataFrame for an empty frame.
sorted_df['acquisition_raw_rel_path'] = sorted_df['acquisition_post_rel_path'].map(pu.get_parent_dir)

# Inner merge: keep only sortings whose derived raw path matches a stored acquisition.
sorted_df2 = sorted_df.merge(acquisition_db_df, on='acquisition_raw_rel_path', how='inner')

sorted_df2

Unnamed: 0,acquisition_post_rel_path,acquisition_raw_rel_path,acquisition_id
0,/Adrian/A230/A230_2019_07_15/2019-07-15_g0_ime...,/Adrian/A230/A230_2019_07_15/2019-07-15_g0_imec0,19
1,/Adrian/A241/A241_2020_01_14/A241_2020_01_14_g...,/Adrian/A241/A241_2020_01_14/A241_2020_01_14_g0,78
2,/Adrian/A241/A241_2020_01_14/A241_2020_01_14_g...,/Adrian/A241/A241_2020_01_14/A241_2020_01_14_g0,78
3,/Adrian/A241/A241_2020_01_22/A241_2020_01_22_g...,/Adrian/A241/A241_2020_01_22/A241_2020_01_22_g0,79
4,/Adrian/A241/A241_2020_01_23/A241_2020_01_23_g...,/Adrian/A241/A241_2020_01_23/A241_2020_01_23_g0,80
...,...,...,...
240,/Thomas/T227/T227_2020_03_14_idle_bank0/spikes...,/Thomas/T227/T227_2020_03_14_idle_bank0,784
241,/Thomas/T227/T227_2020_03_14_idle_bank1/spikes...,/Thomas/T227/T227_2020_03_14_idle_bank1,785
242,/Thomas/T227/T227_2020_03_15_idle_bank0/spikes...,/Thomas/T227/T227_2020_03_15_idle_bank0,786
243,/Thomas/T227/T227_2020_03_15_idle_bank1/spikes...,/Thomas/T227/T227_2020_03_15_idle_bank1,787


In [7]:
# The raw path was only the merge key; drop it before the (disabled) insert.
sorted_df2 = sorted_df2.drop(['acquisition_raw_rel_path'], axis='columns')
# NOTE(review): the insert is commented out in the source — presumably it was
# already run once; confirm before re-enabling.
#acq.Sortings.insert(sorted_df2, skip_duplicates=True)

In [19]:
# Read back what is stored in acq.Sortings (one dict per row) as a DataFrame.
sorting_rows = acq.Sortings.fetch(as_dict=True)
sortings_db_df = pd.DataFrame(sorting_rows)
sortings_db_df

Unnamed: 0,sorting_id,acquisition_id,acquisition_post_rel_path
0,1,19,/Adrian/A230/A230_2019_07_15/2019-07-15_g0_ime...
1,2,78,/Adrian/A241/A241_2020_01_14/A241_2020_01_14_g...
2,3,78,/Adrian/A241/A241_2020_01_14/A241_2020_01_14_g...
3,4,79,/Adrian/A241/A241_2020_01_22/A241_2020_01_22_g...
4,5,80,/Adrian/A241/A241_2020_01_23/A241_2020_01_23_g...
...,...,...,...
465,466,931,/Thomas/T304/T304_2020_10_04_2.8mm_ML/spikesor...
466,467,934,/Thomas/T305/T305_2020_10_17/spikesort_2020_10...
467,468,932,/Thomas/T305/T305_2020_10_17b/spikesort_2020_1...
468,469,933,/Thomas/T305/T305_2020_10_17c/spikesort_2020_1...


### Account for g0 suffix

In [17]:
# Second matching attempt: same sortings, but with '_g0' appended to the derived
# raw path. assign() returns a new frame, so sorted_df itself is left unchanged.
sorted_df4 = sorted_df.assign(
    acquisition_raw_rel_path=lambda frame: frame['acquisition_raw_rel_path'] + '_g0'
)
sorted_df4

Unnamed: 0,acquisition_post_rel_path,acquisition_raw_rel_path
0,/Adrian/A230/2019-07-04/2019-07-04_g0_imec0,/Adrian/A230/2019-07-04_g0
1,/Adrian/A230/2019-07-04/2019-07-04_g0_imec1,/Adrian/A230/2019-07-04_g0
2,/Adrian/A230/2019-07-04/2019-07-04_g0_imec2,/Adrian/A230/2019-07-04_g0
3,/Adrian/A230/2019-07-04 (bank 0)/2019-07-04_g0...,/Adrian/A230/2019-07-04 (bank 0)_g0
4,/Adrian/A230/2019-07-04 (bank 0)/2019-07-04_g0...,/Adrian/A230/2019-07-04 (bank 0)_g0
...,...,...
747,/Thomas/T304/T304_2020_10_04_2.8mm_ML/spikesor...,/Thomas/T304/T304_2020_10_04_2.8mm_ML_g0
748,/Thomas/T305/T305_2020_10_17/spikesort_2020_10...,/Thomas/T305/T305_2020_10_17_g0
749,/Thomas/T305/T305_2020_10_17b/spikesort_2020_1...,/Thomas/T305/T305_2020_10_17b_g0
750,/Thomas/T305/T305_2020_10_17c/spikesort_2020_1...,/Thomas/T305/T305_2020_10_17c_g0


In [18]:
# Inner merge on the '_g0'-suffixed raw path: keeps the sortings that match a
# stored acquisition once the suffix is added.
sorted_df5 = pd.merge(
    sorted_df4,
    acquisition_db_df,
    on='acquisition_raw_rel_path',
    how='inner',
)
sorted_df5

Unnamed: 0,acquisition_post_rel_path,acquisition_raw_rel_path,acquisition_id
0,/Thomas/T181/T181_2019_11_11_bank0/spikesort_2...,/Thomas/T181/T181_2019_11_11_bank0_g0,573
1,/Thomas/T181/T181_2019_11_11_bank1/spikesort_2...,/Thomas/T181/T181_2019_11_11_bank1_g0,574
2,/Thomas/T219/T219_2019_11_17_bank0/spikesort_2...,/Thomas/T219/T219_2019_11_17_bank0_g0,656
3,/Thomas/T219/T219_2019_11_17_bank1/spikesort_2...,/Thomas/T219/T219_2019_11_17_bank1_g0,657
4,/Thomas/T219/T219_2019_11_18_bank0/spikesort_2...,/Thomas/T219/T219_2019_11_18_bank0_g0,658
...,...,...,...
220,/Thomas/T304/T304_2020_10_04_2.8mm_ML/spikesor...,/Thomas/T304/T304_2020_10_04_2.8mm_ML_g0,931
221,/Thomas/T305/T305_2020_10_17/spikesort_2020_10...,/Thomas/T305/T305_2020_10_17_g0,934
222,/Thomas/T305/T305_2020_10_17b/spikesort_2020_1...,/Thomas/T305/T305_2020_10_17b_g0,932
223,/Thomas/T305/T305_2020_10_17c/spikesort_2020_1...,/Thomas/T305/T305_2020_10_17c_g0,933


In [14]:
# The raw path was only the merge key; drop it before the (disabled) insert.
sorted_df5 = sorted_df5.drop(['acquisition_raw_rel_path'], axis='columns')
# NOTE(review): the insert is commented out in the source — presumably it was
# already run once; confirm before re-enabling.
#acq.Sortings.insert(sorted_df5, skip_duplicates=True)