# Ingest ephys session paths into the bl_new_acquisition.acquisition_sessions table

In [1]:
# Look for the local configuration file; the helper reports what it found in the cell output.
# NOTE(review): presumably this must run before the DataJoint connection is opened below — confirm.
from scripts.conf_file_finding import try_find_conf_file
try_find_conf_file()

Local configuration file found !!, no need to run the configuration (unless configuration has changed)


## Connection to DB

In [2]:
import datajoint as dj
import pandas as pd
import utility.path_utility as pu
import os

# Virtual modules give access to the tables of existing database schemas.
# The first argument is the name given to the generated module, the second is
# the schema it is built from.
bdata = dj.create_virtual_module('bdata', 'bl_bdata')
shadow_acquisition = dj.create_virtual_module('shadow_acquisition', 'bl_shadow_acquisition')
new_acquisition = dj.create_virtual_module('new_acquisition', 'bl_new_acquisition')
# Module name spelled 'new_lab' so it matches both the variable and the schema.
new_lab = dj.create_virtual_module('new_lab', 'bl_new_lab')
ratinfo = dj.create_virtual_module('ratinfo', 'bl_ratinfo')

Connecting alvaros@datajoint01.pni.princeton.edu:3306


## Get all sessions info

In [3]:
# Columns pulled from each table.
list_sessions_fields = ['sessid', 'session_rat', 'session_userid', 'session_rigid', 'session_date']
list_contacts_fields = ['user_id', 'experimenter']

# Sessions, newest sessid first.
session_records = new_acquisition.Sessions.fetch(*list_sessions_fields, order_by='sessid desc', as_dict=True)
sessions_df = pd.DataFrame(session_records)

# Contacts provide the experimenter name for each session user id.
contact_records = new_lab.Contacts.fetch(*list_contacts_fields, as_dict=True)
contact_df = pd.DataFrame(contact_records)

# Default (inner) merge: only sessions whose user id has a matching contact are kept.
sessions_df = sessions_df.merge(contact_df, left_on='session_userid', right_on='user_id')

# Dates are handled as strings from here on.
sessions_df['session_date'] = sessions_df['session_date'].astype('str')

# 1. Raw session processing

## 1.1 Construct and find nominal paths

In [None]:
ephys_root = dj.config['custom']['ephys_root_data_dir']


def _raw_subject_dir(row):
    """Combine the ephys root with the row's experimenter and rat via pu.combine_str_path."""
    return pu.combine_str_path(ephys_root, [row['experimenter'], row['session_rat']])


def _raw_nominal_session_dir(row):
    """Pass the row's subject path and session date to pu.check_date_directory."""
    # NOTE(review): the result looks like it can be several candidate paths or a
    # "not found" marker, judging by the filter/explode steps that follow — confirm.
    return pu.check_date_directory(row['subject_path'], row['session_date'])


sessions_df['subject_path'] = sessions_df.apply(_raw_subject_dir, axis=1)
sessions_df['nominal_session_path'] = sessions_df.apply(_raw_nominal_session_dir, axis=1)

## 1.2 Filter only sessions with nominal path found

In [None]:
# Keep only rows whose nominal path is not one of the values in pu.path_not_found_dict.
raw_sessions_df_nom_path_found = (
    sessions_df
    .loc[~sessions_df['nominal_session_path'].isin(pu.path_not_found_dict.values()), :]
    .reset_index(drop=True)
)

## 1.3 Enumerate all possible directories for each session

In [None]:
# When a session has several candidate paths, explode yields one row per candidate;
# the rows are then ordered by session id.
raw_sessions_df_nom_path_found = (
    raw_sessions_df_nom_path_found
    .explode(['nominal_session_path'])
    .sort_values(by=['sessid'])
)

## 1.4 Find session files in nominal directories and their subdirectories

In [None]:
# Look for raw recording files, matched with the 'raw_np_files' pattern, starting
# from each nominal directory.
raw_sessions_df_nom_path_found['real_raw_session_path'] = raw_sessions_df_nom_path_found.apply(
    lambda x: pu.find_file_pattern_dir(x['nominal_session_path'],
                                       pu.file_pattern_ephys_session['raw_np_files']),
    axis=1,
)

# If several recording files are found inside a "parent" path, keep one row per result
raw_sessions_df_nom_path_found = raw_sessions_df_nom_path_found.explode(['real_raw_session_path'])

# Drop rows whose search result is one of the values in pu.path_not_found_dict.
# .copy() makes the filtered frame independent of its parent, so adding a column
# below does not raise SettingWithCopyWarning.
raw_path_found = ~raw_sessions_df_nom_path_found['real_raw_session_path'].isin(pu.path_not_found_dict.values())
raw_sessions_df_found = raw_sessions_df_nom_path_found.loc[raw_path_found, :].copy()

# Relative path: the ephys root string is removed literally (regex=False).
raw_sessions_df_found['raw_session_rel_path'] = raw_sessions_df_found['real_raw_session_path'].str.replace(
    dj.config['custom']['ephys_root_data_dir'], '', regex=False)
raw_sessions_df_found = raw_sessions_df_found.reset_index(drop=True)

In [None]:
# Display the sessions for which a raw recording path was found.
raw_sessions_df_found

# 2. Sorted session processing

## 2.1 Construct and find nominal paths

In [None]:
cluster_root = dj.config['custom']['clustering_root_data_dir']


def _cluster_subject_dir(row):
    """Combine the clustering root with the row's experimenter and rat via pu.combine_str_path."""
    return pu.combine_str_path(cluster_root, [row['experimenter'], row['session_rat']])


def _cluster_nominal_session_dir(row):
    """Pass the row's cluster subject path and session date to pu.check_date_directory."""
    return pu.check_date_directory(row['subject_cluster_path'], row['session_date'])


raw_sessions_df_found['subject_cluster_path'] = raw_sessions_df_found.apply(_cluster_subject_dir, axis=1)
raw_sessions_df_found['nominal_cluster_session_path'] = raw_sessions_df_found.apply(_cluster_nominal_session_dir, axis=1)


## 2.2 Filter only sessions with nominal path found

In [None]:
# Keep only rows whose nominal cluster path is not one of the values in pu.path_not_found_dict.
cluster_sessions_df_nom_path_found = (
    raw_sessions_df_found
    .loc[~raw_sessions_df_found['nominal_cluster_session_path'].isin(pu.path_not_found_dict.values()), :]
    .reset_index(drop=True)
)


## 2.3 Enumerate all possible directories for each session

In [None]:
# When a session has several candidate cluster paths, explode yields one row per
# candidate; the rows are then ordered by session id.
cluster_sessions_df_nom_path_found = (
    cluster_sessions_df_nom_path_found
    .explode(['nominal_cluster_session_path'])
    .sort_values(by=['sessid'])
)


## 2.4 Find session files in nominal directories and their subdirectories

In [None]:
# Look for sorted (clustered) files, matched with the 'sorted_np_files' pattern,
# starting from each nominal cluster directory.
cluster_sessions_df_nom_path_found['real_cluster_session_path'] = cluster_sessions_df_nom_path_found.apply(
    lambda x: pu.find_file_pattern_dir(x['nominal_cluster_session_path'],
                                       pu.file_pattern_ephys_session['sorted_np_files']),
    axis=1,
)

# If several recording files are found inside a "parent" path, keep one row per result
cluster_sessions_df_nom_path_found = cluster_sessions_df_nom_path_found.explode(['real_cluster_session_path'])

# Drop rows whose search result is one of the values in pu.path_not_found_dict.
# .copy() makes the filtered frame independent of its parent, so adding a column
# below does not raise SettingWithCopyWarning.
cluster_path_found = ~cluster_sessions_df_nom_path_found['real_cluster_session_path'].isin(pu.path_not_found_dict.values())
cluster_sessions_df_found = cluster_sessions_df_nom_path_found.loc[cluster_path_found, :].copy()

# Relative path: the clustering root string is removed literally (regex=False).
cluster_sessions_df_found['cluster_session_rel_path'] = cluster_sessions_df_found['real_cluster_session_path'].str.replace(
    dj.config['custom']['clustering_root_data_dir'], '', regex=False)
cluster_sessions_df_found = cluster_sessions_df_found.reset_index(drop=True)
cluster_sessions_df_found

# 3. Ingest into DB (PreAcquisitionSessions)

## 3.1 Add/Select columns from the DF --> DB

In [None]:
# Column names expected by the PreAcquisitionSessions table.
path_column_renames = {
    "cluster_session_rel_path": "acquisition_post_rel_path",
    "raw_session_rel_path": "acquisition_raw_rel_path",
}

cluster_sessions_df_found = (
    cluster_sessions_df_found
    # Number the candidate directories of each session: 0, 1, 2, ... in row order.
    .assign(directory_num=lambda df: df.groupby('sessid').cumcount())
    .reset_index(drop=True)
    .rename(columns=path_column_renames)
    # Every row starts as an unconfirmed ('correct_dirs' = 0) ephys candidate.
    .assign(acquisition_type='ephys', correct_dirs=0)
)

# Keep exactly the columns named in the table heading, in heading order.
pre_acquisition_sessions_df = cluster_sessions_df_found[new_acquisition.PreAcquisitionSessions.heading.names]
pre_acquisition_sessions_df





In [None]:
# Ad-hoc inspection: per-directory_num column maxima, then the post-processing
# paths recorded for one specific session id.
so = pre_acquisition_sessions_df.groupby('directory_num').max()
is_inspected_session = pre_acquisition_sessions_df['sessid'] == 710898
pre_acquisition_sessions_df.loc[is_inspected_session, 'acquisition_post_rel_path'].values

## 3.2 Ingest into PreAcquisitionSessions

In [None]:
# One dict per candidate row, keyed by the table's column names.
dict_keys = pre_acquisition_sessions_df.to_dict(orient='records')

# Insert all rows in a single call instead of one insert1 round-trip per row.
# skip_duplicates=True makes the cell safe to re-run: rows whose primary key is
# already in the table are left untouched instead of aborting the ingest with a
# duplicate-entry error part-way through.
new_acquisition.PreAcquisitionSessions.insert(dict_keys, skip_duplicates=True)

# 4. Update correct_dirs of known PreAcquisitionSessions (triggers AcquisitionSessions insert)

## 4.1  Fetch from PreAcquisitionSessions

In [None]:
# Everything currently stored in PreAcquisitionSessions, newest sessid first.
preacq_sessions_df = pd.DataFrame(
    new_acquisition.PreAcquisitionSessions.fetch(order_by='sessid desc', as_dict=True)
)

# keep=False flags every row of a session id that appears more than once, so
# sessions with several candidate directories are dropped entirely.
idx_duplicate_raw = preacq_sessions_df['sessid'].duplicated(keep=False)
preacq_sessions_df = (
    preacq_sessions_df[~idx_duplicate_raw]
    .loc[lambda df: df['directory_num'] == 0, :]
    .reset_index(drop=True)
)
preacq_sessions_df

## 4.2 Find unequivocal relations between directories and sessions

In [None]:
# Discard every session that shares its raw path with another session ...
idx_duplicate_raw = preacq_sessions_df['acquisition_raw_rel_path'].duplicated(keep=False)
preacq_sessions_df_unique_raw = preacq_sessions_df.loc[~idx_duplicate_raw].reset_index(drop=True)

# ... then every remaining session that shares its post-processing path.
idx_duplicate_post = preacq_sessions_df_unique_raw['acquisition_post_rel_path'].duplicated(keep=False)
acq_sessions_df_unique = preacq_sessions_df_unique_raw.loc[~idx_duplicate_post].reset_index(drop=True)
acq_sessions_df_unique

## 4.3 Update correct_dirs of the found sessions (triggers ingest into AcquisitionSessions)

In [None]:
# Mark directory 0 of every unambiguous session as correct, one update per session.
for row_idx in range(len(acq_sessions_df_unique)):
    key = {
        'sessid': acq_sessions_df_unique.loc[row_idx, 'sessid'],
        'directory_num': 0,
        'correct_dirs': 1,
    }
    new_acquisition.PreAcquisitionSessions.update1(key)

## 4.4 Check AcquisitionSessions records

In [None]:
# Read back AcquisitionSessions (newest sessid first) to check what was ingested.
acq_session_records = new_acquisition.AcquisitionSessions.fetch(order_by='sessid desc', as_dict=True)
acq_sessions_df = pd.DataFrame(acq_session_records)
acq_sessions_df