# Interactively run workflow calcium imaging

+ This notebook walks you through the steps in detail to run the `workflow-calcium-imaging`.  

+ The workflow requires calcium imaging data acquired with ScanImage, Scanbox, or PrairieView (the acquisition software used in this notebook), together with the processed data from Suite2p or CaImAn.

+ If you haven't configured the paths, refer to [01-configure](01-configure.ipynb).

+ For an overview of the schema structure, refer to [02-workflow-structure](02-workflow-structure.ipynb).

+ If you need a more automatic approach to run the workflow, refer to [03-automate](03-automate-optional.ipynb).

Let's change the directory to the package root directory to load the local configuration (`dj_local_conf.json`).

In [1]:
# reload code if library changes
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

In [2]:
import os
if os.path.basename(os.getcwd()) == "notebooks": os.chdir("..")
import numpy as np

In [3]:
skip_ds = ['jm007/2022-06-13_a'] # corrupted metadata

## `pipeline.py`

+ This script `activates` the DataJoint `elements` and declares other required tables.

In [4]:
import datajoint as dj
from workflow_calcium_imaging.pipeline import *
populate_settings = {'display_progress': True}

[2023-02-28 11:16:51,324][INFO]: Connecting jure@127.0.0.1:3306
[2023-02-28 11:16:51,416][INFO]: Connected jure@127.0.0.1:3306


In [5]:
# Show the active DataJoint configuration for inspection, but mask any
# password field so the credential is never written into the saved
# notebook output.
{key: ('********' if 'password' in key else value) for key, value in dj.config.items()}

{   'connection.charset': '',
    'connection.init_function': None,
    'custom': {   'database.prefix': 'jure_',
                  'imaging_root_data_dir': '/media/cossart/DATA/dj_cossart/data'},
    'database.host': '127.0.0.1',
    'database.password': '<redacted>',
    'database.port': 3306,
    'database.reconnect': True,
    'database.use_tls': None,
    'database.user': 'jure',
    'display.limit': 12,
    'display.show_tuple_count': True,
    'display.width': 14,
    'enable_python_native_blobs': True,
    'fetch_format': 'array',
    'filepath_checksum_size_limit': None,
    'loglevel': 'INFO',
    'safemode': True}

In [6]:
# move to library
def get_metadata_from_filetree(root_data_dir, fake_session_datetime_str_init):
    """Discover subjects and sessions from a `<root>/<subject>/<session>` folder tree.

    Every directory directly under `root_data_dir` is treated as a subject and
    every directory directly under a subject folder as one of its sessions.
    Plain files at either level are ignored. Entries are visited in sorted
    order so that repeated runs assign the same datetime to the same session.

    Each session receives a made-up, unique datetime: the seed datetime with
    the last digit of its seconds field zeroed, plus one second per session
    found so far (counted across all subjects).

    Args:
        root_data_dir: Path of the folder that contains one folder per subject.
        fake_session_datetime_str_init: Seed datetime formatted as
            'YYYY-MM-DD HH:MM:SS.fff'. The fractional part is not used; every
            generated datetime ends in '.000'.

    Returns:
        Tuple `(all_subject_str, all_session_str, all_datetime_str)`:
        the list of subject names, a list holding one list of session names
        per subject, and a list holding one list of datetime strings per
        subject (aligned element-wise with the session names).

    Raises:
        ValueError: If the seed datetime cannot be parsed.
    """
    from datetime import datetime, timedelta  # local import: only needed here

    # Zero the last seconds digit and do real datetime arithmetic. Plain
    # string concatenation of the counter produced invalid values such as
    # '12:00:010.000' as soon as more than ten sessions were found.
    base_datetime = datetime.strptime(
        fake_session_datetime_str_init[:18] + '0', '%Y-%m-%d %H:%M:%S'
    )

    all_subject_str = []
    all_session_str = []  # list of lists - each nested list corresponds to one subject
    all_datetime_str = []  # same nesting as all_session_str

    count = 0  # number of sessions seen so far, across all subjects
    for subject_str in sorted(os.listdir(root_data_dir)):
        subject_dir = os.path.join(root_data_dir, subject_str)
        if not os.path.isdir(subject_dir):
            continue

        print(f'Subject: {subject_str}')
        all_subject_str.append(subject_str)

        all_subject_session_str = []  # sessions for this particular subject
        all_subject_datetime_str = []
        for subject_session_str in sorted(os.listdir(subject_dir)):
            # Sessions are folders; skip stray files such as notes or OS metadata.
            if not os.path.isdir(os.path.join(subject_dir, subject_session_str)):
                continue

            fake_session_datetime = base_datetime + timedelta(seconds=count)
            fake_session_datetime_str = fake_session_datetime.strftime('%Y-%m-%d %H:%M:%S') + '.000'
            print(fake_session_datetime_str)

            all_subject_session_str.append(subject_session_str)
            all_subject_datetime_str.append(fake_session_datetime_str)  # here it is fake
            count += 1

        print(f'Identified sessions for subject {subject_str}: {all_subject_session_str}')

        all_session_str.append(all_subject_session_str)
        all_datetime_str.append(all_subject_datetime_str)

    if count:
        # Printed once instead of once per session to keep the output readable.
        print('\n\n\nIMPORTANT: JM made up a fake datetime to fit convention of DJ. If needed for analysis, the true datetime of the experiment can still be accessed though through the `session` entry (YYYY-MM-DD_x) or from where the bruker metadata is stored within the database.\n\n\n')

    return all_subject_str, all_session_str, all_datetime_str

### Clear previous metadata entries

In [7]:
# clear any previous entries
# WARNING: destructive. Deleting from `subject.Subject` also removes rows from
# dependent tables; the log of the recorded run shows rows being deleted from the
# scan and imaging schemas (including processed results) before it was interrupted.
# NOTE(review): with `safemode` enabled in the config, DataJoint presumably asks
# for confirmation before committing the delete - confirm before running unattended.
subject.Subject.delete()
Equipment.delete()

[2023-02-28 11:16:51,692][INFO]: Deleting 4 rows from `jure_imaging`.`_motion_correction__summary`
[2023-02-28 11:16:51,700][INFO]: Deleting 18 rows from `jure_scan`.`_scan_info__field`
[2023-02-28 11:16:52,614][INFO]: Deleting 470400 rows from `jure_scan`.`_scan_info__scan_file`
[2023-02-28 11:16:52,709][INFO]: Deleting 11 rows from `jure_scan`.`_scan_info`
[2023-02-28 11:16:52,795][INFO]: Deleting 1499 rows from `jure_imaging`.`__mask_classification__mask_type`
[2023-02-28 11:16:52,805][INFO]: Deleting 2 rows from `jure_imaging`.`__mask_classification`
[2023-02-28 11:16:52,864][INFO]: Deleting 2953 rows from `jure_imaging`.`__segmentation__mask`
[2023-02-28 11:16:52,876][INFO]: Deleting 2 rows from `jure_imaging`.`__segmentation`
[2023-02-28 11:16:52,898][INFO]: Deleting 2 rows from `jure_imaging`.`_motion_correction__rigid_motion_correction`
[2023-02-28 11:16:52,905][INFO]: Deleting 2 rows from `jure_imaging`.`_motion_correction`
[2023-02-28 11:16:52,915][INFO]: Deleting 2 rows from

KeyboardInterrupt: Interrupted by user

## Insert an entry into `subject.Subject`

In [None]:
subject.Subject.heading

In [None]:
from element_interface.prairieviewreader import get_pv_metadata

In [None]:
import pandas as pd

In [None]:
# custom function to populate database
# The data root is read from the `custom` section of the DataJoint configuration.
root_data_dir = dj.config['custom']['imaging_root_data_dir']
fake_session_datetime_str_init = '2002-01-01 12:00:00.000' # making up session datetime (to query data use either the session/folder name or PrairieView metadata)
all_subject_str, all_session_str, all_datetime_str = get_metadata_from_filetree(root_data_dir, fake_session_datetime_str_init)


In [None]:
print(f'All subjects: {all_subject_str}')
print(f'All session: {all_session_str}')


In [None]:
# Register every discovered subject. Sex and birth date are placeholders
# because the folder tree carries no such information.
for subject_str in all_subject_str:
    subject_row = {
        'subject': subject_str,
        'sex': 'U',
        'subject_birth_date': '2020-01-01',  # FIGURE OUT HOW TO HANDLE
        'subject_description': '',
    }
    subject.Subject.insert1(subject_row)

In [None]:
subject.Subject()

## Insert an entry into `lab.Equipment`

In [None]:
Equipment.insert1(dict(scanner='PrairieView'))

## Insert an entry into `session.Session`

In [None]:
session.Session.describe();

In [None]:
session.Session.heading

In [None]:
session.Session()

In [None]:
all_datetime_str

In [None]:
# Insert one `session.Session` row per discovered session, keyed by the
# subject name and the generated (fake) session datetime.
for i, subject_str in enumerate(all_subject_str):
    subject_datetimes = all_datetime_str[i]
    for j, subject_session_str in enumerate(all_session_str[i]):
        session_datetime_str = subject_datetimes[j]
        print(session_datetime_str)

        session_key = {
            'subject': subject_str,
            'session_datetime': session_datetime_str,
        }
        session.Session.insert1(session_key)

session.Session()

## Insert an entry into `session.SessionDirectory`

+ The `session_dir` is the path of the given session relative to `imaging_root_data_dir`, in POSIX format (with `/` as the separator).

+ Instead of a relative path, `session_dir` could be an absolute path but it is not recommended as the absolute path would have to match the `imaging_root_data_dir` in `dj_local_conf.json`.

In [None]:
session.SessionDirectory.describe();

In [None]:
session.SessionDirectory.heading

In [None]:
import glob
def get_TSeries_name(subject_str, subject_session_str, imaging_root_data_dir=None):
    """Return the name of the `TSeries*` entry inside a session folder.

    The session folder is `<imaging_root_data_dir>/<subject_str>/<subject_session_str>`.
    When several entries match `TSeries*`, a warning is printed and the
    alphabetically first one is returned, so the choice is reproducible.

    Args:
        subject_str: Name of the subject folder.
        subject_session_str: Name of the session folder inside the subject folder.
        imaging_root_data_dir: Root data folder. Defaults to the value stored
            under `custom` / `imaging_root_data_dir` in the DataJoint config.

    Returns:
        The last path component (name only, no directories) of the selected match.

    Raises:
        FileNotFoundError: If nothing in the session folder matches `TSeries*`.
    """
    if imaging_root_data_dir is None:
        imaging_root_data_dir = dj.config['custom']['imaging_root_data_dir']

    session_path = os.path.join(imaging_root_data_dir, subject_str, subject_session_str)
    # Escape the folder part so characters such as '[' in a folder name are
    # matched literally; only the trailing 'TSeries*' acts as a pattern.
    glob_prompt = os.path.join(glob.escape(session_path), 'TSeries*')
    # glob returns matches in arbitrary order; sort so "first" is well defined.
    tseries_fullpaths_list = sorted(glob.glob(glob_prompt))

    if not tseries_fullpaths_list:
        raise FileNotFoundError(f'No TSeries* folder found in {session_path}')

    if len(tseries_fullpaths_list) > 1:
        print('WARNING: more then one TSeries* folder found, only considering the FIRST ONE')

    return os.path.basename(tseries_fullpaths_list[0])

In [None]:
# Register the data folder of every session that is not listed in `skip_ds`.
# `session_dir` is stored relative to the imaging root, in POSIX format.
for (i, subject_str) in enumerate(all_subject_str):
    for (j, subject_session_str) in enumerate(all_session_str[i]):

        # Datasets are identified in `skip_ds` as '<subject>/<session>'.
        if subject_str + '/' + subject_session_str in skip_ds:
            continue

        session_datetime_str = all_datetime_str[i][j]
        tseries_name = get_TSeries_name(subject_str, subject_session_str)

        session_dir = f'{subject_str}/{subject_session_str}/{tseries_name}'

        session.SessionDirectory.insert1(dict(subject=subject_str,
                                              session_datetime=session_datetime_str,
                                              session_dir=session_dir))

# Display the table once, as the cell's final expression. Inside the loop this
# expression was evaluated and discarded, so nothing was ever shown.
session.SessionDirectory()

## Insert an entry into `scan.Scan`

In [None]:
scan.Scan.heading

In [None]:
# Insert one scan (scan_id 0) for every session that is not listed in `skip_ds`.
for (i, subject_str) in enumerate(all_subject_str):
    for (j, subject_session_str) in enumerate(all_session_str[i]):

        # Datasets are identified in `skip_ds` as '<subject>/<session>'.
        if subject_str + '/' + subject_session_str in skip_ds:
            continue

        session_datetime_str = all_datetime_str[i][j]

        scan.Scan.insert1(dict(subject=subject_str,
                               session_datetime=session_datetime_str,
                               scan_id=0,
                               scanner='PrairieView',
                               acq_software='PrairieView',
                               scan_notes=''))

# Display the table once, as the cell's final expression. Inside the loop this
# expression was evaluated and discarded, so nothing was ever shown.
scan.Scan()

## Populate `scan.ScanInfo`

+ This imported table stores information about the acquired image (e.g. image dimensions, file paths, etc.).
+ `populate` automatically calls `make` for every key for which the auto-populated table is missing data.
+ `populate_settings` passes arguments to the `populate` method.
+ `display_progress=True` shows a progress bar while the table is being populated.

In [None]:
scan.ScanInfo.describe();

In [None]:
scan.ScanInfo.heading

In [None]:
scan.ScanInfo.populate(**populate_settings)
scan.ScanInfo()
