A notebook to see which subjects with registered volumes also have full annotations.

There may also be some full annotations for which we do not have registered volumes, but we care less about these, since there is no imaging data to analyze for those subjects.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
from pathlib import Path

import numpy as np
import pandas as pd

from keller_zlatic_vnc.data_processing import generate_standard_id_for_full_annots
from keller_zlatic_vnc.data_processing import generate_standard_id_for_volume
from keller_zlatic_vnc.data_processing import match_standard_subject_ids


## Parameters go here

In [3]:
# Folders holding the full-annotation CSV files: one for the A4 data, one for the A9 data and one for
# the spontaneous-only annotations. Every *.csv file found in these folders is picked up below.
# NOTE(review): these are absolute paths on a mapped drive - adjust them when running elsewhere.
a4_annot_folder = r'A:\projects\keller_vnc\data\full_annotations\behavior_csv_cl_A4'
a9_annot_folder = r'A:\projects\keller_vnc\data\full_annotations\behavior_csv_cl_A9'
spont_only_annot_folder = r'A:\projects\keller_vnc\data\full_annotations\spontaneous_only_annotations'

# Excel file listing the locations of the registered volumes
volume_loc_file = r'A:\projects\keller_vnc\data\experiment_data_locations.xlsx'

## Get list of all annotation files

In [4]:
# Gather the path of every CSV file in the annotation folders, keeping the A4, A9,
# spontaneous-only folder order
annot_file_paths = [csv_path
                    for annot_folder in (a4_annot_folder, a9_annot_folder, spont_only_annot_folder)
                    for csv_path in glob.glob(str(Path(annot_folder) / '*.csv'))]

# Strip the directory part so only the file names remain
annot_file_names = [Path(csv_path).name for csv_path in annot_file_paths]

In [5]:
# Number of distinct annotation file names (a name found in more than one folder counts once)
len({*annot_file_names})

73

## Read in locations of all registered volumes

In [6]:
def c_fcn(str):
    """Return the given text with every single-quote character removed.

    Used as a column converter when reading the volume-locations spreadsheet.

    NOTE(review): the parameter name shadows the builtin ``str``; it is kept
    here so the function's signature stays unchanged.

    Args:
        str: The text to clean. Must provide a string-style ``replace`` method.

    Returns:
        A copy of the text without any ``'`` characters.
    """
    single_quote, nothing = "'", ""
    return str.replace(single_quote, nothing)
# Apply the quote-stripping converter to the columns keyed 0 and 1
converters = dict.fromkeys((0, 1), c_fcn)

# Read the volume locations; the resulting 'Main folder' and 'Subfolder' columns are used below
volume_locs = pd.read_excel(
    volume_loc_file,
    header=1,
    usecols=[1, 2],
    converters=converters,
)

## See what volumes we also have full annotations for

In [7]:
# One standard subject ID per row of the volume-locations table, in row order
volume_subjs = [generate_standard_id_for_volume(main_folder, subfolder)
                for main_folder, subfolder in zip(volume_locs['Main folder'], volume_locs['Subfolder'])]

In [8]:
# One standard subject ID per annotation file name, in the same order as annot_file_names
annot_subjs = list(map(generate_standard_id_for_full_annots, annot_file_names))

In [9]:
# Keep the volume subject IDs for which the matcher finds a counterpart among the annotation IDs
# (the matcher signals "no match" by returning None)
matched_volume_s_ids = [
    vol_s_id
    for vol_s_id in volume_subjs
    if match_standard_subject_ids(vol_s_id, annot_subjs) is not None
]

## Now do some basic checks on the matches

In [10]:
# Duplicates collapse inside a set, so a set smaller than the list means some matched ID repeats
if len(set(matched_volume_s_ids)) < len(matched_volume_s_ids):
    print('Non-unique matches found')

## Now see which volumes are matched and which are not

In [11]:
# Header first, then one matched volume subject ID per line
print('Matched Volumes:', *matched_volume_s_ids, sep='\n')

Matched Volumes:
CW_17-08-23-L1
CW_17-08-23-L2
CW_17-08-23-L4
CW_17-08-24-L4
CW_17-08-24-L5
CW_17-08-26-L1
CW_17-08-26-L2
CW_17-08-26-L4
CW_17-08-26-L5
CW_17-08-26-L6
CW_17-08-27-L1
CW_17-08-27-L2
CW_17-08-27-L4
CW_17-08-27-L5
CW_17-08-28-L1
CW_17-08-28-L2
CW_17-08-29-L2
CW_17-08-31-L1
CW_17-09-01-L1
CW_17-09-01-L2
CW_17-09-01-L3
CW_17-11-02-L3
CW_17-11-03-L1
CW_17-11-03-L2
CW_17-11-03-L3
CW_17-11-03-L5
CW_17-11-03-L7
CW_17-11-04-L1
CW_17-11-04-L2
CW_17-11-04-L3
CW_17-11-04-L4
CW_17-11-05-L6
CW_17-11-05-L7
CW_17-11-06-L1
CW_17-11-06-L2
CW_17-11-06-L3
CW_17-11-07-L3
CW_17-11-07-L4
CW_17-11-07-L5
CW_17-11-08-L1
CW_17-11-08-L2
CW_17-11-08-L3
CW_17-11-26-L1
CW_17-11-26-L2
CW_17-11-26-L3
CW_17-11-26-L4
CW_17-11-26-L5
CW_17-11-27-L1
CW_17-11-27-L2
CW_17-11-27-L3
CW_17-11-27-L4
CW_17-11-27-L5
CW_17-11-28-L2
CW_17-11-28-L4
CW_17-11-28-L6
CW_17-11-29-L1
CW_17-11-29-L2
CW_17-11-29-L3
CW_17-11-29-L4
CW_17-11-29-L5
CW_17-11-29-L6
CW_17-11-30-L2
CW_17-12-11-L3


In [12]:
# Volume subjects that did not make it into the matched list, sorted for stable display
non_matched_s_ids = np.sort(list(set(volume_subjs).difference(matched_volume_s_ids)))

# Header first, then one non-matched volume subject ID per line
print('Non-matched volume subjects', *non_matched_s_ids, sep='\n')

Non-matched volume subjects
CW_17-11-03-L6
CW_17-11-07-L2


In [13]:
# Annotation subjects whose ID is not among the matched volume IDs, sorted for stable display.
# NOTE(review): this compares IDs by exact equality, whereas the matched list was built with
# match_standard_subject_ids; if that matcher accepts non-identical IDs, annotation subjects it
# matched would still be listed here - confirm.
non_matched_annot_s_ids = np.sort(list(set(annot_subjs).difference(matched_volume_s_ids)))

# Header first, then one non-matched annotation subject ID per line
print('Non-matched annotation subjects', *non_matched_annot_s_ids, sep='\n')

Non-matched annotation subjects
CW_17-08-24-L1
CW_17-08-24-L2-1
CW_17-08-24-L2-2
CW_17-08-27-L3
CW_17-08-28-L3
CW_17-11-03-L6-1
CW_17-11-03-L6-2
CW_17-11-28-L3
CW_17-11-30-L3
CW_17-12-11-L2
