In [1]:
#############################################################
# Author(s): Debaditya, Anwesha, Anna                       #
#############################################################

In [2]:
import numpy as np
import csv
import glob
from itertools import compress

In [3]:
#Enter path to data folder:
data_root = '/NMA/Mapping Brain Networks/data/allData'

#Get the list of entries directly under the data folder.
#glob.glob returns its matches in arbitrary, filesystem-dependent order, and
#sessions are later selected by their position in this list, so sort the
#result to make a given session index refer to the same entry on every machine.
session_paths = sorted(glob.glob(data_root + '/*'))

In [4]:
#@title groupings of brain regions
# regions[i] is the label for the list of area acronyms in brain_groups[i];
# the two lists are parallel, so keep them the same length and in the same order.
regions = ["vis ctx", "thal", "hipp", "other ctx", "midbrain", "basal ganglia", "subplate"]
brain_groups = [["VISa", "VISam", "VISl", "VISp", "VISpm", "VISrl"], # visual cortex
                ["CL", "LD", "LGd", "LH", "LP", "MD", "MG", "PO", "POL", "PT", "RT", "SPF", "TH", "VAL", "VPL", "VPM"], # thalamus
                ["CA", "CA1", "CA2", "CA3", "DG", "SUB", "POST"], # hippocampal
                # "TT" previously carried a leading space (" TT"), which can never
                # match an acronym compared by string equality.
                ["ACA", "AUD", "COA", "DP", "ILA", "MOp", "MOs", "OLF", "ORB", "ORBm", "PIR", "PL", "SSp", "SSs", "RSP", "TT"], # non-visual cortex
                ["APN", "IC", "MB", "MRN", "NB", "PAG", "RN", "SCs", "SCm", "SCig", "SCsg", "ZI"], # midbrain
                ["ACB", "CP", "GPe", "LS", "LSc", "LSr", "MS", "OT", "SNr", "SI"], # basal ganglia
                ["BLA", "BMA", "EP", "EPd", "MEA"] # cortical subplate
               ]

In [5]:
#Pick which sessions to load (positions in session_paths).
sessions = [11,12]

#Start with one empty record per selected session, keyed by session number.
dat = dict()
for session in sessions:
    dat[session] = dict()

In [6]:
def get_session_info(root, path):
    '''
    This function returns the date on which the session was carried out and the name of the mouse.

    Both values are cut out of the session directory's own name, which is
    taken to be the mouse name, one separator character, and a 10-character
    date (e.g. 'Cori_2016-12-14').

    Args:
    root - [string] The root directory path. No longer needed, because only
           the last component of `path` is used; still accepted so existing
           calls keep working.
    path - [string] The path to the session directory. Both '/' and '\\'
           are treated as path separators; a trailing separator is ignored.

    Returns:
    date - [string] Date the experiment was conducted in YYYY-MM-DD format.
    name - [string] Name of the mouse
    '''

    #Get the directory's own name, whichever separator style the path uses.
    #(Previously the notebook global data_root plus a hard-coded '\\' was
    #stripped, which ignored `root` and left the full path intact on POSIX.)
    name_date = path.replace('\\', '/').rstrip('/').rsplit('/', 1)[-1]

    #Get date: the trailing 10 characters (YYYY-MM-DD).
    date = name_date[-10:]

    #Get name: everything before the date and the separator character in front of it.
    name = name_date[:-11]

    return date, name

def get_cluster_info(path):
    '''
    Report, for every cluster of a session, whether it is 'good' and which
    brain location its peak channel is assigned to.

    Args:
    path - [string] The path to the session directory. It must contain
           clusters._phy_annotation.npy, clusters.peakChannel.npy and
           channels.brainLocation.tsv.

    Returns:
    good_clusters - [ndarray] Flat boolean array, True where the cluster's
                    _phy_annotation is >= 2.
    cluster_locations - [list] For each cluster, the 'allen_ontology' entry
                        of the channel row given by its peak channel.
    '''

    #A cluster counts as good when its _phy_annotation is at least 2.
    annotations = np.load(path + '/clusters._phy_annotation.npy')
    good_clusters = (annotations >= 2).flatten()

    #Peak channel per cluster, shifted down by one so it can index the rows
    #of the channel table (the stored values are presumably 1-based).
    peak_channels = np.load(path + '/clusters.peakChannel.npy')
    channel_rows = (peak_channels.astype(int) - 1).flatten()

    #Read the 'allen_ontology' column of the channel table, one entry per row.
    with open(path + '/channels.brainLocation.tsv') as tsvfile:
        channel_regions = [
            row['allen_ontology']
            for row in csv.DictReader(tsvfile, dialect='excel-tab')
        ]

    #Look up the location of every cluster's peak channel.
    cluster_locations = [channel_regions[row_index] for row_index in channel_rows]

    return good_clusters, cluster_locations
 
def get_cluster_spikes(path):
    '''
    This function returns the spike times grouped by cluster.

    Args:
    path - [string] The path to the session directory. It must contain
           spikes.times.npy and spikes.clusters.npy, holding one entry per spike.

    Returns:
    clusters_spikes - [list] One ndarray of spike times per cluster id, for
                      ids 0 through the largest id present (position in the
                      list == cluster id). Ids with no spikes get an empty
                      array. An empty list is returned when there are no
                      spikes at all.
    '''
    #Load the spike times and the cluster id of each spike.
    #NOTE: allow_pickle=True lets np.load unpickle object arrays, which can
    #execute arbitrary code; only point this at trusted data files.
    spikes = np.load(path + '/spikes.times.npy', allow_pickle = True).flatten()
    cluster_ids = np.load(path + '/spikes.clusters.npy', allow_pickle = True).flatten()

    #No spikes recorded: np.max raises on an empty array, so stop here.
    if cluster_ids.size == 0:
        return []

    #Collect the spike times of each cluster id in turn.
    clusters_spikes = []
    for cluster_id in range(int(np.max(cluster_ids)) + 1):
        clusters_spikes.append(spikes[cluster_ids == cluster_id])

    #Return the whole list. (Previously the loop variable holding only the
    #last cluster's spikes was returned by mistake.)
    return clusters_spikes

def get_trial_info(path):
    '''
    Load the per-trial arrays stored in a session directory.

    Args:
    path - [string] The path to the session directory.

    Returns:
    A tuple of six arrays, loaded as-is and in this order:
    trials.intervals, trials.visualStim_times, trials.goCue_times,
    trials.response_times, trials.feedback_times, trials.feedbackType.
    '''
    #File names relative to the session directory, in return order.
    trial_files = (
        '/trials.intervals.npy',
        '/trials.visualStim_times.npy',
        '/trials.goCue_times.npy',
        '/trials.response_times.npy',
        '/trials.feedback_times.npy',
        '/trials.feedbackType.npy',
    )
    return tuple(np.load(path + file_name, allow_pickle = True) for file_name in trial_files)


In [7]:
#Load every selected session into dat, keyed by session number.
dat = {}
for session in sessions:
    #Register the session first, then fill its record once everything is read.
    dat[session] = {}
    path = session_paths[session]

    #Read the pieces of the session from its directory.
    session_date, mouse_name = get_session_info(data_root, path)
    good_clusters, cluster_locations = get_cluster_info(path)
    cluster_spikes = get_cluster_spikes(path)
    (trial_intervals, visualStim_times, goCue_times,
     response_times, feedback_times, feedback_type) = get_trial_info(path)

    #Store them under the same names they were loaded as.
    dat[session].update(
        session_date=session_date,
        mouse_name=mouse_name,
        good_clusters=good_clusters,
        cluster_locations=cluster_locations,
        cluster_spikes=cluster_spikes,
        trial_intervals=trial_intervals,
        visualStim_times=visualStim_times,
        goCue_times=goCue_times,
        response_times=response_times,
        feedback_times=feedback_times,
        feedback_type=feedback_type,
    )

    print(f'Data for session {session} loaded.')

Data for session 11 loaded.
Data for session 12 loaded.


In [8]:
print(dat[11].keys())

dict_keys(['session_date', 'mouse_name', 'good_clusters', 'cluster_locations', 'cluster_spikes', 'trial_intervals', 'visualStim_times', 'goCue_times', 'response_times', 'feedback_times', 'feedback_type'])


In [9]:
## This part of the code can be used to save the data currently in memory

def save_data(filename, objects):
    '''
    This function pickles the given objects into the named file.

    Args:

    filename - [string] The name of the file that you want to save your data into. (include extension .pkl)
               An existing file of that name is overwritten.
    objects - [list] The list of objects you want to store from memory into the file.
              It is written as a single pickle, so loading the file gives the same list back.

    Return:

    void

    Usage Example:
    save_data('data.pkl', [sessions, dat])
    '''

    #Grab dependencies
    import pickle

    #Open file for binary writing
    with open(filename, 'wb') as f:
        #Dump the objects that were passed in. (Previously this referenced an
        #undefined name `data`, so every call raised NameError.)
        pickle.dump(objects, f)
        

In [None]:
# #This cell loads all the intervals

# #Load intervals
# for session in sessions:
    
#     #The path to current session
#     path = session_paths[session]
    
#     #The name & date
#     session_date = get_name(data_root, path)
#     mouse_name = get_date(data_root, path)
    
#     #List of cells which are good.
#     good_cells = (np.load(path + '/clusters._phy_annotation.npy')>=2).flatten()
#     cluster_channels = (np.load(path + '/clusters.peakChannel.npy').astype(int) - 1).flatten()
    
# #     #Debug
# #     print(good_cells)
# #     print(cluster_channels)
# #     print(np.shape(cluster_channels),print(np.shape(good_cells)))
# #     print(np.shape(cluster_channels[good_cells].flatten()))
    
# #     brain_regions = []
# #     with open(path + '/channels.brainLocation.tsv') as tsvfile:
# #         reader = csv.DictReader(tsvfile, dialect='excel-tab')
# #         for row in reader:
# #             brain_regions.append(row['allen_ontology'])
# #         del reader
    
# #     #Debug
# #     print(np.shape(brain_regions))
# #     print(brain_regions)
    

#     cluster_locations = []
#     for cluster_channel in cluster_channels:
#         brain_region = brain_regions[cluster_channel]
#         cluster_locations.append(brain_region)
#         del brain_region
    
# #     #Debug
# #     print(np.shape(cluster_locations))
# #     print(len(list(compress(cluster_channels,good_cells))))
# #     print(np.sum(good_cells))
        
#     spikes = np.load(path + '/spikes.times.npy', allow_pickle = True).flatten()
#     cluster_ids = np.load(path + '/spikes.clusters.npy', allow_pickle = True).flatten()
    
# #     #Debug
# #     print(np.min(cluster_ids))
# #     print(np.max(cluster_ids))
    
#     clusters_spikes = [] #NOTE I CHANGED THIS LOOK INTO THIS LATER!
#     for cluster_id in range(np.max(cluster_ids)+1):
#         cluster_spikes = spikes[np.where(cluster_ids == cluster_id)]
#         clusters_spikes.append(cluster_spikes)
#         del cluster_spikes
    
# #     #Debug
# #     print(type(clusters_spikes))
# #     print(len(clusters_spikes))
# #     print(clusters_spikes[np.max(cluster_ids)+1])
    
#     trial_intervals = np.load(path + '/trials.intervals.npy', allow_pickle = True)
#     no_of_trials = len(trial_intervals)
#     visualStim_times = np.load(path + '/trials.visualStim_times.npy', allow_pickle = True)
#     goCue_times = np.load(path + '/trials.goCue_times.npy', allow_pickle = True)
#     response_times = np.load(path + '/trials.response_times.npy', allow_pickle = True)
#     feedback_times = np.load(path + '/trials.feedback_times.npy', allow_pickle = True)
#     feedback_type =np.load(path + '/trials.feedbackType.npy', allow_pickle = True)
    
#     dat[session]
    
    
    
    
    
    
# #     print(no_of_trials)
# #     trial_start_t = trial_intervals[:,0]
# #     trial_end_t = trial_intervals[:,1]
# #     trials_spikes = []
# #     for trial in range(1):
# #         trial_spikes = []
# #         trial_start = trial_intervals[trial,0]
# #         trial_end = trial_intervals[trial,1]
        
# #         for cluster_id in cluster_ids:
# #             cluster_spikes = clusters_spikes[cluster_id]
# #             response_in_trial = np.logical_and(cluster_spikes>=trial_start,cluster_spikes<trial_end)
# #             trial_spikes.append(np.array(compress(cluster_spikes,response_in_trial)))
# #             del cluster_spikes
# #             del response_in_trial
# #         trials_spikes.append(trial_spikes)
# #         del trial_spikes
# #         print("Finished grouping spikes for trial",trial)
# #             #print(type(cluster_spikes_trial))
# #     trial_intervals = np.load(path + '/trials.intervals.npy', allow_pickle = True)
# #     for trial_no in range(len(trial_intervals)):
# #         trial_start = trial_intervals[trial_no,0]
# #         trial_end = trial_intervals[trial_no,1]
# #         trial_spikes = {}
# #         for cell_id, cell_spike in enumerate(cell_spikes):
# #             trial_spike = cell_spike[cell_spike>=trial_start]
# #             trial_spike = trial_spike[trial_spike<trial_end]
# #             trial_spikes[cell_id] = cell_spike[cell_spike>=trial_start and cell_spike>trial_end]
# #         dat[session]['trial_spikes'].append(trial_spikes)
    