# Download data from Allen SDK

Steps performed:

*   Download files from AllenSDK repository
*   Extract some features
*   Resample signals to 2p imaging rate
*   Save everything as an xarray dataset (one NetCDF file per session)
*   Delete temporary files
*   Move local files to Google Drive

Adrian Hoffmann, 2021-08-13

### Install packages + imports

In [None]:
# Upgrade pip first, then install mindscope_utilities and the AllenSDK.
# The AllenSDK is pinned to version 2.12.1.
!python -m pip install --upgrade pip
!pip install mindscope_utilities
!pip install allensdk==2.12.1

### Restart the runtime!!!

In [1]:
# really, restart the runtime...

### Imports

In [2]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

import mindscope_utilities
import mindscope_utilities.visual_behavior_ophys as ophys

from allensdk.brain_observatory.behavior.behavior_project_cache import VisualBehaviorOphysProjectCache


In [None]:
# Local directory handed to the project cache as its `cache_dir`.
data_storage_directory = "/temp" # Note: this path must exist on your local drive
# Build the Visual Behavior ophys project cache via `from_s3_cache`.
cache = VisualBehaviorOphysProjectCache.from_s3_cache(cache_dir=data_storage_directory)

# Select ids to download

### In case of only a single mouse:

In [None]:
# --- Session metadata for the whole project ---
data_storage_directory = "/temp"  # Note: this path must exist on your local drive
cache = VisualBehaviorOphysProjectCache.from_s3_cache(
    cache_dir=data_storage_directory
)
all_sessions = cache.get_ophys_session_table()

# --- Keep only the sessions of one mouse ---
# (alternative mouse used previously: 'mouse_id == 431252')
sessions = all_sessions.query('mouse_id == 479839')

# Short summary of the selection
print('Session types:', sorted(sessions['session_type'].unique()))
print('Number of sessions:', len(sessions))
print('Number of mice:', len(sessions['mouse_id'].unique()))

# Sorted index values of the selected rows; the download loop iterates over these
ophys_ids = sorted(sessions.index)

# Last expression of the cell: display the selected table
sessions

### In case of specific sets of sessions

In [None]:
# --- Session metadata for the whole project ---
data_storage_directory = "/temp"  # Note: this path must exist on your local drive
cache = VisualBehaviorOphysProjectCache.from_s3_cache(
    cache_dir=data_storage_directory
)
all_sessions = cache.get_ophys_session_table()

# --- Two-step filter: first by genotype, then by session type ---
ses_part = all_sessions.query(
    'full_genotype == "Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-GCaMP6f)/wt"'
)
sessions = ses_part.query(
    'session_type in ("OPHYS_1_images_A", "OPHYS_3_images_A")'
)

# Short summary of the selection
print('Session types:', sorted(sessions['session_type'].unique()))
print('Number of sessions:', len(sessions))
print('Number of mice:', len(sessions['mouse_id'].unique()))

# Sorted index values of the selected rows; the download loop iterates over these
ophys_ids = sorted(sessions.index)
print(ophys_ids)

# Automatic download, formatting and saving of sessions

In [None]:
## Download every selected session, bundle neural activity and behavior into
## one xarray Dataset per session and save it as a NetCDF file in the working
## directory. A failing session is reported and skipped; the NWB files
## downloaded for a session are deleted afterwards in either case.
##
## Each session can have one or multiple 'ophys_experiment_id'
# ophys_id = 903621170   # mesoscope session with multiple imaging planes
import traceback

import xarray as xr
from scipy.interpolate import interp1d


def _nearest_frames(frame_times, event_times):
    """Return, for each event time, the index of the closest imaging frame.

    Parameters
    ----------
    frame_times : 1-D numpy array of imaging frame timestamps.
    event_times : iterable of event timestamps (may be empty).

    Returns
    -------
    1-D integer numpy array with one frame index per event.
    """
    return np.array(
        [np.argmin(np.abs(frame_times - t)) for t in event_times],
        dtype=int,
    )


# Location of the NWB files written by the cache. Built from the cache
# directory so that cleanup keeps working when that directory is changed.
nwb_template = os.path.join(
    data_storage_directory,
    'visual-behavior-ophys-1.0.0',
    'behavior_ophys_experiments',
    'behavior_ophys_experiment_{}.nwb',
)

for num, ophys_id in enumerate(ophys_ids):
    # Defined before the try-block so cleanup below never hits an unset name.
    ophys_experiment_ids = []

    try:
        print('Downloading {}/{}'.format(num + 1, len(ophys_ids)))
        print('File:', ophys_id)
        experiments = {}
        ophys_experiment_ids = list(all_sessions.loc[ophys_id]['ophys_experiment_id'])
        for ophys_experiment_id in ophys_experiment_ids:
            experiments[ophys_experiment_id] = cache.get_behavior_ophys_experiment(ophys_experiment_id)

        ###### extract activity from sessions
        act_part = []
        act_cell_ids = []

        for ophys_experiment_id in tqdm(experiments.keys()):
            one_experiment = experiments[ophys_experiment_id]

            # filtered activity, transformed to array (neurons, time)
            act_pd = one_experiment.events.filtered_events
            act_part.append(np.array(list(act_pd)))

            # corresponding cell_ids (over all sessions)
            act_cell_ids.extend(list(one_experiment.events.index))

        act_all = np.concatenate(act_part, axis=0)
        # NOTE(review): timestamps and all behavioral signals below are taken
        # from the last experiment of the session only. For sessions with
        # several experiments, confirm that the planes share a time base.
        img_time = one_experiment.ophys_timestamps
        print('Shape of neural activity:', act_all.shape)

        ###### Create a big data array

        ### neural activity
        activity = xr.DataArray(
            data=act_all.astype(np.float32),
            coords=[act_cell_ids, img_time],
            dims=['neuron_id', 'time'],
            name='activity',
        )
        img_time = activity.time.data

        ### stimulation details
        stim_details = one_experiment.stimulus_presentations.drop(columns=['image_set'])
        stim_details = stim_details.drop(0)  # remove first entry

        # imaging frame closest to each stimulus onset
        stim_times = np.array(stim_details['start_time'])
        stim_frames = _nearest_frames(img_time, stim_times)

        # change to xarray
        stim_details = stim_details.reset_index()
        stim_details['start_frame'] = stim_frames
        stim_details = stim_details[['index', 'start_frame', 'start_time',
                                     'image_index', 'image_name', 'is_change']]
        images = stim_details.to_xarray()
        images = images.rename(dict(index='trial'))  # rename index to trial

        ### Running speed of mouse, interpolated at the imaging timestamps
        speed = np.array(one_experiment.running_speed['speed'])
        speed_time = np.array(one_experiment.running_speed['timestamps'])

        interp_fun = interp1d(x=speed_time, y=speed, bounds_error=False, fill_value=0)
        speed_imaging = interp_fun(activity.time)

        running = xr.DataArray(data=speed_imaging,
                               coords=[activity.time],
                               name='running')

        ### Pupil diameter and blinking
        pupil_time = np.array(one_experiment.eye_tracking['timestamps'])

        # NaN samples are dropped before interpolating the pupil area
        area = np.array(one_experiment.eye_tracking['pupil_area'])
        no_nan = ~np.isnan(area)
        interp_fun = interp1d(x=pupil_time[no_nan], y=area[no_nan],
                              bounds_error=False, fill_value=0)
        area_img = interp_fun(activity.time)

        pupil = xr.DataArray(data=area_img,
                             coords=[activity.time],
                             name='pupil')

        blink = np.array(one_experiment.eye_tracking['likely_blink'])
        interp_fun = interp1d(x=pupil_time, y=blink, bounds_error=False,
                              fill_value=False, kind='nearest')
        blink_img = interp_fun(activity.time) > 0

        blinking = xr.DataArray(data=blink_img,
                                coords=[activity.time],
                                name='blinking')

        ### Licking behavior: True for every frame closest to at least one lick
        lick_frames = _nearest_frames(img_time, one_experiment.licks.timestamps)
        licking_arr = np.zeros(len(img_time), dtype=bool)
        licking_arr[lick_frames] = True

        licking = xr.DataArray(data=licking_arr,
                               coords=[activity.time],
                               name='licking')

        ### Rewards: True for every frame closest to a reward
        reward_frames = _nearest_frames(img_time, one_experiment.rewards.timestamps)
        reward_array = np.zeros(len(img_time), dtype=bool)
        reward_array[reward_frames] = True

        reward = xr.DataArray(data=reward_array,
                              coords=[activity.time],
                              name='reward')

        ### Add rewarded trials variable
        # A change trial counts as rewarded when a reward frame falls within
        # int(frame_rate) frames starting at the stimulus onset frame.
        trial_frames = stim_details['start_frame']
        is_change = stim_details['is_change']
        frame_rate = one_experiment.metadata['ophys_frame_rate']
        window = int(frame_rate)
        rewarded = np.zeros(len(trial_frames), dtype=bool)

        for i, fr in enumerate(trial_frames):
            if not is_change[i]:
                continue
            rewarded[i] = reward_array[fr:fr + window].any()

        rewarded = xr.DataArray(data=rewarded,
                                coords=[images.trial],
                                name='rewarded')

        ### Add warped images
        # The stack is sized from the template table itself so that it always
        # matches the 'images_id' coordinate.
        imgs = one_experiment.stimulus_templates
        inds = imgs.index
        size = imgs.loc[inds[0]].warped.shape

        shown_images = np.zeros((len(inds), size[0], size[1]), dtype=np.uint8)
        for i, ind in enumerate(inds):
            shown_images[i] = imgs.loc[ind].warped

        images_warped = xr.DataArray(
            data=shown_images,
            coords=[inds, np.arange(size[0]), np.arange(size[1])],
            dims=['images_id', 'x', 'y'],
            name='images',
        )

        data = xr.merge([activity, images, rewarded, running, pupil,
                         blinking, licking, reward, images_warped])

        ##### Add attributes to data
        all_meta = one_experiment.metadata
        meta_data = dict(
            mouse_id=all_meta['mouse_id'],
            cre_line=all_meta['cre_line'],
            ophys_session_id=all_meta['ophys_session_id'],
            session_type=all_meta['session_type'],
            frame_rate_Hz=all_meta['ophys_frame_rate'],
            day_of_experiment=all_meta['date_of_acquisition'].strftime("%Y-%m-%d"),
            easy_session_number=num,
            version=1,
        )

        data.attrs = meta_data

        print('Frame rate:', all_meta['ophys_frame_rate'])

        ##### save xarray
        file_name = '{:03d}_excSession_v1_ophys_{}.nc'.format(num, ophys_id)
        data.to_netcdf(file_name)

    except Exception as e:
        # Keep going with the next session, but report which one failed and why.
        print('Failed to process session {}:'.format(ophys_id), e)
        traceback.print_exc()

    ##### delete temporary files
    # Covers every experiment of this session, including ones whose download
    # failed part-way; files that do not exist are skipped.
    for exp_id in ophys_experiment_ids:
        nwb_file = nwb_template.format(exp_id)
        if os.path.exists(nwb_file):
            os.remove(nwb_file)

# Move files to your own Google Drive folder

Before running the next cell, add your Google Drive: in the left sidebar click "Files", then "Add Drive".

In [None]:
import glob
import os
import shutil
from tqdm import tqdm

# Move the saved session files from the working directory to Google Drive.
drive_root = '/content/drive/MyDrive'
target_folder = '/content/drive/MyDrive/tmp/session_data/'

# Fail early with a clear message instead of writing into a local folder
# that only looks like the Drive path.
if not os.path.isdir(drive_root):
    raise FileNotFoundError(
        'Google Drive is not available at {}. Add the Drive first.'.format(drive_root)
    )
os.makedirs(target_folder, exist_ok=True)

# Match the names written by the download loop
# ('{:03d}_excSession_v1_ophys_{}.nc'), including session numbers >= 100,
# which a plain '0*' pattern would miss.
files = sorted(glob.glob('*_excSession_v1_ophys_*.nc'))

for file in tqdm(files):
    shutil.move(file, target_folder)