# Cell Types Project Master Notebook

In [1]:
#Import the "Cell Types Cache" from the AllenSDK core package
from allensdk.core.cell_types_cache import CellTypesCache

#Import CellTypesApi, which will allow us to query the database.
from allensdk.api.queries.cell_types_api import CellTypesApi

# initialize cache as 'ctc' (cell types cache)
ctc = CellTypesCache(manifest_file='cell_types/manifest.json')

# import tool used to get AP spike data for specimen
from allensdk.ephys.ephys_extractor import EphysSweepFeatureExtractor

import pandas as pd
import numpy as np

# get rid of all the ugly red warnings
import warnings
warnings.filterwarnings('ignore')

The paper used mouse data, so filter to get only mouse specimen.

In [2]:
mouse_cells = ctc.get_cells(species = ['mus musculus'])
mouse_cells_df = pd.DataFrame(mouse_cells)
mouse_ids_df = mouse_cells_df[['id', 'species']].set_index('id')
mouse_ids_df

Unnamed: 0_level_0,species
id,Unnamed: 1_level_1
565871768,Mus musculus
469801138,Mus musculus
605889373,Mus musculus
485909730,Mus musculus
323865917,Mus musculus
...,...
512322162,Mus musculus
488473535,Mus musculus
467003163,Mus musculus
572609946,Mus musculus


Get ephys features of the mouse species only

In [3]:
ephys_features = ctc.get_ephys_features()
ephys_features_df = pd.DataFrame(ephys_features)
ephys_ids_df = ephys_features_df[['specimen_id','rheobase_sweep_number']].set_index('specimen_id')
ephys_ids_df
ephys_ids_df

Unnamed: 0_level_0,rheobase_sweep_number
specimen_id,Unnamed: 1_level_1
529878215,78
548459652,55
579978640,38
439024551,41
515188639,36
...,...
569955172,58
573410831,42
576285618,39
520462275,42


Use inner join to get specimen ids of only mouse species.

Is 'id' in mouse cells df the same as 'specimen id' in ephys data? Assuming yes, we can do the following to get the rheobase sweep number for each mouse specimen

In [4]:
mouse_species_df = mouse_ids_df.join(ephys_ids_df)
mouse_species_df

Unnamed: 0_level_0,species,rheobase_sweep_number
id,Unnamed: 1_level_1,Unnamed: 2_level_1
565871768,Mus musculus,40
469801138,Mus musculus,57
605889373,Mus musculus,53
485909730,Mus musculus,33
323865917,Mus musculus,34
...,...,...
512322162,Mus musculus,44
488473535,Mus musculus,57
467003163,Mus musculus,45
572609946,Mus musculus,45


In [5]:
def calc_spike_width(specimen_id):
    ''' calculates the spike width using specimen id to get the data. returns 0.0 if file does not exist'''
    try:
        data_set = ctc.get_ephys_data(specimen_id)

        sweep_number = mouse_species_df.loc[specimen_id]['rheobase_sweep_number']
        sweep_data = data_set.get_sweep(sweep_number)

        index_range = sweep_data["index_range"]
        i = sweep_data["stimulus"][0:index_range[1]+1] # in A
        v = sweep_data["response"][0:index_range[1]+1] # in V
        i *= 1e12 # to pA
        v *= 1e3 # to mV

        sampling_rate = sweep_data["sampling_rate"] # in Hz
        t = np.arange(0, len(v)) * (1.0 / sampling_rate)

        sweep_ext = EphysSweepFeatureExtractor(t=t, v=v, i=i, start=1.02, end=2.02)
        sweep_ext.process_spikes()

        return round(1e3 * np.nanmean(sweep_ext.spike_feature("width")),4)
    
    except OSError as e:
        return 0.0

Randomly sample 500 rows of mouse species df using `.sample(n)`

In [20]:
# random state is a seed so that the data we get is always the same
# mouse_samples = mouse_species_df.sample(2, random_state = 9)

mouse_samples = mouse_species_df.head(100)
mouse_samples

Unnamed: 0_level_0,species,rheobase_sweep_number
id,Unnamed: 1_level_1,Unnamed: 2_level_1
565871768,Mus musculus,40
469801138,Mus musculus,57
605889373,Mus musculus,53
485909730,Mus musculus,33
323865917,Mus musculus,34
...,...,...
486198953,Mus musculus,38
397243949,Mus musculus,49
583719952,Mus musculus,69
475580568,Mus musculus,43


In [21]:
# reset index to get ids as a column and apply the calculation to it

In [22]:
reset_df = mouse_samples.reset_index()

Calc spike width for the 100 samples

In [23]:
reset_df['spike_width'] = reset_df['id'].apply(calc_spike_width)
reset_df

2020-02-26 19:24:46,514 allensdk.api.api.retrieve_file_over_http INFO     Downloading URL: http://api.brain-map.org/api/v2/well_known_file_download/496390184


KeyboardInterrupt: 

started 5:12:30 end - interrupt 5:21
5:22 - interrept 542

is there a way to get rid of the ones that take forever??

In [18]:
# drop where the spike width is 0.0
reset_df = reset_df[reset_df['spike_width'] != 0.0]
reset_df

Unnamed: 0,id,species,rheobase_sweep_number,spike_width
0,565871768,Mus musculus,40,0.44
1,469801138,Mus musculus,57,0.3283
2,605889373,Mus musculus,53,1.32
3,485909730,Mus musculus,33,0.9393
5,583836069,Mus musculus,28,0.48
7,558076716,Mus musculus,37,1.0733
8,476135066,Mus musculus,37,0.615


We want to add a column to that says whether the neuron is inhibitory or excitatory based on spike width.

Make a helper function to classify by inhibitory or excitatory.

In [19]:
def classify(spike_width):
    #TODO: check if overlap in range an get actual ranges
    # inhibtory range
    if (spike_width <= .9) and (spike_width >= .1):
        return 'inhibitory'
    # excitatory range
    elif (spike_width <= .9) and (spike_width >= .1):
        return 'excitatory'
    else:
        return 'unknown'

In [12]:
reset_df['inhib_excit'] = reset_df['spike_width'].apply(classify)
reset_df

Unnamed: 0,id,species,rheobase_sweep_number,spike_width,inhib_excit
0,565871768,Mus musculus,40,0.44,inhibitory
1,469801138,Mus musculus,57,0.3283,inhibitory
2,605889373,Mus musculus,53,1.32,unknown
3,485909730,Mus musculus,33,0.9393,unknown
5,583836069,Mus musculus,28,0.48,inhibitory
7,558076716,Mus musculus,37,1.0733,unknown


Now, based on type of neuron (inhibitory or excitatory), explore the morphology.

In [13]:
# Get morphology data