In [1]:
from allensdk.core.cell_types_cache import CellTypesCache
from allensdk.api.queries.cell_types_api import CellTypesApi
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn import linear_model
from sklearn import datasets
from sklearn.feature_selection import RFE
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
file = "/Users/noahjacobs/Triplab/Notebook_old/Test"

In [2]:
# Initialize the cache and dget the metadata for cells, download if neccessary (to current directory)
ctc = CellTypesCache(manifest_file='cell_types/manifest.json')
cells = ctc.get_cells()

# Get the ephys features of all cells, download if neccessary
ephys_features = ctc.get_ephys_features()

ef_df = pd.DataFrame(ephys_features)

# List of all ephys feature names
ephys_features = list(ef_df)

In [3]:
#Drops bad cells

cell_index = {c['id']: c for c in cells}

# Label species, human (Homo Sapiens) or mouse (Mus musculus)
cell_types = [cell_index[cid]['species'] for cid in ef_df['specimen_id']]

ef_df['species'] = pd.Series(cell_types, index=ef_df.index)


# Label cre line
cell_index = {c['id']: c for c in cells}
cre_line = [cell_index[cid]['transgenic_line'] for cid in
                  ef_df['specimen_id']]

ef_df['cre_line'] = pd.Series(cre_line, index=ef_df.index)

# Set label cre line of human cells to 'Human'
for index, row in ef_df.iterrows():
    if "Homo Sapiens" in set(row):
        ef_df.set_value(index, 'cre_line', 'Human')

# Label dendrite type, spiny, aspiny, or sparsely spiny
dendrite_type = [cell_index[cid]['dendrite_type'] for cid in
                  ef_df['specimen_id']]

ef_df['dendrite_type'] = pd.Series(dendrite_type, index=ef_df.index)

#Drop cells without reporter_status = positive
dropped_cells = []
for x in cell_index:
    if cell_index[x]['species'] == 'Mus musculus':
        #print(cell_index[x]['reporter_status'])
        if cell_index[x]['reporter_status'] != 'positive':
            #dropped_cells++
            dropped_cells.append(x)
            
            
for x in dropped_cells:
    cell_index.pop(x)
print('{} cells dropped.'.format(len(dropped_cells)))



105 cells dropped.


In [4]:
# Create a list of features that are to be dropped. Example: specimen id and data containing NaN
dropped_features = ['id', 'rheobase_sweep_id', 'rheobase_sweep_number',
                    'thumbnail_sweep_id', 'seal_gohm','vm_for_sag']
print()
# Find features with missing data
for index, column in ef_df.iteritems():
    na_array = pd.isna(column)
    for values in na_array:
        if values == True:
              if column.name not in dropped_features:
                    dropped_features.append(column.name)


dropped_features.sort()

# Drop the list of ephys features used for PCA.
for dropped_item in dropped_features:
    ephys_features.remove(dropped_item)
ephys_features.remove('specimen_id')

print('{} features dropped.'.format(len(dropped_features)))
print('{} features in dataframe.'.format(len(ephys_features)))

# Drop the features in the main dataframe.
ef_df.drop(columns=dropped_features, axis = 1, inplace = True)



25 features dropped.
30 features in dataframe.


In [5]:
#Creates numeric values for dendrite_type
numeric_dendrite = []

for i in ef_df['dendrite_type']:
    if i == 'spiny':
        numeric_dendrite.append(0)
    elif i == 'aspiny':
        numeric_dendrite.append(1)
    elif i == 'sparsely spiny':
        numeric_dendrite.append(2)
    else:
        numeric_dendrite.append(-1)
    
ef_df.insert(34, "numeric_dendrite_type", numeric_dendrite)
ef_df.to_csv(path_or_buf= file + "/ef_df.csv")

In [6]:
# Mouse dataframe
mouse_ef_df = ef_df[ef_df['species']=='Mus musculus']
# Human dataframe
human_ef_df = ef_df[ef_df['species']=='Homo Sapiens']

print('Total number of human cells with electrophysiology features: {}'.format(len(human_ef_df)))
print('Number of human spiny cells: {}'.format(len(human_ef_df[human_ef_df['dendrite_type']=='spiny'])))
print('Number of human aspiny cells: {}'.format(len(human_ef_df[human_ef_df['dendrite_type']=='aspiny'])))
print('Number of human sparsely spiny cells: {}\n'.format(len(human_ef_df[human_ef_df['dendrite_type']=='sparsely spiny'])))

print('Total number of mouse cells with electrophysiology features: {}'.format(len(mouse_ef_df)))
print('Number of mouse spiny cells: {}'.format(len(mouse_ef_df[mouse_ef_df['dendrite_type']=='spiny'])))
print('Number of mouse aspiny cells: {}'.format(len(mouse_ef_df[mouse_ef_df['dendrite_type']=='aspiny'])))
print('Number of mouse sparsely spiny cells: {}'.format(len(mouse_ef_df[mouse_ef_df['dendrite_type']=='sparsely spiny'])))

Total number of human cells with electrophysiology features: 413
Number of human spiny cells: 321
Number of human aspiny cells: 79
Number of human sparsely spiny cells: 13

Total number of mouse cells with electrophysiology features: 1920
Number of mouse spiny cells: 892
Number of mouse aspiny cells: 921
Number of mouse sparsely spiny cells: 107


In [7]:
csv2 = pd.read_excel(file + "/e-types_Allen.xlsx")
e_type = ef_df.merge(csv2,on='specimen_id')
e_type = e_type.dropna(axis = 0, subset=['e-type'])

m_type= ef_df.merge(csv2,on='specimen_id')
m_type = m_type.dropna(axis = 0, subset=['m-type'])

allen_e_type = csv2.loc[:,csv2.columns.intersection(['specimen_id','e-type'])]
allen_m_type = csv2.loc[:,csv2.columns.intersection(['specimen_id','m-type'])]
allen_me_type = csv2.loc[:,csv2.columns.intersection(['specimen_id','me-type'])]

In [8]:
mouse_e_type = e_type[e_type['species']=='Mus musculus']
human_e_type = e_type[e_type['species']=='Homo Sapiens']

In [9]:
# download all morphology features for cells with reconstructions
jmorph_features = ctc.get_morphology_features()

# convert to a pandas DataFrame
jmorph_features = pd.DataFrame(jmorph_features)

species_id = ef_df.loc[:, ef_df.columns.intersection(['specimen_id','species'])]

jmorph_features = jmorph_features.drop(columns=['hausdorff_dimension','average_bifurcation_angle_remote',
                                              'neuron_reconstruction_type','superseded','tags','id'])

morph_features = jmorph_features.merge(species_id,on='specimen_id')

mouse_morph = morph_features[morph_features['species']=='Mus musculus']
human_morph = morph_features[morph_features['species']=='Homo Sapiens']

pred_morph_features = list(morph_features)
pred_morph_features.remove('specimen_id')
pred_morph_features.remove('species')

In [10]:
#creates variables for models which use ephys and morph data
pred_ephys_features = ephys_features.copy()
all_features = jmorph_features.merge(ef_df, on='specimen_id')
mouse_all = all_features[all_features['species']=='Mus musculus']
human_all = all_features[all_features['species']=='Homo Sapiens']
pred_all_features = list(set(pred_ephys_features + pred_morph_features))
                              

In [11]:
#Dendrite model creation

pred_ephys_features = ephys_features.copy()
print(pred_ephys_features)

X = mouse_ef_df.loc[:, pred_ephys_features].values
y = mouse_ef_df.loc[:, 'dendrite_type'].values

e_dendrite_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=100)
e_dendrite_model.fit(X,y)
print('Creating model for predicting dendrite_type. Testing on mouse data:')


e_dendrite_model.score(X, y)

['f_i_curve_slope', 'fast_trough_t_long_square', 'fast_trough_t_short_square', 'fast_trough_v_long_square', 'fast_trough_v_short_square', 'has_burst', 'has_delay', 'has_pause', 'input_resistance_mohm', 'latency', 'peak_t_long_square', 'peak_t_short_square', 'peak_v_long_square', 'peak_v_short_square', 'ri', 'sag', 'tau', 'threshold_i_long_square', 'threshold_i_short_square', 'threshold_t_long_square', 'threshold_t_short_square', 'threshold_v_long_square', 'threshold_v_short_square', 'trough_t_long_square', 'trough_t_short_square', 'trough_v_long_square', 'trough_v_short_square', 'upstroke_downstroke_ratio_long_square', 'upstroke_downstroke_ratio_short_square', 'vrest']




Creating model for predicting dendrite_type. Testing on mouse data:


0.8572916666666667

In [12]:
pred_ephys_features2 = pred_ephys_features.copy()
pred_ephys_features.append('numeric_dendrite_type')
pred_all_features.append('numeric_dendrite_type')
#pred_morph_features.append('numeric_dendrite_type')
'''pred_all_features.dropna()
pred_ephys_features.dropna()
pred_morph_features.dropna()'''



'pred_all_features.dropna()\npred_ephys_features.dropna()\npred_morph_features.dropna()'

In [13]:
#Cre_line model creation
N = mouse_ef_df.loc[:, pred_ephys_features].values
b = mouse_ef_df.loc[:, 'cre_line'].values

e_cre_line_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=100)
fiter = e_cre_line_model.fit(StandardScaler().fit_transform(N),b)
print('Creating model for predicting creline. Testing on mouse data:')
e_cre_line_model.score(StandardScaler().fit_transform(N), b)




Creating model for predicting creline. Testing on mouse data:




0.47760416666666666

In [14]:
#using ephys to predict e_type
temp = mouse_ef_df.merge(allen_e_type,on='specimen_id')
temp = temp.dropna()

X = temp.loc[:, pred_ephys_features].values
y = temp.loc[:, 'e-type'].values

e_e_type_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=100)
fiter2 = e_e_type_model.fit(StandardScaler().fit_transform(X),y)

e_e_type_model.score(StandardScaler().fit_transform(X), y)



0.769520732364028

In [15]:
#model using morph to predict e_type
temp = mouse_morph.merge(allen_e_type,on='specimen_id')
temp = temp.dropna()


X = temp.loc[:, pred_morph_features].values
y = temp.loc[:, 'e-type'].values

#m_e_type_model uses morphology to predict e-type

m_e_type_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=100)
fiter2 = m_e_type_model.fit(StandardScaler().fit_transform(X),y)

m_e_type_model.score(StandardScaler().fit_transform(X), y)



0.5582329317269076

In [16]:
#model using morph and ephys to predict e_type
temp = mouse_all.merge(allen_e_type,on='specimen_id')
temp = temp.dropna()

X = temp.loc[:, pred_all_features].values
y = temp.loc[:, 'e-type'].values

me_e_type_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=1000)
fiter2 = me_e_type_model.fit(StandardScaler().fit_transform(X),y)

me_e_type_model.score(StandardScaler().fit_transform(X), y)



0.893574297188755

In [17]:
#model using ephys to predict m_type
temp = mouse_ef_df.merge(allen_m_type,on='specimen_id')
temp = temp.dropna()

X = temp.loc[:, pred_ephys_features].values
y = temp.loc[:, 'm-type'].values

e_m_type_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=100)
fiter2 = e_m_type_model.fit(StandardScaler().fit_transform(X),y)

e_m_type_model.score(StandardScaler().fit_transform(X), y)



0.5124716553287982

In [18]:
#model using morph to predict m_type
temp = mouse_morph.merge(allen_m_type,on='specimen_id')
temp = temp.dropna()

X = temp.loc[:, pred_morph_features].values
y = temp.loc[:, 'm-type'].values

m_m_type_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=100)
fiter2 = m_m_type_model.fit(StandardScaler().fit_transform(X),y)

m_m_type_model.score(StandardScaler().fit_transform(X), y)



0.5381062355658198

In [19]:
#model using morph and ephys to predict m_type
temp = mouse_all.merge(allen_m_type,on='specimen_id')
temp = temp.dropna()

X = temp.loc[:, pred_all_features].values
y = temp.loc[:, 'm-type'].values

me_m_type_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=100)
fiter2 = me_m_type_model.fit(StandardScaler().fit_transform(X),y)

me_m_type_model.score(StandardScaler().fit_transform(X), y)



0.7413394919168591

In [20]:
#model using ephys to predict me_type
temp = mouse_ef_df.merge(allen_me_type,on='specimen_id')
temp = temp.dropna()

X = temp.loc[:, pred_ephys_features].values
y = temp.loc[:, 'me-type'].values

e_me_type_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=100)
fiter2 = e_me_type_model.fit(StandardScaler().fit_transform(X),y)

e_me_type_model.score(StandardScaler().fit_transform(X), y)



0.663594470046083

In [21]:
#model using morph to predict me_type
temp = mouse_morph.merge(allen_me_type,on='specimen_id')
temp = temp.dropna()

X = temp.loc[:, pred_morph_features].values
y = temp.loc[:, 'me-type'].values

m_me_type_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=100)
fiter2 = m_me_type_model.fit(StandardScaler().fit_transform(X),y)

m_me_type_model.score(StandardScaler().fit_transform(X), y)



0.5633802816901409

In [22]:
#model using morph and ephys to predict me_type
temp = mouse_all.merge(allen_me_type,on='specimen_id')
temp = temp.dropna()

X = temp.loc[:, pred_all_features].values
y = temp.loc[:, 'me-type'].values

me_me_type_model = linear_model.LogisticRegression(penalty='l1', solver='saga', max_iter=100)
fiter2 = me_me_type_model.fit(StandardScaler().fit_transform(X),y)

me_me_type_model.score(StandardScaler().fit_transform(X), y)



0.8145539906103286

In [23]:
#Create DataFrame for storing predicted values
ephys_human_predictions = human_ef_df.loc[:, human_ef_df.columns.intersection(['specimen_id'])]
morph_human_predictions = human_morph.loc[:,human_morph.columns.intersection(['specimen_id'])]
all_human_predictions = human_all.loc[:,human_morph.columns.intersection(['specimen_id'])]
ephys_human_feat = human_ef_df.loc[:, pred_ephys_features].values
morph_human_feat = human_morph.loc[:, pred_morph_features].values
all_human_feat = human_all.loc[:, pred_all_features].values

In [24]:
#Dendrite human test 
predicted_ys = e_dendrite_model.predict(human_ef_df.loc[:, pred_ephys_features2].values)

ephys_human_predictions.insert(0, 'predicted dendrite', predicted_ys, True) 

ephys_human_predictions.insert(1,'real dendrite', ef_df['dendrite_type'], True)

metrics.accuracy_score(human_ef_df.loc[:, 'dendrite_type'], predicted_ys)

0.9152542372881356

In [25]:
#Creline human predictions

predicted_ys = e_cre_line_model.predict(ephys_human_feat)

ephys_human_predictions.insert(2, 'ephys predicted creline', predicted_ys, True)

predicted_ys = e_cre_line_model.predict(mouse_ef_df.loc[:, pred_ephys_features].values)
print(predicted_ys)

['Chrna2-Cre_OE25' 'Chrna2-Cre_OE25' 'Chrna2-Cre_OE25' ...
 'Chrna2-Cre_OE25' 'Chrna2-Cre_OE25' 'Chrna2-Cre_OE25']


In [26]:
#Calculating e-type predictions using ephys on humans
predicted_ys = e_e_type_model.predict(ephys_human_feat)
ephys_human_predictions.insert(3, 'ephys e-type predictions', predicted_ys, True)

In [27]:
#Calculating e-type predictions using morph on humans
predicted_ys = m_e_type_model.predict(morph_human_feat)
morph_human_predictions.insert(1, 'morph e-type predictions', predicted_ys, True)

In [28]:
#Calculating e-type predictions using ephys and morph on humans
predicted_ys = me_e_type_model.predict(all_human_feat)
all_human_predictions.insert(1, 'ephys and morph e-type predictions', predicted_ys, True)

In [29]:
#Calculating m-type predictions using ephys on humans
predicted_ys = e_m_type_model.predict(ephys_human_feat)
ephys_human_predictions.insert(3, 'ephys m-type predictions', predicted_ys, True)

In [30]:
#Calculating m-type predictions using morph on humans
predicted_ys = m_m_type_model.predict(morph_human_feat)
morph_human_predictions.insert(1, 'morph m-type predictions', predicted_ys, True)

In [31]:
#Calculating m-type predictions using ephys and morph on humans
predicted_ys = me_m_type_model.predict(all_human_feat)
all_human_predictions.insert(1, 'ephys and morph m-type predictions', predicted_ys, True)

In [32]:
#Calculating me-type predictions using ephys on humans
predicted_ys = e_me_type_model.predict(ephys_human_feat)
ephys_human_predictions.insert(3, 'ephys me-type predictions', predicted_ys, True)

In [33]:
#Calculating me-type predictions using morph on humans
predicted_ys = m_me_type_model.predict(morph_human_feat)
morph_human_predictions.insert(1, 'morph me-type predictions', predicted_ys, True)

In [34]:
#Calculating me-type predictions using ephys and morph on humans
predicted_ys = me_me_type_model.predict(all_human_feat)
all_human_predictions.insert(1, 'ephys and morph me-type predictions', predicted_ys, True)

In [35]:
#ephys_human_predictions.remove('species')
human_predictions = ephys_human_predictions.merge(morph_human_predictions,on='specimen_id',how='outer')
human_predictions = human_predictions.merge(all_human_predictions,on='specimen_id',how='outer')
human_predictions.to_csv(path_or_buf=file + "/predicted.csv")