In [1]:
## requirements
# allensdk
# scikit-learn > 0.19

In [2]:
from __future__ import print_function
from allensdk.core.brain_observatory_cache import BrainObservatoryCache

In [3]:
boc = BrainObservatoryCache()

In [4]:
experiment_id = 541206592

# Initializations:
dataset = boc.get_ophys_experiment_data(experiment_id)


2018-07-26 16:42:25,864 allensdk.api.api.retrieve_file_over_http INFO     Downloading URL: http://api.brain-map.org/api/v2/well_known_file_download/541457182


In [5]:
import pandas as pd
import numpy as np

from allensdk.brain_observatory.natural_scenes import NaturalScenes
ns = NaturalScenes(dataset)

In [6]:
images = ns.stim_table.copy()
images.head()

Unnamed: 0,frame,start,end
0,92,16125,16132
1,27,16133,16140
2,52,16140,16147
3,37,16148,16155
4,103,16155,16162


In [7]:
mean_sweep_response = ns.mean_sweep_response.copy()
mean_sweep_response.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,145,146,147,148,149,150,151,152,153,dx
0,2.750398,3.113332,3.283231,1.03566,2.312769,1.00532,1.795959,1.302906,1.109441,2.644591,...,1.204343,3.223873,-0.614828,1.594057,2.538169,9.322724,3.246232,17.838305,44.883263,-0.016407
1,5.472741,4.520462,1.848134,1.50907,3.900594,2.375818,0.627758,0.452645,-1.94657,-0.088816,...,3.157544,2.147983,3.462305,1.155841,3.599893,10.790494,3.700325,41.864319,55.052734,0.012626
2,4.938696,1.872071,0.822514,-0.36655,0.590227,0.577107,-0.431362,3.239566,-2.953792,0.519713,...,2.204452,0.433436,1.692538,4.946897,-2.181524,1.229501,1.446858,19.081219,-0.731454,0.023797
3,0.686303,-1.502568,-2.128904,-0.779033,-3.098761,0.632175,2.272473,4.147108,-4.235024,-0.116484,...,1.661621,-1.256703,0.234285,5.248888,-1.481765,-3.012385,0.283391,-6.771274,-18.988461,0.003431
4,-2.763241,-2.317277,-1.518564,-1.131271,-3.716857,0.189942,1.049816,0.004382,-3.94232,-2.070535,...,0.76898,0.71363,4.319497,1.107409,0.044746,-4.763652,-0.807627,-15.687251,-24.453199,0.022718


In [8]:
# We don't want dx for this. goodbye!
mean_sweep_response.drop('dx',axis=1,inplace=True)

# And let's rename the columns
neurons = pd.Series(
    ns.cell_id,
    name='neuron',
)
mean_sweep_response.columns = neurons
mean_sweep_response.head()

neuron,541510267,541510270,541510307,541510405,588381938,541510410,541511183,541510394,588381886,541511196,...,588381999,541510679,541509977,541510142,541509981,541509952,541510950,541511172,541509957,541511118
0,2.750398,3.113332,3.283231,1.03566,2.312769,1.00532,1.795959,1.302906,1.109441,2.644591,...,3.245388,1.204343,3.223873,-0.614828,1.594057,2.538169,9.322724,3.246232,17.838305,44.883263
1,5.472741,4.520462,1.848134,1.50907,3.900594,2.375818,0.627758,0.452645,-1.94657,-0.088816,...,5.05127,3.157544,2.147983,3.462305,1.155841,3.599893,10.790494,3.700325,41.864319,55.052734
2,4.938696,1.872071,0.822514,-0.36655,0.590227,0.577107,-0.431362,3.239566,-2.953792,0.519713,...,0.117338,2.204452,0.433436,1.692538,4.946897,-2.181524,1.229501,1.446858,19.081219,-0.731454
3,0.686303,-1.502568,-2.128904,-0.779033,-3.098761,0.632175,2.272473,4.147108,-4.235024,-0.116484,...,-3.436971,1.661621,-1.256703,0.234285,5.248888,-1.481765,-3.012385,0.283391,-6.771274,-18.988461
4,-2.763241,-2.317277,-1.518564,-1.131271,-3.716857,0.189942,1.049816,0.004382,-3.94232,-2.070535,...,-3.320092,0.76898,0.71363,4.319497,1.107409,0.044746,-4.763652,-0.807627,-15.687251,-24.453199


In [14]:
# get features and output
X = mean_sweep_response
y = images['frame']

In [15]:
# split training & testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2, 
    stratify=y,
)

In [23]:
# do the classification
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(
    multi_class='ovr',
)

In [24]:
classifier.fit(X_train,y_train)

  " = {}.".format(self.n_jobs))


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=-1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [27]:
classifier.score(X_test,y_test) * len(np.unique(y))

11.2

In [28]:
from sklearn.decomposition import PCA
reducer = PCA(n_components=2)

In [30]:
X_train_reduced = reducer.fit_transform(X_train)
print(X_train.shape, X_train_reduced.shape)

(4760, 154) (4760L, 2L)


In [32]:
X_test_reduced = reducer.transform(X_test)
print(X_test.shape, X_test_reduced.shape)

(1190, 154) (1190L, 2L)


In [33]:
classifier.fit(X_train_reduced,y_train)
classifier.score(X_test_reduced,y_test) * len(np.unique(y))

  " = {}.".format(self.n_jobs))


2.2

In [34]:
from sklearn.pipeline import Pipeline

In [37]:
pipeline = Pipeline((
    ('reducer', PCA(n_components=2)),
    ('classifier', LogisticRegression()),
))

In [38]:
pipeline.fit(X_train,y_train)
pipeline.score(X_test,y_test) * len(np.unique(y))

2.2



In [46]:
from sklearn.grid_search import GridSearchCV
from sklearn.decomposition import NMF
from sklearn.neighbors import KNeighborsClassifier

pipeline = Pipeline((
    ('reduce_dim', PCA()),
    ('classify', LogisticRegression()),
))

N_FEATURES_OPTIONS = [4, 8, 16,]

param_grid = [
    {
        'reduce_dim__n_components': N_FEATURES_OPTIONS,
        'classify': [LogisticRegression(), KNeighborsClassifier()]
    },
]

In [47]:
grid = GridSearchCV(pipeline, param_grid)

In [48]:
grid.fit(X_train, y_train)
grid.score(X_test, y_test) * len(np.unique(y))

9.100000000000001

In [49]:
grid.best_estimator_

Pipeline(memory=None,
     steps=[('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=16, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('classify', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))])