In [1]:
import os
import warnings

import numpy as np
from numpy.random import default_rng
from scipy.stats import pearsonr as pearson_correlation
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

import src.SessionNavigator as SessionNavigator
from src.Decoder import Decoder

def make_bins(bin_stop, bin_start, bin_width, stim_table):
    """Return evenly spaced bin edges produced by ``np.arange``.

    Args:
        bin_stop: Upper limit handed to ``np.arange``. A value equal to 0 means
            "derive it": the limit becomes the mean of ``stim_table.duration``
            plus one ``bin_width``.
        bin_start: First edge.
        bin_width: Spacing between consecutive edges.
        stim_table: Object exposing a ``duration`` attribute with a ``mean()``
            method; only consulted when ``bin_stop`` equals 0.

    Returns:
        The array ``np.arange(bin_start, stop, bin_width)``.
    """
    stop = stim_table.duration.mean() + bin_width if bin_stop == 0 else bin_stop
    return np.arange(bin_start, stop, bin_width)
    
def initialize_dict(keys, array_size):
    """Map every key in ``keys`` to its own zero-filled array of shape ``array_size``.

    Each key receives a separate ``np.zeros(array_size)`` array, so writing
    into one entry never affects another.
    """
    return {key: np.zeros(array_size) for key in keys}

# Folder that contains AllenSDK_Data/manifest.json. The ALLEN_DATA_ROOT
# environment variable overrides it so the notebook can run on another
# machine; when it is unset the original local path is used.
data_root = os.environ.get(
    "ALLEN_DATA_ROOT",
    "C:/Users/Demogorgon/Documents/College/Marcus/Boston University PhD/Ocker Lab",
)
manifest_path = f"{data_root}/AllenSDK_Data/manifest.json"
navigator = SessionNavigator.SessionNavigator(manifest_path)

# find_sessions is given the structure acronyms plus genotype and
# session-type filters.
acronyms = ['VISp', 'VISl', 'VISal', 'VISrl', 'VISam', 'VISpm', 'LGd']
stim = 'drifting_gratings_contrast'
session_ids = navigator.find_sessions(acronyms, genotype="wt/wt", session_type="functional_connectivity")
if len(session_ids) == 0:
    # Fail with a clear message instead of an IndexError on session_ids[0].
    raise RuntimeError("find_sessions returned no sessions for the requested filters")

# Only the first matching session is loaded and analysed.
session = navigator.load_session(session_ids[0])
stim_table = session.get_stimulus_table(stim)
stim_presentation_ids = stim_table.index.values
stim_characteristic = 'orientation'

# Collect the unit ids one structure at a time, in the order given by
# session.structure_acronyms.
all_units = []
for acronym in session.structure_acronyms:
    in_structure = session.units.ecephys_structure_acronym == acronym
    all_units.extend(session.units[in_structure].index)

classifier = LinearSVC()
# bin_stop == 0 lets make_bins derive the stop from the stimulus durations.
bins = make_bins(0.0, 0.0, 0.05, stim_table)

# Labels for every presentation; entries equal to the string "null" are
# recoded as -1.0 so the labels can later be cast to integers.
y = np.array(stim_table[stim_characteristic])
for idx, label in enumerate(y):
    if label == "null":
        y[idx] = -1.0
stim_modalities = np.unique(y)

# Spike counts as a plain array; downstream code unpacks its shape as
# (presentations, bins, units).
x = np.array(
    session.presentationwise_spike_counts(bins, stim_presentation_ids, all_units)
)

  from pandas.util.testing import assert_frame_equal


In [2]:
# One classifier fit per time bin. Each bin is scored on a held-out 20% of the
# presentations (train_test_split below), not on the rows it was fit on.

# Seed for the train/test split: the same presentations are held out on every
# run and in every bin, so per-bin accuracies are comparable with each other.
SPLIT_SEED = 0

# Get data information
num_presentations, num_bins, num_units = x.shape
y_true = y.astype(int)

# Initialize everything
weights_by_modality = initialize_dict(stim_modalities, (num_bins, num_units))
weights_by_cell = initialize_dict(all_units, (num_bins, len(stim_modalities)))
weights_by_bin = {}
accuracies_by_bin = {}

# Train the classifier by bin, then store the resulting weights.
# Loop variables are named bin_idx / modality so they neither shadow the
# builtin `bin` nor overwrite the notebook-level `stim` stimulus name.
for bin_idx in range(num_bins):
    # Get the data for the current time bin
    x_bin = x[:, bin_idx, :]

    x_train, x_test, y_train, y_test = train_test_split(
        x_bin, y_true, test_size=0.2, random_state=SPLIT_SEED
    )

    # Train the classifier
    classifier.fit(x_train, y_train)

    # Store the weights, and the classes.
    # The classes must be stored so that the correct set of
    # weights can be associated with the correct stim
    bin_weights = classifier.coef_
    classes = classifier.classes_
    weights_by_bin[bin_idx] = bin_weights

    # Fraction of held-out presentations classified correctly (for
    # single-label targets this equals micro-averaged precision).
    accuracies_by_bin[bin_idx] = metrics.accuracy_score(
        y_test, classifier.predict(x_test)
    )

    # Store the weights, sorted by modality: row class_idx of coef_ belongs to
    # classes[class_idx].
    for class_idx, modality in enumerate(classes):
        weights_by_modality[modality][bin_idx, :] = bin_weights[class_idx, :]

# Sort the weights by unit
for unit_idx, unit_id in enumerate(all_units):
    for modality_idx, modality in enumerate(stim_modalities):
        # Column modality_idx of this unit's array receives that unit's weight
        # in every time bin for the given stimulus modality.
        weights_by_cell[unit_id][:, modality_idx] = weights_by_modality[modality][
            :, unit_idx
        ]


In [None]:
# Relies on state left behind by the per-bin training loop in the previous
# cell: `classifier` is whatever the final iteration fit, and `x_bin` is that
# same iteration's slice of `x` (every presentation of the last time bin, so
# it includes rows the classifier was trained on).
y_hat = classifier.predict(x_bin)