In [59]:
# Decode, per trial, which feature was selected, based on the firing rates of neurons.
# Experiment with different ranges of firing rates around fixation (selection) time.

%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats
from lfp_tools import (
    general as lfp_general,
    startup as lfp_startup,
    development as lfp_development,
    analysis as lfp_analysis
)
from spike_tools import (
    general as spike_general,
    analysis as spike_analysis,
)
import s3fs
import utils.behavioral_utils as behavioral_utils
import utils.spike_utils as spike_utils
import utils.classifier_utils as classifier_utils
from trial_splitters.random_splitter import RandomSplitter
from trial_splitters.group_splitter import GroupSplitter

# Identifiers for the recording being analysed; `subject` and `session` are the
# values handed to the spike_general loaders further down.
session = 20180802  # this is the session for which there are spikes at the moment.
exp = "WCST"
subject = "SA"
species = "nhp"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# grab behavioral data, spike data, trial numbers.

fs = s3fs.S3FileSystem()
behavior_file = spike_general.get_behavior_path(subject, session)

# Read through a context manager so the S3 file handle is closed as soon as the
# CSV has been parsed (the bare fs.open(...) call left it open).
with fs.open(behavior_file) as behavior_fh:
    behavior_data = pd.read_csv(behavior_fh)

# Keep only rows whose Response is "Correct" or "Incorrect".
valid_beh = behavior_data[behavior_data.Response.isin(["Correct", "Incorrect"])]
trial_numbers = np.unique(valid_beh.TrialNumber)

# Spike times for the retained trials, delimited by the TrialStart / TrialEnd fields.
spike_data = spike_general.get_spike_times_by_trial(
    fs,
    subject,
    session,
    trial=trial_numbers,
    start_field="TrialStart",
    end_field="TrialEnd",
)


In [5]:
# find "fixation times". In this case, 800ms before feedback. 
# Window handed to get_trial_intervals around each trial's "FeedbackOnset" event:
# 800 on the pre side, 0 on the post side.
pre_interval, post_interval = 800, 0
intervals = behavioral_utils.get_trial_intervals(
    valid_beh, "FeedbackOnset", pre_interval, post_interval
)


In [None]:
# Load the session's spike times, then keep the spikes that fall inside the
# per-trial intervals computed above.
spike_times = spike_general.get_spike_times(fs, subject, session)
spike_by_trial_interval = spike_utils.get_spikes_by_trial_interval(
    spike_times,
    intervals,
)

In [32]:
# grab firing rates, spike counts, in 800ms intervals, 100ms bins
# Build the 9 bin edges (0.0 ... 0.8) from an integer range. np.arange with a
# fractional step can gain or lose an element through rounding of the stop value,
# which would silently change the number of bins. The edge values are unchanged.
bin_edges = 0.1 * np.arange(9)
firing_rates = spike_analysis.firing_rate(spike_by_trial_interval, bins=bin_edges, smoothing=1)

In [88]:
# Derive selection features from the valid-response trials.
# NOTE(review): `selections` is not referenced again in the visible cells; the decoding
# below uses `feature_selections` loaded from a pickle instead — confirm whether this
# cell is still needed.
selections = behavioral_utils.get_selection_features(valid_beh)

In [3]:
# Load cached DataFrames from disk.
# NOTE: unpickling executes arbitrary code; only load pickle files from a trusted source.
feature_selections = pd.read_pickle("../data/feature_selections.pickle")
# NOTE(review): this rebinds `firing_rates`, replacing the value computed from
# spike_analysis.firing_rate in an earlier cell — confirm the cached file matches it.
firing_rates = pd.read_pickle("../data/firing_rates.pickle")

Unnamed: 0,TrialNumber,UnitID,TimeBins,SpikeCounts,FiringRate
0,1.0,0,0.0,0.0,0.000134
1,1.0,0,0.1,0.0,0.004566
2,1.0,0,0.2,0.0,0.058557
3,1.0,0,0.3,0.0,0.300796
4,1.0,0,0.4,1.0,0.708603
...,...,...,...,...,...
825523,1750.0,58,0.3,0.0,0.241971
825524,1750.0,58,0.4,0.0,0.053991
825525,1750.0,58,0.5,0.0,0.004432
825526,1750.0,58,0.6,0.0,0.000134


In [10]:
# train test split on trial numbers


array([   1,    2,    3, ..., 1748, 1749, 1750])

In [47]:
from sklearn import svm
from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LogisticRegression

# Column of `firing_rates` used as classifier input, and column of
# `feature_selections` used as the label to decode.
mode = "SpikeCounts"
feature_dim = "Pattern"

# Support-vector classifier with a one-vs-one decision function shape.
clf = svm.SVC(decision_function_shape="ovo")

# Rename the chosen columns to "Value" / "Feature"
# (presumably the names classifier_utils.evaluate_classifier expects — TODO confirm).
labels = feature_selections.rename(columns={feature_dim: "Feature"})
inputs = firing_rates.rename(columns={mode: "Value"})


In [None]:
# Evaluate the classifier on all time bins together, splitting by trial number.
# NOTE(review): 20 and 0.2 look like (number of splits, test fraction) — confirm
# against RandomSplitter's signature.
label_trial_numbers = labels.TrialNumber.unique()
random_splitter = RandomSplitter(label_trial_numbers, 20, 0.2)
(train_accs, test_accs, shuffled_accs, models) = classifier_utils.evaluate_classifier(
    clf,
    inputs,
    labels,
    random_splitter,
)

In [39]:
# Report the mean accuracy over all evaluation runs for the train, test and shuffled conditions.
print(
    f"Mean Train accuracy: {np.mean(train_accs)}, "
    f"Test accuracy: {np.mean(test_accs)}, "
    f"Shuffled accuracy: {np.mean(shuffled_accs)}"
)


Mean Train accuracy: 0.8247319513938528, Test accuracy: 0.5161428571428572, Shuffled accuracy: 0.252


In [56]:
# Display the distinct trial numbers present in the valid-response trials.
valid_beh["TrialNumber"].unique()

array([   1,    2,    3, ..., 1748, 1749, 1750])

In [55]:

# Decode the selected feature separately from each time bin.
# An integer arange scaled by 0.1 gives the same 8 bin starts (0.0 ... 0.7) as
# np.arange(0.0, 0.8, 0.1), but its length cannot be altered by float rounding.
time_bins = 0.1 * np.arange(8)
for time_bin in time_bins:  # named `time_bin` so the builtin bin() is not shadowed
    print(time_bin)
    # Tolerant match: exact == on floats such as 0.30000000000000004 selects nothing
    # if the stored TimeBins values were produced by a different computation.
    inputs_for_bin = inputs[np.isclose(inputs["TimeBins"], time_bin)]
    print(len(inputs_for_bin))
    # Pass a splitter explicitly, matching the 4-argument evaluate_classifier call
    # used for the all-bins run. A fresh splitter is built per bin so no state is
    # shared between bins.
    bin_splitter = RandomSplitter(labels.TrialNumber.unique(), 20, 0.2)
    train_accs, test_accs, shuffled_accs, models = classifier_utils.evaluate_classifier(
        clf, inputs_for_bin, labels, bin_splitter
    )
    print(f"Mean Train accuracy: {np.mean(train_accs)}, Test accuracy: {np.mean(test_accs)}, Shuffled accuracy: {np.mean(shuffled_accs)}")

0.0
103191
[ 511  850  977 ... 1249 1070 1660]
[1540  559 1711 ...  593 1655 1462]
[1207 1384  133 ...  165 1605 1631]
[ 613 1340  456 ...  683  465 1081]
[ 551  914  271 ... 1198  552 1401]
[ 652   23 1643 ... 1203 1440  653]
[1730 1324  901 ...  721 1249  349]
[ 790 1357 1134 ... 1344  928 1359]
[ 119  418   94 ...  668  524 1593]
[ 190 1592  401 ... 1525  985  791]
[ 341 1090  939 ...  715  945  727]
[   4 1111  826 ...  445  671 1735]
[ 444  390 1013 ...  324  942 1224]
[  22 1370  377 ... 1587 1025  773]
[1582  901 1459 ...   67  960 1675]
[ 433  108 1302 ... 1383  240 1281]
[  74  525  988 ... 1482 1711 1442]
[1331 1063  966 ...   66 1646  166]
[ 942 1319 1101 ... 1059  957 1349]
[ 712 1404  234 ... 1531  943  138]
Mean Train accuracy: 0.5284131522516083, Test accuracy: 0.34199999999999997, Shuffled accuracy: 0.25842857142857145
0.1
103191
[1216  501  505 ... 1539  961 1198]
[ 978  191 1511 ...  567  476  744]
[1141 1734  557 ...  921 1471  101]
[ 205 1258 1255 ...  124   71 1186

KeyboardInterrupt: 

In [54]:

# Decode the selected Color separately from each time bin.
feature_dim = "Color"
color_labels = feature_selections.rename(columns={feature_dim: "Feature"})
# An integer arange scaled by 0.1 gives the same 8 bin starts (0.0 ... 0.7) as
# np.arange(0.0, 0.8, 0.1), but its length cannot be altered by float rounding.
time_bins = 0.1 * np.arange(8)
for time_bin in time_bins:  # named `time_bin` so the builtin bin() is not shadowed
    print(time_bin)
    # Tolerant match: exact == on floats such as 0.30000000000000004 selects nothing
    # if the stored TimeBins values were produced by a different computation.
    inputs_for_bin = inputs[np.isclose(inputs["TimeBins"], time_bin)]
    print(len(inputs_for_bin))
    # Pass a splitter explicitly, matching the 4-argument evaluate_classifier call
    # used for the all-bins run. A fresh splitter is built per bin so no state is
    # shared between bins.
    bin_splitter = RandomSplitter(color_labels.TrialNumber.unique(), 20, 0.2)
    train_accs, test_accs, shuffled_accs, models = classifier_utils.evaluate_classifier(
        clf, inputs_for_bin, color_labels, bin_splitter
    )
    print(f"Mean Train accuracy: {np.mean(train_accs)}, Test accuracy: {np.mean(test_accs)}, Shuffled accuracy: {np.mean(shuffled_accs)}")

0.0
103191
Mean Train accuracy: 0.5002144388849178, Test accuracy: 0.3047142857142857, Shuffled accuracy: 0.2754285714285714
0.1
103191
Mean Train accuracy: 0.5904217298070049, Test accuracy: 0.37, Shuffled accuracy: 0.25871428571428573
0.2
103191
Mean Train accuracy: 0.576518942101501, Test accuracy: 0.34628571428571425, Shuffled accuracy: 0.26728571428571424
0.30000000000000004
103191
Mean Train accuracy: 0.5366333095067906, Test accuracy: 0.30442857142857144, Shuffled accuracy: 0.2621428571428571
0.4
103191
Mean Train accuracy: 0.5399928520371695, Test accuracy: 0.29271428571428576, Shuffled accuracy: 0.25842857142857145
0.5
103191
Mean Train accuracy: 0.5323802716225876, Test accuracy: 0.30271428571428566, Shuffled accuracy: 0.2611428571428571
0.6000000000000001
103191
Mean Train accuracy: 0.5062544674767692, Test accuracy: 0.276, Shuffled accuracy: 0.25657142857142856
0.7000000000000001
103191
Mean Train accuracy: 0.5196926375982844, Test accuracy: 0.28471428571428564, Shuffled ac