In [134]:
import matplotlib.pyplot as plt
import numpy as np
from pyuoi.linear_model import UoI_L1Logistic
from scipy.io import loadmat
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
%matplotlib inline

In [2]:
data_path = '/Users/psachdeva/data/berkes/IM-1015_170520_GP_rates_v4.mat'

In [3]:
data = loadmat(data_path, struct_as_record=False)

In [4]:
trials = data['Trials'][0]
good_trials = np.arange(trials.size)
good_trials = np.delete(good_trials, [0, 250])
n_trials = good_trials.size

good_units = np.array([4,5,6,7,8,9,12,13,14,15,16,17,18,19,20,25,26,27]) - 1
n_units = good_units.size

In [6]:
X = np.zeros((n_trials, n_units))
y = np.zeros(n_trials)

In [126]:
for t_idx, trial_idx in enumerate(good_trials):
    trial = trials[trial_idx]
    # get trial label
    y[t_idx] = trial.Evt[0, 0].Cond - 1
    
    # get cue times
    center_cue_t = np.asscalar(trial.CenterCueEvent)
    center_in_t = np.asscalar(trial.CenterInEvent)

    for u_idx, unit_idx in enumerate(good_units):
        unit = trial.Units[0, unit_idx]
        
        spike_times = unit.spkTimes
        spike_count = np.count_nonzero(
            (spike_times >= center_in_t) & (spike_times <= center_in_t + 1.)
        )
        X[t_idx, u_idx] = spike_count
            

In [127]:
scaler = StandardScaler()
X_new = scaler.fit_transform(np.sqrt(X))

In [145]:
logistic = LogisticRegressionCV(
    Cs=100, fit_intercept=True, cv=10,
    penalty='l1', solver='saga',
    max_iter=5000
)

In [153]:
logistic.fit(X_new, y)

LogisticRegressionCV(Cs=100, class_weight=None, cv=10, dual=False,
           fit_intercept=True, intercept_scaling=1.0, max_iter=5000,
           multi_class='warn', n_jobs=None, penalty='l1',
           random_state=None, refit=True, scoring=None, solver='saga',
           tol=0.0001, verbose=0)

In [148]:
logistic.score(X_new, y)

0.8379629629629629

In [137]:
uoi = UoI_L1Logistic(
    n_boots_sel=30,
    n_boots_est=30,
    selection_frac=0.8,
    estimation_frac=0.8,
    max_iter=5000
)

In [138]:
uoi.fit(X, y)













UoI_L1Logistic(comm=None, copy_X=True, estimation_frac=0.8,
        estimation_score='acc', fit_intercept=True, max_iter=None, n_C=48,
        n_boots_est=30, n_boots_sel=30, normalize=True, random_state=None,
        selection_frac=0.8, stability_selection=1.0, warm_start=None)

In [139]:
uoi.coef_

array([[ 0.        ,  0.        ,  0.02573922,  0.        ,  0.        ,
         0.        ,  0.        , -0.03596846,  0.14243798,  0.        ,
         0.        ,  0.06525721,  0.        ,  0.        ,  0.        ,
        -0.02429558, -0.08060181,  0.        ]], dtype=float32)

In [149]:
logistic.coef_ = np.copy(uoi.coef_)

In [150]:
logistic.score(X_new, y)

0.7546296296296297

In [154]:
logistic.coef_

array([[ 0.06408676,  0.17037553,  0.35701484,  0.02623582,  0.13154725,
         0.22528708,  0.18741554, -0.07051725,  1.09520214,  0.11484704,
         0.21860137,  0.46904082,  0.1130015 ,  0.        ,  0.15625944,
        -0.03205884, -0.49703614,  0.        ]])

In [152]:
logistic.intercept_

array([1.54840849])