In [1]:
import h5py
import matplotlib.pyplot as plt
import numpy as np
from pyuoi.linear_model import UoI_L1Logistic
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import StratifiedKFold
%matplotlib inline

In [2]:
data_path = '/Users/psachdeva/data/cv/EC2_blocks_1_8_9_15_76_89_105_CV_AA_ff_align_window_-0.5_to_0.79_none_AA_avg.h5'

In [68]:
f = h5py.File(data_path, 'r')

In [69]:
X_con = f['Xhigh gamma'][:, :, 75]
X_vow = f['Xhigh gamma'][:, :, 175]
cvs = f['y'][:]
vowels = cvs % 3
consonants = np.floor(cvs / 3)

In [70]:
skfold = StratifiedKFold(n_splits=5)
logistic = LogisticRegressionCV(
    Cs=48, fit_intercept=True, cv=skfold, solver='saga',
    penalty='l1', multi_class='multinomial', max_iter=250
)

In [81]:
uoi = UoI_L1Logistic(n_boots_sel=30,
                     n_boots_est=30,
                     fit_intercept=True,
                     standardize=True,
                     selection_frac=0.7,
                     estimation_frac=0.8,
                     n_C=48,
                     multi_class='multinomial',
                     shared_support=False)

# Vowels

In [78]:
logistic.fit(X_vow[:2000], vowels[:2000])

LogisticRegressionCV(Cs=48, class_weight=None,
                     cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
                     dual=False, fit_intercept=True, intercept_scaling=1.0,
                     l1_ratios=None, max_iter=250, multi_class='multinomial',
                     n_jobs=None, penalty='l1', random_state=None, refit=True,
                     scoring=None, solver='saga', tol=0.0001, verbose=0)

In [83]:
vow_pred = logistic.predict(X_vow[:2000])
print(np.count_nonzero(vow_pred == vowels[:2000])/vowels[:2000].size)
vow_pred = logistic.predict(X_vow[2000:])
print(np.count_nonzero(vow_pred == vowels[2000:])/vowels[2000:].size)
print(np.count_nonzero(logistic.coef_)/logistic.coef_.size)

0.6615
0.5804195804195804
0.5465116279069767


In [82]:
uoi.fit(X_vow[:2000], vowels[:2000], stratify=vowels[:2000])

UoI_L1Logistic(comm=None, estimation_frac=0.8, estimation_score='acc',
               fit_intercept=True, max_iter=None, multi_class='multinomial',
               n_C=48, n_boots_est=30, n_boots_sel=30,
               random_state=<module 'numpy.random' from '/Users/psachdeva/anaconda3/lib/python3.7/site-packages/numpy/random/__init__.py'>,
               selection_frac=0.7, shared_support=False,
               stability_selection=1.0, standardize=True, tol=0.001,
               warm_start=None)

In [84]:
vow_pred = uoi.predict(X_vow[:2000])
print(np.count_nonzero(vow_pred == vowels[:2000])/vowels[:2000].size)
vow_pred = uoi.predict(X_vow[2000:])
print(np.count_nonzero(vow_pred == vowels[2000:])/vowels[2000:].size)
print(np.count_nonzero(uoi.coef_)/uoi.coef_.size)

0.563
0.5402097902097902
0.1434108527131783


# Consonants

In [87]:
logistic.fit(X_con[:2000], consonants[:2000])



LogisticRegressionCV(Cs=48, class_weight=None,
                     cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
                     dual=False, fit_intercept=True, intercept_scaling=1.0,
                     l1_ratios=None, max_iter=250, multi_class='multinomial',
                     n_jobs=None, penalty='l1', random_state=None, refit=True,
                     scoring=None, solver='saga', tol=0.0001, verbose=0)

In [89]:
con_pred = logistic.predict(X_con[:2000])
print(np.count_nonzero(con_pred == consonants[:2000])/consonants[:2000].size)
con_pred = logistic.predict(X_con[2000:])
print(np.count_nonzero(con_pred == consonants[2000:])/consonants[2000:].size)
print(np.count_nonzero(logistic.coef_)/logistic.coef_.size)

0.428
0.2534965034965035
0.3990208078335373


In [92]:
uoi.fit(X_con[:2000], consonants[:2000], stratify=consonants[:2000])

UoI_L1Logistic(comm=None, estimation_frac=0.8, estimation_score='acc',
               fit_intercept=True, max_iter=None, multi_class='multinomial',
               n_C=48, n_boots_est=30, n_boots_sel=30,
               random_state=<module 'numpy.random' from '/Users/psachdeva/anaconda3/lib/python3.7/site-packages/numpy/random/__init__.py'>,
               selection_frac=0.7, shared_support=False,
               stability_selection=1.0, standardize=True, tol=0.001,
               warm_start=None)

In [94]:
con_pred = uoi.predict(X_con[:2000])
print(np.count_nonzero(con_pred == consonants[:2000])/consonants[:2000].size)
con_pred = uoi.predict(X_con[2000:])
print(np.count_nonzero(con_pred == consonants[2000:])/consonants[2000:].size)
print(np.count_nonzero(uoi.coef_)/uoi.coef_.size)

0.254
0.1520979020979021
0.2086903304773562
