In [1]:
%load_ext autoreload
%autoreload 2

In [16]:
import numpy as np 
import pickle
import h5py 
import matplotlib.pyplot as plt
import scipy
from scipy import stats

In [31]:
from tqdm import tqdm

In [3]:
from pyuoi.linear_model.var import VAR

In [14]:
from affinewarp import ShiftWarping

### Time warping

In [5]:
f = h5py.File('EC2_hg.h5', 'r')
f.keys()

<KeysViewHDF5 ['X', 'blocks', 'electrode_labels', 'good_channels', 'good_elects', 'good_examples', 'means', 'single_blocks', 'stds', 'vsmc', 'y']>

In [17]:
unique, counts = np.unique(f['y'].value, return_counts=True)
syllables = {}
zsyllables = {}
for syl in unique:
    syl_unnormalized = np.squeeze(f['X'].value[f['y'].value == syl])[:]
    syllables[syl.decode('utf-8')] = np.moveaxis(np.array([zscore(syl_unnormalized[:,:,i], axis=1) 
                                                           for i in range(syl_unnormalized.shape[2])]), 0, -1)
    zsyllables[syl.decode('utf-8')] = np.moveaxis(np.array([stats.zscore(syl_unnormalized[:,:,i], axis=1) 
                                                            for i in range(syl_unnormalized.shape[2])]), 0, -1)

  """Entry point for launching an IPython kernel.
  """


In [7]:
# For each of the desired syllables, fit individual and combined warps

In [18]:
target_cvs = ['baa', 'daa', 'gaa', 'taa', 'thaa', 'saa', 'shaa']

In [25]:
def shift_cv(cv_array, individual=False):
    
    model = ShiftWarping(maxlag=.3, smoothness_reg_scale=10.)
    
    # Fit the model
    if individual:
        shifted_cv_array = np.zeros_like(cv_array)
        for ind in range(shifted_cv_array.shape[2]):
            # Create the model. Add a roughness penalty to the model template.
            model = ShiftWarping(maxlag=.3, smoothness_reg_scale=10.)
            model.fit(np.expand_dims(cv_array[:,:,ind], axis=2), iterations=20, verbose=False)
            shifted_cv_array[:,:,ind] = model.transform(np.expand_dims(cv_array[:,:,ind], axis=2)).squeeze()
    else:
        model.fit(cv_array, iterations=20)
        shifted_cv_array = model.transform(cv_array)
        
    return shifted_cv_array

In [21]:
syllables['baa'].shape

(45, 258, 86)

In [23]:
shift_cv(syllables['baa']).shape

Loss: 2.89: 100%|██████████| 20/20 [00:00<00:00, 20.66it/s]


(45, 258, 86)

In [41]:
shifted_zsyllables_all = {}
shifted_zsyllables_ind = {}
for target_cv in target_cvs:
    shifted_zsyllables_all[target_cv] = shift_cv(zsyllables[target_cv])
    shifted_zsyllables_ind[target_cv] = shift_cv(zsyllables[target_cv], individual=True)

Loss: 0.92: 100%|██████████| 20/20 [00:01<00:00, 19.80it/s]
Loss: 0.91: 100%|██████████| 20/20 [00:01<00:00, 19.51it/s]
Loss: 0.93: 100%|██████████| 20/20 [00:00<00:00, 20.30it/s]
Loss: 0.91: 100%|██████████| 20/20 [00:01<00:00, 17.21it/s]
Loss: 0.88: 100%|██████████| 20/20 [00:01<00:00, 17.25it/s]
Loss: 0.91: 100%|██████████| 20/20 [00:00<00:00, 20.17it/s]
Loss: 0.91: 100%|██████████| 20/20 [00:01<00:00, 16.26it/s]


### Changepoint detection

In [27]:
from dca.data_util import form_lag_matrix

In [28]:
import pdb

In [29]:
# Used to surpress "training is over" output

In [30]:
from contextlib import contextmanager
import sys, os

@contextmanager
def suppress_stdout():
    with open(os.devnull, "w") as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:  
            yield
        finally:
            sys.stdout = old_stdout

In [33]:
idxs = form_lag_matrix(np.arange(258)[:, np.newaxis], 10)

In [35]:
idxs[]

(249, 10)

In [None]:
idxs

In [37]:
syllables['baa'].shape

(45, 258, 86)

In [59]:
idxs = form_lag_matrix(np.arange(258)[:, np.newaxis], 30).astype(int)

In [60]:
idxs[3, :]

array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
       20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32])

In [51]:
zsyllables['baa'][:]

numpy.ndarray

In [61]:
zsyllables['baa'][:, list(idxs[0, :]), :].shape

(45, 30, 86)

In [48]:
zsyllables[:, idxs[0, :], :].shape

TypeError: unhashable type: 'slice'

In [67]:
i

0

In [64]:
window_lengths = [20, 30, 40, 50, 75, 100]

results = {}

for cv in tqdm(target_cvs):
    results[cv] = {}
    for i, window_len in tqdm(enumerate(window_lengths)):
        results[cv]['window_length'] = window_len
        results[cv]['zscored_ols'] = []
        results[cv]['zscored_uoi'] = []
        results[cv]['zscored_shifted_all_ols'] = []
        results[cv]['zscored_shifted_all_uoi'] = []
        results[cv]['zscored_shifted_ind_ols'] = []
        results[cv]['zscored_shifted_ind_uoi'] = []

        idxs = form_lag_matrix(np.arange(258)[:, np.newaxis], window_len).astype(int)
        # Subsample the moving windows by a factor of 10
        for j in tqdm(range(0, idxs.shape[0], 10)):

            # Z-scored only
            vmodel1 = VAR(estimator='ols')
            vmodel2 = VAR(estimator='uoi', fit_type='uoi', penalty='l1')

            with suppress_stdout():
                vmodel1.fit(zsyllables[cv][:, list(idxs[j, :]), :])
                vmodel2.fit(zsyllables[cv][:, list(idxs[j, :]), :])

            results[cv]['zscored_ols'].append(np.squeeze(vmodel1.coef_))
            results[cv]['zscored_uoi'].append(np.squeeze(vmodel2.coef_))

            # Z-scored and warped, collectively
            vmodel1 = VAR(estimator='ols')
            vmodel2 = VAR(estimator='uoi', fit_type='uoi', penalty='l1')

            with suppress_stdout():
                vmodel1.fit(shifted_zsyllables_all[cv][:, list(idxs[j, :]), :])
                vmodel2.fit(shifted_zsyllables_all[cv][:, list(idxs[j, :]), :])

            results[cv]['zscored_shifted_all_ols'].append(np.squeeze(vmodel1.coef_))
            results[cv]['zscored_shifted_all_uoi'].append(np.squeeze(vmodel2.coef_))

            vmodel1 = VAR(estimator='ols')
            vmodel2 = VAR(estimator='uoi', fit_type='uoi', penalty='l1')

            with suppress_stdout():
                vmodel1.fit(shifted_zsyllables_ind[cv][:, list(idxs[j, :]), :])
                vmodel2.fit(shifted_zsyllables_ind[cv][:, list(idxs[j, :]), :])

            results[cv]['zscored_shifted_ind_ols'].append(np.squeeze(vmodel1.coef_))
            results[cv]['zscored_shifted_ind_uoi'].append(np.squeeze(vmodel2.coef_))

            

  0%|          | 0/7 [00:00<?, ?it/s]
0it [00:00, ?it/s][A

  0%|          | 0/24 [00:00<?, ?it/s][A[A

  4%|▍         | 1/24 [04:25<1:41:50, 265.68s/it][A[A

  8%|▊         | 2/24 [09:01<1:39:39, 271.81s/it][A[A

 12%|█▎        | 3/24 [13:30<1:34:40, 270.51s/it][A[A

 17%|█▋        | 4/24 [18:07<1:30:56, 272.84s/it][A[A

 21%|██        | 5/24 [22:39<1:26:20, 272.64s/it][A[A

 25%|██▌       | 6/24 [27:08<1:21:22, 271.26s/it][A[A

 29%|██▉       | 7/24 [31:35<1:16:28, 269.89s/it][A[A

 33%|███▎      | 8/24 [35:59<1:11:28, 268.01s/it][A[A

 38%|███▊      | 9/24 [40:25<1:06:50, 267.36s/it][A[A

 42%|████▏     | 10/24 [44:45<1:01:52, 265.21s/it][A[A

 46%|████▌     | 11/24 [49:05<57:05, 263.51s/it]  [A[A

 50%|█████     | 12/24 [53:38<53:38, 268.21s/it][A[A
0it [53:38, ?it/s]
  0%|          | 0/7 [53:38<?, ?it/s]


KeyboardInterrupt: 

In [33]:
import pickle

In [42]:
# Save coefficients away
with open('tw_varcoef.dat', 'wb') as f:
    f.write(pickle.dumps(varmodel2.coef_))
#   f.write(pickle.dumps(varmodel3.coef_))
    f.write(pickle.dumps(varmodel_cons.coef_))
    f.write(pickle.dumps(varmodel_vow.coef_))

In [43]:
# Load
with open('tw_varcoef.dat', 'rb') as ff:
    total_coef = pickle.load(ff)
    cons_coef = pickle.load(ff)
    vowel_coef = pickle.load(ff)

In [12]:
with open('tw_varcoef.dat', 'wb') as ff:
    ff.write(pickle.dumps(coef_sequence))

In [None]:
# Save the coef sequence as a list of dictionaries

In [18]:
np.array(coef_sequence[0]).shape

(25, 1, 54, 54)

In [19]:
datalist = []
for i, coef_seq in enumerate(coef_sequence):
    datalist.append({'T': T[i], 'adj_sequence': np.squeeze(np.array(coef_seq))})

In [20]:
from scipy.io import savemat

In [23]:
savemat('adj_sequence_warped', {'data':datalist})

## DCA Projections

In [25]:
from dca.dca import DynamicalComponentsAnalysis as DCA
from sklearn.decomposition import PCA
from scipy.stats import linregress

In [51]:
cons_X = cons_series
vowel_X = vowel_series
baa = f['baa'][:]

### Entire time series

In [53]:
dca = DCA()
T = [2, 5, 10]
dca_projections = []
for i, t in enumerate(T):
    dca.fit(baa, d=2, T=t)
    dca_projections.append(dca.coef_)

pca = PCA().fit(baa.reshape((-1, 54)))
pca_projs = pca.components_[0:2, :].T

dca_projs = np.array(dca_projections)
total_coef = np.squeeze(total_coef)

In [54]:
# Analysis: Scatter the row sums with their corresponding loadings in the DCA projection
XX = []
YY = []
YY2 = []

dca_params = []
pca_params = []

for i in range(len(dca_projs)):
    xx = []
    yy = []
    yy2 = []
    
    for j in range(total_coef.shape[1]):

        xx.append(np.sum(total_coef[:, j]))
        yy.append(np.sum(np.abs(dca_projs[i, j, :])))
        yy2.append(np.sum(np.abs(pca_projs[j, :])))

    # Normalize onto a 0:1 scale
    yy = np.array(yy)/np.max(yy)
    yy2 = np.array(yy2)/np.max(yy2)        

        
    # Best fit lines
    slope_dca, intercept_dca, r2_dca, _, _ = linregress(xx, yy)
    slope_pca, intercept_pca, r2_pca, _, _ = linregress(xx, yy2)
    
    dca_params.append([slope_dca, intercept_dca, r2_dca])
    pca_params.append([slope_pca, intercept_pca, r2_pca])
    
    XX.append(xx)
    YY.append(yy)
    YY2.append(yy2)

In [56]:
dca_params

[[0.07759060811471215, 0.31292107137713054, 0.10701249312578759],
 [0.11590255424193582, 0.2879243932108232, 0.17643260265408542],
 [0.04927792954949577, 0.5168451503643913, 0.06300945973309172]]

In [57]:
pca_params

[[0.41067036828872033, -0.008299299248973546, 0.5234137927031266],
 [0.41067036828872033, -0.008299299248973546, 0.5234137927031266],
 [0.41067036828872033, -0.008299299248973546, 0.5234137927031266]]

In [58]:
dca = DCA()
T = [2, 5, 10]
dca_projections = []
for i, t in enumerate(T):
    dca.fit(cons_X, d=2, T=t)
    dca_projections.append(dca.coef_)

pca = PCA().fit(cons_X.reshape((-1, 54)))
pca_projs = pca.components_[0:2, :].T

dca_projs = np.array(dca_projections)
cons_coef = np.squeeze(cons_coef)



In [59]:
# Analysis: Scatter the row sums with their corresponding loadings in the DCA projection
XX = []
YY = []
YY2 = []

dca_params = []
pca_params = []

for i in range(len(dca_projs)):
    xx = []
    yy = []
    yy2 = []
    
    for j in range(cons_coef.shape[1]):

        xx.append(np.sum(cons_coef[:, j]))
        yy.append(np.sum(np.abs(dca_projs[i, j, :])))
        yy2.append(np.sum(np.abs(pca_projs[j, :])))

    # Normalize onto a 0:1 scale
    yy = np.array(yy)/np.max(yy)
    yy2 = np.array(yy2)/np.max(yy2)        

        
    # Best fit lines
    slope_dca, intercept_dca, r2_dca, _, _ = linregress(xx, yy)
    slope_pca, intercept_pca, r2_pca, _, _ = linregress(xx, yy2)
    
    dca_params.append([slope_dca, intercept_dca, r2_dca])
    pca_params.append([slope_pca, intercept_pca, r2_pca])
    
    XX.append(xx)
    YY.append(yy)
    YY2.append(yy2)

In [60]:
dca_params

[[0.023988660620121803, 0.2644179387580312, 0.039424226572550486],
 [0.012394706658288103, 0.20582495060055375, 0.02599747129466716],
 [0.08799003603890364, 0.24497295317821857, 0.15230703585096464]]

In [61]:
pca_params

[[0.1672811234240155, 0.10384776046414963, 0.31739306230541986],
 [0.1672811234240155, 0.10384776046414963, 0.31739306230541986],
 [0.1672811234240155, 0.10384776046414963, 0.31739306230541986]]