# Step 2: Creation of summary statistics

We will use the resulting simulations to create PCA basis functions. For that, we load the traces from step 1.

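Because the pickled traces are larger than 2 GB, we load them with the `pickle_load` helper from `support_files/pickle_macos` instead of calling `pickle.load` directly.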

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pickle
from support_files.pickle_macos import pickle_load
from box import Box
%matplotlib inline

# Load the traces from step 1 and wrap them in a Box so that entries can be
# reached by attribute (mats.ap) as well as by key (mats['ap']).
mats = Box(pickle_load('support_files/pow1_mats_comp_lfs.pkl'))

# Protocols processed in this notebook; each name is used as a key into `mats`.
protocols = ['ap', 'act', 'inact', 'deact', 'ramp']

Let's do the PCA:

In [None]:
from sklearn.decomposition import PCA as sklearnPCA
from sklearn.preprocessing import StandardScaler

# Number of principal components requested from each per-protocol PCA.
N_pcs = 10

# Length of the first axis of the 'ap' data; reused below to reshape the data
# of every protocol.
N = mats['ap'].data.shape[0]

# Container for the per-protocol PCA results.
pcas = Box()

In [None]:
for protocol in protocols:
    # Keep rows from index 6 onward and columns from index 1 onward, flatten
    # each of the N entries to one row, then transpose so that the N entries
    # become the columns (features) seen by the scaler and the PCA.
    traces = mats[protocol].data[:, 6:, 1:].reshape(N, -1).T

    # Standardize every column before the decomposition.
    traces_std = StandardScaler().fit_transform(traces)

    model = sklearnPCA(n_components=N_pcs)
    scores = model.fit_transform(traces_std)

    # Rescale each projected component to unit Euclidean norm.
    for col in range(scores.shape[1]):
        scores[:, col] = scores[:, col] / np.linalg.norm(scores[:, col])

    # Total explained variance of the retained components, in percent.
    explained_pct = np.sum(model.explained_variance_ratio_ * 100)

    pcas[protocol] = Box()
    pcas[protocol]['pcs'] = scores
    pcas[protocol]['explained_var'] = explained_pct

    print('{}: explained variance {:.1f}%'.format(protocol, pcas[protocol]['explained_var']))

In [None]:
# Persist the PCA results. The context manager guarantees that the file is
# flushed and closed even if pickling raises.
with open('./support_files/pow1_sumstats_lfs.pkl', 'wb') as f:
    pickle.dump(pcas, f)

In [None]:
# Copy the summary statistics written by the previous cell into model/pca/
# (shell command; the target directory must already exist).
!cp ./support_files/pow1_sumstats_lfs.pkl model/pca/.

... and look at a reconstruction:

In [None]:
# Index of the '2488_kv' entry, looked up in the 'ap' names and reused for
# every protocol.
idx_2488kv = mats['ap'].names.index('2488_kv')

# 2488kv reconstructions: one panel per protocol, original in blue and the
# least-squares reconstruction from the PCA basis in red.
plt.figure(figsize=(19, 4))
for i, protocol in enumerate(protocols, start=1):
    # Design matrix: the PCA basis plus a column of ones for a constant offset.
    basis = pcas[protocol].pcs
    basis = np.hstack((basis, np.ones((basis.shape[0], 1))))

    # Same slicing as used when fitting the PCA (rows 6:, columns 1:).
    target = mats[protocol].data[idx_2488kv, 6:, 1:]
    P = target.shape[1]
    coeffs, _, _, _ = np.linalg.lstsq(basis, target.reshape(-1), rcond=None)

    # Column 0 of the first entry serves as the x-axis for both curves.
    x_axis = mats[protocol].data[0, 6:, 0]

    plt.subplot(1, len(protocols), i)
    plt.title(protocol)
    plt.plot(x_axis, target, color='b')
    plt.plot(x_axis, np.dot(basis, coeffs).reshape(-1, P), alpha=0.8, color='r')

Make sure that `support_files/pow1_sumstats_lfs.pkl` has been copied to `model/pca/` (the `cp` cell above does this).

## PCA testing

In [None]:
# Sanity check: compare scikit-learn's PCA with a plain numpy
# eigendecomposition of the covariance matrix on random data.
np.random.seed(44)
X = np.random.rand(100, 5)
X_std = StandardScaler().fit_transform(X)

pca = sklearnPCA(n_components=2)
X_reduced = pca.fit_transform(X_std)

# np
x = X_std
cov = np.cov(x, rowvar=False)
evals, evecs = np.linalg.eigh(cov)
# eigh returns eigenvalues in ascending order; reorder to descending so that
# index c matches sklearn's component c.
idx = np.argsort(evals)[::-1]
evecs = evecs[:, idx]
evals = evals[idx]
a = np.dot(x, evecs)


for c in range(pca.n_components_):
    # components_ has shape (n_components, n_features): row c is the
    # eigenvector of component c.
    eigvec = pca.components_[c]

    print('component: {}'.format(c))
    # The eigenvalue of the covariance matrix is explained_variance_;
    # singular_values_ is the 2-norm of the projected data instead.
    print('eigenvalue: {}'.format(pca.explained_variance_[c]))
    print('singular value: {}'.format(pca.singular_values_[c]))
    print('eigenvector (norm={:.3f}): {}'.format(np.linalg.norm(eigvec), eigvec))
    print('dim red X norm={:.3f}\n'.format(np.linalg.norm(X_reduced[:, c])))

    # The numpy decomposition must agree with sklearn; projections are
    # compared up to sign because eigenvector orientation is arbitrary.
    np.testing.assert_allclose(evals[c], pca.explained_variance_[c])
    np.testing.assert_allclose(np.abs(a[:, c]), np.abs(X_reduced[:, c]), atol=1e-8)