In [30]:
import pandas as pd
import seaborn as sns
sns.set()
import numpy as np
import matplotlib.pyplot as plt
import py_pcha

# Get data

In [5]:
def get_basic_features(path):
    """Summarise the four signal files found under ``path``.

    For each of BVP, HR, EDA and TEMP, reads ``{path}/{stat}.csv``, takes the
    column named after the signal and computes its min, max, mean and std.

    Returns
    -------
    (feature_names, features) : tuple of two lists
        Parallel lists of 16 entries each; names look like ``"HR_mean"`` and
        are ordered signal by signal, then min, max, mean, std.
    """
    summary_stats = ("min", "max", "mean", "std")
    names, values = [], []
    for stat in ("BVP", "HR", "EDA", "TEMP"):
        column = pd.read_csv(f"{path}/{stat}.csv")[stat]
        for stat_name in summary_stats:
            names.append(f"{stat}_{stat_name}")
            # Same as calling column.min(), column.max(), ... in turn.
            values.append(getattr(column, stat_name)())
    return names, values

# Load the index table; each entry of its 'Path' column is a directory that is
# handed to get_basic_features.
metadata = pd.read_csv("metadata.csv")

# Fail early with a clear message: with no paths the loop below never runs, so
# `feature_names` would be undefined (or stale from an earlier kernel session).
assert not metadata['Path'].empty, "metadata.csv lists no paths in its 'Path' column"

# Collect one row of summary statistics per path in a separate list so that
# the name `basic_ds` only ever refers to the final DataFrame.
feature_rows = []
for path in metadata['Path']:
    feature_names, features = get_basic_features(path)
    feature_rows.append(features)

# The column labels come from the last get_basic_features call.
basic_ds = pd.DataFrame(feature_rows, columns=feature_names)

# Archetypal Analysis

In [29]:
X = basic_ds.copy()
n_components = 3

# PCHA is given X transposed, i.e. one column per row of basic_ds.
# Does not require that you center data
XC, S, C, SSE, varexpl = py_pcha.PCHA(np.asarray(X.T), noc=n_components, delta=0.1)
# Convert the returned factors to plain ndarrays before using `@` below.
# NOTE(review): presumably PCHA returns np.matrix objects - confirm against py_pcha.
XC = np.asarray(XC)
S = np.asarray(S)
C = np.asarray(C)

# Notes on the PCHA outputs (kept from the original cell, now as comments):
#   XC  : numpy.2darray
#         I x noc feature matrix (i.e. XC = X[:, I] * C, forming the archetypes)
#   S   : numpy.2darray
#         noc x n matrix, S >= 0, |s_j|_1 = 1
#   C   : numpy.2darray
#         |I| x noc matrix (one row per column of X[:, I]), C >= 0, |c_j|_1 = 1
#   SSE : float
#         Sum of Squared Errors

# Reconstruct the (transposed) data from the factors and take half the squared
# Frobenius norm of the residual as the loss.
X_hat = X.T @ C @ S
L = 0.5*np.linalg.norm(X.T-X_hat)**2
components = XC.T

# Total sum of squares of X, the data that was factorised above.
# (`Xtrain` is not defined anywhere in this notebook, so it must not be used here.)
SST = np.sum(np.sum(X**2))
print(f"Variance explained: {1-2*L/SST}")

Variance explained: 0.9949744595485853
