## Tangent space ML

In [1]:
from geomstats.geometry.spd_matrices import SPDMatrices
from geomstats.learning.preprocessing import ToTangentSpace
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from data_util import *
import numpy as np

spd_manifold = SPDMatrices(n=12)

num_classes = 2
target_class_list = ["AF", "SR"]
mat, all_ids, targets = load_Chapman_ECG(num_classes=num_classes, target_class_list=target_class_list)
mat = np.asarray(mat)

INFO: Using numpy backend
INFO: Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO: NumExpr defaulting to 8 threads.


Loading Chapman Shaoxing 12-lead ECG Data... 
Unpacking data for 2 classes only


  0%|                                                                                                         | 0/1920 [00:00<?, ?it/s]

[]





IndexError: list index out of range

In [2]:
lr_pipeline = Pipeline(
    steps=[
        ("trick_tangent_space", ToTangentSpace(geometry=spd_manifold.metric)),
        ("standardscaler", StandardScaler()),
        ("classifier", LogisticRegression(C=3.0, max_iter=3000, solver='saga', class_weight='balanced', warm_start=True)),
    ]
)



X = mat
y = targets
X_train, X_test, y_train, y_test = train_test_split(X, y)
lr_pipeline.fit(X_train, y_train)



Pipeline(steps=[('trick_tangent_space',
                 ToTangentSpace(geometry=<geomstats.geometry.spd_matrices.SPDAffineMetric object at 0x1072dd2e0>)),
                ('standardscaler', StandardScaler()),
                ('classifier',
                 LogisticRegression(C=3.0, class_weight='balanced',
                                    max_iter=3000, solver='saga',
                                    warm_start=True))])

In [3]:
print("Accuracy:{:.2f} ".format(lr_pipeline.score(X_test, y_test)))
y_pred = lr_pipeline.predict(X_train)
cmtx = get_confusion_matrix(y_test=y_train, y_pred=y_pred, target_class_list=["AF", "SR"])
print(cmtx)

Accuracy:0.76 
         pred:AF  pred:SR
true:AF    0.745    0.255
true:SR    0.202    0.798


## Riemannian KMeans clustering

In [4]:
from geomstats.learning.kmeans import RiemannianKMeans

kmeans = RiemannianKMeans(spd_manifold.metric, n_clusters=2, max_iter=50, init='kmeans++')  # alter max_iter

# using same test, train split as above. Labels y_train, y_test are assumed to be lost

centroids = kmeans.fit(X_train)
spd_manifold.belongs(centroids)


array([ True,  True])

In [5]:
predicted_labels = kmeans.predict(X_train)
for i in range(10):
    print(predicted_labels[i], "--> ", y_train[i])

0 -->  SR
0 -->  SR
0 -->  AF
1 -->  SR
0 -->  SR
1 -->  AF
1 -->  SR
1 -->  SR
0 -->  AF
1 -->  AF


In [7]:
np.unique(y_test)

array(['AF', 'SR'], dtype='<U2')