## Tangent space ML

In [1]:
from geomstats.geometry.spd_matrices import SPDMatrices
from geomstats.learning.preprocessing import ToTangentSpace
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from data_util import *
import numpy as np

# Manifold of 12x12 symmetric positive-definite matrices; the dataset is
# 12-lead ECG, and the loaded array has shape (n_samples, 12, 12).
spd_manifold = SPDMatrices(n=12)

# dn_load_Chapman_ECG is provided by the `data_util` star import. It returns
# the per-recording matrices, the recording ids and the class labels.
# NOTE(review): how the matrices are derived from the signals is not visible
# in this notebook - confirm in data_util.
raw_matrices, all_ids, targets = dn_load_Chapman_ECG()

# Convert to an ndarray so the matrices can be passed to geomstats/sklearn as
# a single batch.
mat = np.asarray(raw_matrices)

INFO: Using numpy backend
INFO: Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO: NumExpr defaulting to 8 threads.


Loading denoised dataset of Chapman Shaoxing 12-lead ECG Data...


100%|██████████| 10646/10646 [00:55<00:00, 193.24it/s] 


In [2]:
# Sanity check: count the matrices that do NOT lie on the SPD manifold.
# Each matrix is tested individually, exactly as in a plain index loop.
cnt = sum(1 for sample in mat if not spd_manifold.belongs(sample))
print(cnt)
# Historical note (presumably from an earlier run, before the samples that do
# not lie on the manifold were removed from the data): 9063 matrices, 5 of
# them off the manifold.
print(mat.shape)

0
(9058, 12, 12)


In [3]:
# Tangent-space classification with logistic regression:
#   1. ToTangentSpace maps each SPD matrix to a tangent-space representation,
#   2. StandardScaler standardises the resulting features,
#   3. LogisticRegression (saga solver, balanced class weights) classifies.
#
# random_state=42 is set on both the split and the classifier: the split is
# shuffled and the 'saga' solver is stochastic, so without a seed the accuracy
# and confusion matrix below change on every run.
lr_pipeline = Pipeline(
    steps=[
        ("trick_tangent_space", ToTangentSpace(geometry=spd_manifold.metric)),
        ("standardscaler", StandardScaler()),
        (
            "classifier",
            LogisticRegression(
                C=1.0,
                max_iter=100,
                solver='saga',
                class_weight='balanced',
                random_state=42,
            ),
        ),
    ]
)

X = mat
y = targets
# Default train_test_split proportions, but with a fixed seed so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
lr_pipeline.fit(X_train, y_train)

print("Accuracy:{:.2f} ".format(lr_pipeline.score(X_test, y_test)))
y_pred = lr_pipeline.predict(X_test)
# get_confusion_matrix comes from the `data_util` star import.
# NOTE(review): the recorded output looks row-normalised - confirm in data_util.
cmtx = get_confusion_matrix(y_test=y_test, y_pred=y_pred, target_class_list=["AFIB", "SR", "SB", "ST"])
print(cmtx)

Accuracy:0.48 
           pred:AFIB  pred:SR  pred:SB  pred:ST
true:AFIB      0.661    0.092    0.119    0.128
true:SR        0.123    0.372    0.271    0.234
true:SB        0.161    0.247    0.477    0.115
true:ST        0.245    0.214    0.151    0.391




In [4]:
from sklearn.neural_network import MLPClassifier
import traceback
import sys

# Tangent-space classification with a multi-layer perceptron. Same
# preprocessing as the logistic-regression pipeline (tangent-space mapping
# followed by standardisation); only the final estimator differs.
#
# NOTE: the variable is still called `lr_pipeline` although the classifier is
# an MLP. The name is kept so that anything referring to it keeps working.
#
# random_state=42 is set on both the split and the MLP: weight initialisation
# and batch shuffling are random, so without a seed the scores below are not
# reproducible.
lr_pipeline = Pipeline(
    steps=[
        ("trick_tangent_space", ToTangentSpace(geometry=spd_manifold.metric)),
        ("standardscaler", StandardScaler()),
        (
            "classifier",
            MLPClassifier(
                hidden_layer_sizes=(30, 30, 30, 30),
                learning_rate='adaptive',
                alpha=1.0,
                max_iter=10000,
                random_state=42,
            ),
        ),
    ]
)

X = mat
y = targets
# Seeded split. The clustering cell further down reuses X_train / y_train from
# this cell, so it becomes deterministic as well.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
lr_pipeline.fit(X_train, y_train)

print("Accuracy:{:.2f} ".format(lr_pipeline.score(X_test, y_test)))
y_pred = lr_pipeline.predict(X_test)
# get_confusion_matrix comes from the `data_util` star import.
cmtx = get_confusion_matrix(y_test=y_test, y_pred=y_pred, target_class_list=["AFIB", "SR", "SB", "ST"])
print(cmtx)

Accuracy:0.50 
           pred:AFIB  pred:SR  pred:SB  pred:ST
true:AFIB      0.490    0.022    0.371    0.116
true:SR        0.068    0.118    0.680    0.134
true:SB        0.081    0.050    0.804    0.065
true:ST        0.199    0.101    0.501    0.199


## Riemannian KMeans clustering

In [4]:
from geomstats.learning.kmeans import RiemannianKMeans

# Unsupervised clustering of the training matrices into two groups, using the
# metric attached to the SPD manifold.
kmeans = RiemannianKMeans(
    spd_manifold.metric,
    n_clusters=2,
    max_iter=50,  # alter max_iter
    init='kmeans++',
)

# Reuses X_train from the most recent train/test split executed above; the
# labels y_train / y_test are treated as unavailable for this experiment.
# NOTE(review): this relies on fit() returning the centroids, as the recorded
# output suggests - verify against the installed geomstats version.
centroids = kmeans.fit(X_train)

# Cell output: for each centroid, whether it lies on the SPD manifold.
spd_manifold.belongs(centroids)


array([ True,  True])

In [5]:
# Assign every training sample to a cluster, then print the first ten cluster
# ids next to the true labels for a quick visual comparison.
predicted_labels = kmeans.predict(X_train)
for sample_idx in range(10):
    cluster_id, true_label = predicted_labels[sample_idx], y_train[sample_idx]
    print(cluster_id, "--> ", true_label)

0 -->  SR
0 -->  SR
0 -->  AF
1 -->  SR
0 -->  SR
1 -->  AF
1 -->  SR
1 -->  SR
0 -->  AF
1 -->  AF


## Minimum Distance to Mean (MDM)

In [5]:
from geomstats.learning.mdm import RiemannianMinimumDistanceToMean
from geomstats.geometry.spd_matrices import *

# Minimum-distance-to-mean classification, repeated with four different
# metrics on 12x12 SPD matrices so that their test scores can be compared on
# the same split.
X = mat
y = targets
# Seeded split: every metric is scored on the same, reproducible partition.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Convert the training labels once instead of once per metric.
train_labels = np.array(y_train)

# (printed heading, metric class) pairs, in the order they are reported.
# The metric classes come from the geomstats.geometry.spd_matrices star import.
metric_classes = [
    ("Log Euclidean Metric", SPDLogEuclideanMetric),
    ("Euclidean Metric", SPDEuclideanMetric),
    ("Bures Wasserstein Metric", SPDBuresWassersteinMetric),
    ("Affine Metric", SPDAffineMetric),
]

for position, (title, metric_class) in enumerate(metric_classes):
    mdm = RiemannianMinimumDistanceToMean(
        riemannian_metric=metric_class(n=12))
    mdm.fit(X_train, train_labels)
    # A blank line separates consecutive results; none before the first one.
    heading = title if position == 0 else "\n" + title
    print(heading)
    print(mdm.score(X_test, y_test))

Log Euclidean Metric
0.4194260485651214

Euclidean Metric
0.3726269315673289

Bures Wasserstein Metric
0.3801324503311258

Affine Metric
0.3818984547461369
