## Tangent space ML

In [1]:
from geomstats.geometry.spd_matrices import SPDMatrices
from geomstats.learning.preprocessing import ToTangentSpace
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from data_util import *
import numpy as np

spd_manifold = SPDMatrices(n=12)


mat, all_ids, targets = dn_load_Chapman_ECG()
mat = np.asarray(mat)

INFO: Using numpy backend
INFO: Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO: NumExpr defaulting to 8 threads.


Loading denoised dataset of Chapman Shaoxing 12-lead ECG Data...


100%|██████████| 10646/10646 [00:55<00:00, 192.27it/s] 


In [2]:
cnt = 0
for i in range(len(mat)):
    if not spd_manifold.belongs(mat[i]):
        cnt +=1
print(cnt)
# 9063, 5  (before removing data that do not lie on the manifold)
print(mat.shape)

0
(9058, 12, 12)


In [5]:
lr_pipeline = Pipeline(
    steps=[
        ("trick_tangent_space", ToTangentSpace(geometry=spd_manifold.metric)),
        ("standardscaler", StandardScaler()),
        ("classifier", LogisticRegression(C=1.0, max_iter=100, solver='saga', class_weight='balanced')),
    ]
)



X = mat
y = targets
X_train, X_test, y_train, y_test = train_test_split(X, y)
lr_pipeline.fit(X_train, y_train)

print("Accuracy:{:.2f} ".format(lr_pipeline.score(X_test, y_test)))
y_pred = lr_pipeline.predict(X_test)
cmtx = get_confusion_matrix(y_test=y_test, y_pred=y_pred, target_class_list=["AFIB", "SR", "SB", "ST"])
print(cmtx)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 1, change: 1.00000000
Epoch 2, change: 0.28947072
Epoch 3, change: 0.17816548
Epoch 4, change: 0.14265912
Epoch 5, change: 0.09088513
Epoch 6, change: 0.08315843
Epoch 7, change: 0.06511260
Epoch 8, change: 0.05363750
Epoch 9, change: 0.04365814
Epoch 10, change: 0.03914902
Epoch 11, change: 0.03265806
Epoch 12, change: 0.02898237
Epoch 13, change: 0.02538841
Epoch 14, change: 0.02259576
Epoch 15, change: 0.02007033
Epoch 16, change: 0.01830232
Epoch 17, change: 0.01614148
Epoch 18, change: 0.01465276
Epoch 19, change: 0.01361877
Epoch 20, change: 0.01288452
Epoch 21, change: 0.01251353
Epoch 22, change: 0.01203765
Epoch 23, change: 0.01130266
Epoch 24, change: 0.01097676
Epoch 25, change: 0.01069508
Epoch 26, change: 0.01030659
Epoch 27, change: 0.00980162
Epoch 28, change: 0.00959351
Epoch 29, change: 0.00911842
Epoch 30, change: 0.00896850
Epoch 31, change: 0.00867492
Epoch 32, change: 0.00853978
Epoch 33, change: 0.00812835
Epoch 34, change: 0.00793510
Epoch 35, change: 0.007

In [3]:
from sklearn.neural_network import MLPClassifier
import traceback
import sys

"""
try:
    data_in_ts = ToTangentSpace(geometry=spd_manifold.metric)
    ts_data = data_in_ts.fit(X=mat)
except Exception:
    print(traceback.format_exc())
"""

lr_pipeline = Pipeline(
    steps=[
        ("trick_tangent_space", ToTangentSpace(geometry=spd_manifold.metric)),
        ("standardscaler", StandardScaler()),
        ("classifier", MLPClassifier(hidden_layer_sizes=(30,30,30,30),learning_rate='adaptive',alpha=1.0,max_iter=10000)),
    ]
)




X = mat
y = targets
X_train, X_test, y_train, y_test = train_test_split(X, y)
lr_pipeline.fit(X_train, y_train)


Pipeline(steps=[('trick_tangent_space',
                 ToTangentSpace(geometry=<geomstats.geometry.spd_matrices.SPDAffineMetric object at 0x7fcd3467ac40>)),
                ('standardscaler', StandardScaler()),
                ('classifier',
                 MLPClassifier(alpha=1.0, hidden_layer_sizes=(30, 30, 30, 30),
                               learning_rate='adaptive', max_iter=10000))])

In [4]:
print("Accuracy:{:.2f} ".format(lr_pipeline.score(X_test, y_test)))
y_pred = lr_pipeline.predict(X_test)
cmtx = get_confusion_matrix(y_test=y_test, y_pred=y_pred, target_class_list=["AFIB", "SR", "SB", "ST"])
print(cmtx)

Accuracy:0.52 
[1. 1. 1. 1.]
           pred:AFIB  pred:SR  pred:SB  pred:ST
true:AFIB      0.523    0.011    0.344    0.123
true:SR        0.080    0.163    0.600    0.157
true:SB        0.061    0.049    0.793    0.098
true:ST        0.125    0.120    0.456    0.299


## Riemannian KMeans clustering

In [4]:
from geomstats.learning.kmeans import RiemannianKMeans

kmeans = RiemannianKMeans(spd_manifold.metric, n_clusters=2, max_iter=50, init='kmeans++')  # alter max_iter

# using same test, train split as above. Labels y_train, y_test are assumed to be lost

centroids = kmeans.fit(X_train)
spd_manifold.belongs(centroids)


array([ True,  True])

In [5]:
predicted_labels = kmeans.predict(X_train)
for i in range(10):
    print(predicted_labels[i], "--> ", y_train[i])

0 -->  SR
0 -->  SR
0 -->  AF
1 -->  SR
0 -->  SR
1 -->  AF
1 -->  SR
1 -->  SR
0 -->  AF
1 -->  AF


In [7]:
np.unique(y_test)

array(['AF', 'SR'], dtype='<U2')

# MDM

In [23]:
from geomstats.learning.mdm import RiemannianMinimumDistanceToMean
from geomstats.geometry.spd_matrices import SPDMatrices
from geomstats.geometry.spd_matrices import SPDEuclideanMetric
from geomstats.geometry.spd_matrices import SPDAffineMetric

#spd_manifold = SPDMatrices(n=12)


mdm = RiemannianMinimumDistanceToMean(
    riemannian_metric=SPDEuclideanMetric(n=12))

X = mat
y = targets
X_train, X_test, y_train, y_test = train_test_split(X, y)
mdm.fit(X_train, np.array(y_train))
print(mdm.score(X_train, y_train))
print(X_train.shape)



0.17301750772399588
(6797, 12, 12)
