In [None]:
import os
import itertools

import numpy as np
import pandas as pd
from scipy import linalg
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, IncrementalPCA
from sklearn.cluster import KMeans, DBSCAN, OPTICS
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split

# Endless colour cycle for plotting; not referenced by the cells visible in
# this part of the notebook, kept because later cells may rely on it.
color_iter = itertools.cycle(['navy', 'c', 'cornflowerblue', 'gold',
                              'darkorange'])

# Output folder for the figures saved by the plotting cells below.
# `exist_ok=True` makes the cell idempotent on re-run and avoids the
# check-then-create race of a separate `os.path.exists` test.
plots_directory = "./plots"
os.makedirs(plots_directory, exist_ok=True)
    

In [None]:
# Read the two header-less CSV tables: df1 comes from the "normal" file and
# df2 from the "abnormal" file.
df1, df2 = (
    pd.read_csv(csv_path, header=None)
    for csv_path in ("../data/heartbeat/ptbdb_normal.csv",
                     "../data/heartbeat/ptbdb_abnormal.csv")
)

# Stack both tables (original row indices are kept) and hold out 20% of the
# rows for testing. Column 187 is used as the stratification target, and the
# fixed seed makes the split repeatable.
df = pd.concat((df1, df2))
df_train, df_test = train_test_split(
    df,
    stratify=df[187],
    test_size=0.2,
    random_state=1337,
)


In [None]:
# print(df_train.head(10))
# print(df_train.describe())
# 
# print(df1.head(10))
# print(df1.describe())
# 
# print(df2.head(10))
# print(df2.describe())

In [None]:
# Preview grid: the first 20 rows of df1 (read from ptbdb_normal.csv), one
# row per panel on a 4 x 5 layout.
fig, axes = plt.subplots(nrows=4, ncols=5)

for i, ax in enumerate(axes.flat):
    # Columns 0..186 are drawn; column 187 is left out of the trace.
    df1.iloc[i, :187].plot(ax=ax, color='orange')
    ax.set(xticks=[], yticks=[])

# Disabled alternative: y ticks on the first column of panels.
# for i in [0, 5, 10, 15]:
#     fig.axes[i].set_yticks([0, 0.5, 1])

# Only the bottom row of panels gets x ticks back.
for ax in axes[-1]:
    ax.set_xticks([0, 100, 187])

fig.savefig(os.path.join(plots_directory, 'ptbdb_samples1.svg'))
plt.show()


In [None]:
# Preview grid: the first 20 rows of df2 (read from ptbdb_abnormal.csv), one
# row per panel on a 4 x 5 layout.
fig, axes = plt.subplots(nrows=4, ncols=5)

for i, ax in enumerate(axes.flat):
    # Columns 0..186 are drawn; column 187 is left out of the trace.
    df2.iloc[i, :187].plot(ax=ax, color='orange')
    ax.set(xticks=[], yticks=[])

# Disabled alternative: y ticks on the first column of panels.
# for i in [0, 5, 10, 15]:
#     fig.axes[i].set_yticks([0, 0.5, 1])

# Only the bottom row of panels gets x ticks back.
for ax in axes[-1]:
    ax.set_xticks([0, 100, 187])

fig.savefig(os.path.join(plots_directory, 'ptbdb_samples2.svg'))
plt.show()


In [None]:
# Separate the training frame into X (the first 187 columns, selected by
# position) and Y (the column labelled 187, the same column the train/test
# split was stratified on).
# NOTE(review): presumably columns 0..186 hold the signal samples and 187 the
# class label - confirm against the dataset description.
X, Y = df_train.iloc[:, :187], df_train[187]

In [None]:
# Fit a PCA with all components and accumulate the per-component explained
# variance ratios into a running total.
pca = PCA()
pca.fit(X)
evr = pca.explained_variance_ratio_.cumsum()

# `cut` is the first running total that reaches 0.99; `cut_index` is the
# tuple of index arrays (as returned by np.where / np.nonzero) locating that
# value inside `evr`.
reaches_target = evr >= 0.99
cut = evr[reaches_target][0]
cut_index = np.nonzero(evr == cut)

In [None]:
# `cut_index` holds the 0-based position in the cumulative explained-variance
# curve where the 0.99 threshold is first reached. Position k corresponds to
# the first k + 1 components, so the number of components to keep is the
# position plus one; using the bare position keeps one component too few and
# lands below the threshold.
n_components = cut_index[0].item() + 1

# Project X onto that many components, fitting in mini-batches of 100 rows.
ipca = IncrementalPCA(n_components=n_components, batch_size=100)
X_ipca = ipca.fit_transform(X)


In [None]:
# Two-cluster K-Means on the PCA-reduced training data.
# `random_state` pins the centroid initialisation so the cluster ids (and
# therefore the colours in the comparison figure) are the same on every run;
# 1337 matches the seed already used for the train/test split.
# `n_init` is spelled out so the result does not depend on the installed
# scikit-learn's default, which differs between releases.
km = KMeans(n_clusters=2, n_init=10, random_state=1337)
y_pred_km = km.fit_predict(X=X_ipca)

In [None]:
# Two-component Gaussian mixture with full covariance matrices, fitted on
# the PCA-reduced training data and then used to label the same rows.
# `random_state` makes the initialisation, and hence the component ids and
# plot colours, reproducible; 1337 matches the seed used for the train/test
# split.
gmm = GaussianMixture(n_components=2, covariance_type='full',
                      max_iter=100, random_state=1337).fit(X_ipca)
y_pred_gm = gmm.predict(X_ipca)

In [None]:
# 2 x 2 summary figure: cumulative explained variance, ground-truth labels,
# K-Means clusters and GMM clusters, the last three drawn on the first two
# principal components.
fig, axes = plt.subplots(nrows=2, ncols=2)
fig.tight_layout(h_pad=3.0)

###%% Explained Variance
# evr[k] is the cumulative ratio for the first k + 1 components, so the curve
# is drawn against a 1-based component count, and scaled to percent so the
# values agree with the axis label.
component_counts = np.arange(1, len(evr) + 1)
# cut_index is the np.where tuple; reduce it to a single 1-based count.
cut_count = int(np.asarray(cut_index).ravel()[0]) + 1
axes[0,0].plot(component_counts, evr * 100)
axes[0,0].plot(cut_count, cut * 100, 'ro')  # first point reaching the 99% threshold
axes[0,0].set_xticks([0, 50, 100, 150, 187])
axes[0,0].set_xlabel('Number of Components')
axes[0,0].set_ylabel('Variance (%)')  # cumulative over components
axes[0,0].set_title("Explained Variance")

# Two-colour map shared by the three scatter panels.
colors = ['#440154', '#FDE725']
two_class_cmap = mpl.colors.ListedColormap(colors)

###%% Ground Truth
c1 = axes[0,1].scatter(X_ipca[:, 0], X_ipca[:, 1], c=Y, s=2, cmap=two_class_cmap)
# One colorbar for the whole grid; the ticks sit in the middle of each of the
# two colour bands.
# NOTE(review): assumes label 0 is the normal class ('N') and label 1 the
# abnormal class ('IM') - confirm against the dataset.
cb = fig.colorbar(c1, ax=axes)
cb.set_ticks([0.25, 0.75])
cb.set_ticklabels(['N', 'IM'])

##%% KMeans Classifier
# Cluster ids are arbitrary, so a colour here need not denote the same class
# as in the ground-truth panel.
axes[1,0].scatter(X_ipca[:, 0], X_ipca[:, 1], lw=2, c=y_pred_km, s=2, cmap=two_class_cmap)

##%% Gaussian Mixture Model
axes[1,1].scatter(X_ipca[:, 0], X_ipca[:, 1], lw=2, c=y_pred_gm, s=2, cmap=two_class_cmap)

# Identical axis decoration for the three scatter panels.
for panel, panel_title in ((axes[0,1], "Ground Truth"),
                           (axes[1,0], "K-Means"),
                           (axes[1,1], "GMM")):
    panel.set_xlabel("PC1")
    panel.set_ylabel("PC2")
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_title(panel_title)

plt.savefig(os.path.join(plots_directory, 'ptbdb_clustering.png'), dpi=600)
plt.show()