In [1]:
import numpy as np
import scipy.io as sio
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from numpy.matlib import repmat
from sklearn.preprocessing import normalize

%matplotlib inline

In [2]:
# [Vsort,Dsort] = eigsort(V, eigvals)
#
# Sorts a matrix of eigenvectors and an array of eigenvalues in order
# of eigenvalue size, largest eigenvalue first and smallest eigenvalue
# last.
#
# Example usage:
# di, V = np.linalg.eig(L)
# Vnew, Dnew = eigsort(V, di)
#
# Tim Marks 2002

In [3]:
def eigsort(V, eigvals):
    """Reorder eigenvectors and eigenvalues by descending eigenvalue.

    Parameters
    ----------
    V : array_like, 2-D
        Eigenvectors stored as columns; column ``j`` belongs to
        ``eigvals[j]``.
    eigvals : array_like, 1-D
        Eigenvalues, in the same order as the columns of ``V``.

    Returns
    -------
    Vsort : ndarray
        Columns of ``V`` rearranged so that the column belonging to the
        largest eigenvalue comes first. Complex input stays complex;
        integer/boolean input is promoted to float.
    Dsort : ndarray
        Square diagonal matrix holding the eigenvalues from largest to
        smallest.
    """
    eigvals = np.asarray(eigvals)

    # A single argsort yields the permutation; reversing it gives the
    # largest-first order used for both the eigenvalues and the columns.
    order = np.argsort(eigvals)[::-1]
    lambd = eigvals[order]
    Dsort = np.diag(lambd)

    V = np.asarray(V)
    # Keep returning a floating-point matrix for integer/boolean input,
    # but do not force float64 on complex eigenvectors: that would drop
    # their imaginary part.
    if not np.issubdtype(V.dtype, np.inexact):
        V = V.astype(float)

    # Fancy indexing copies the selected columns in one step and places no
    # constraint on the number of rows of V.
    Vsort = V[:, order]
    return Vsort, Dsort

In [4]:
# normc(Mat) rescales every column of Mat to unit Euclidean (L2) length.

def normc(Mat):
    """Return ``Mat`` with each column scaled to an L2 norm of 1.

    Delegates to ``sklearn.preprocessing.normalize`` with ``axis=0`` so the
    scaling is applied per column rather than per row.
    """
    unit_columns = normalize(Mat, axis=0, norm='l2')
    return unit_columns

In [5]:
# Load the heart-failure clinical records; the relative path is resolved
# against the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="heart_failure_clinical_records_dataset.csv")
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [15]:
# Per-column average over all rows of df (one value per column).
mean = df.mean(axis=0)
mean

age                             60.833893
anaemia                          0.431438
creatinine_phosphokinase       581.839465
diabetes                         0.418060
ejection_fraction               38.083612
high_blood_pressure              0.351171
platelets                   263358.029264
serum_creatinine                 1.393880
serum_sodium                   136.625418
sex                              0.648829
smoking                          0.321070
time                           130.260870
DEATH_EVENT                      0.321070
dtype: float64

In [10]:
# Centre the data: subtract each column's mean from every row, matching the
# Series index of `mean` against the column labels of df.
Z = df.sub(mean, axis="columns")
Z

           age   anaemia  creatinine_phosphokinase  diabetes  \
0    14.166107 -0.431438                  0.160535  -0.41806   
1    -5.833893 -0.431438               7279.160535  -0.41806   
2     4.166107 -0.431438               -435.839465  -0.41806   
3   -10.833893  0.568562               -470.839465  -0.41806   
4     4.166107  0.568562               -421.839465   0.58194   
..         ...       ...                       ...       ...   
294   1.166107 -0.431438               -520.839465   0.58194   
295  -5.833893 -0.431438               1238.160535  -0.41806   
296 -15.833893 -0.431438               1478.160535   0.58194   
297 -15.833893 -0.431438               1831.160535  -0.41806   
298 -10.833893 -0.431438               -385.839465  -0.41806   

     ejection_fraction  high_blood_pressure      platelets  serum_creatinine  \
0           -18.083612             0.648829    1641.970736           0.50612   
1            -0.083612            -0.351171       0.000736          -0.

In [22]:
# Outer product of the centred data with itself, scaled by the number of
# columns of df. The result has one row and one column per row of df.
# NOTE(review): with one observation per row, a feature covariance matrix
# would usually be Z.T @ Z scaled by the number of rows instead -- confirm
# which of the two is intended before using `cov` downstream.
cov = (Z @ Z.T) / df.shape[1]
cov

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,289,290,291,292,293,294,295,296,297,298
0,2.086602e+05,1.290980e+03,-1.280085e+07,-6.738205e+06,8.039550e+06,-7.496030e+06,-1.722155e+07,2.408025e+07,1129.302787,1.574415e+07,...,1.599437e+07,-5.225018e+06,-1.646617e+07,1.498387e+07,-1.065623e+07,-1.368757e+07,8.375579e+05,6.045362e+07,-1.558224e+07,1.662557e+07
1,1.290980e+03,4.077050e+06,-2.428708e+05,-2.624595e+05,-2.350314e+05,-2.983232e+05,-1.869128e+05,-1.482521e+05,-236735.197840,-2.557725e+05,...,-1.383025e+05,1.736076e+04,-1.478413e+05,-2.206153e+05,-2.694606e+05,-2.929792e+05,6.919476e+05,8.262963e+05,1.023900e+06,-2.175120e+05
2,-1.280085e+07,-2.428708e+05,7.902812e+08,4.160373e+08,-4.961866e+08,4.628201e+08,1.063165e+09,-1.486382e+09,15340.869626,-9.717884e+08,...,-9.873915e+08,3.224568e+08,1.016379e+09,-9.250124e+08,6.577350e+08,8.448589e+08,-5.182878e+07,-3.731913e+09,9.617315e+08,-1.026371e+09
3,-6.738205e+06,-2.624595e+05,4.160373e+08,2.190244e+08,-2.611997e+08,2.436534e+08,5.596899e+08,-7.824723e+08,16483.420191,-5.115707e+08,...,-5.197897e+08,1.697501e+08,5.350581e+08,-4.869487e+08,3.462605e+08,4.447691e+08,-2.730790e+07,-1.964624e+09,5.062508e+08,-5.403072e+08
4,8.039550e+06,-2.350314e+05,-4.961866e+08,-2.611997e+08,3.115765e+08,-2.905709e+08,-6.675336e+08,9.333044e+08,14882.084295,6.102053e+08,...,6.199871e+08,-2.024720e+08,-6.381652e+08,5.808276e+08,-4.129636e+08,-5.304551e+08,3.247450e+07,2.343160e+09,-6.039645e+08,6.444691e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,-1.368757e+07,-2.929792e+05,8.448589e+08,4.447691e+08,-5.304551e+08,4.947831e+08,1.136588e+09,-1.589036e+09,15722.974760,-1.038903e+09,...,-1.055579e+09,3.447288e+08,1.086576e+09,-9.888914e+08,7.031645e+08,9.032118e+08,-5.541047e+07,-3.989650e+09,1.028146e+09,-1.097249e+09
295,8.375579e+05,6.919476e+05,-5.182878e+07,-2.730790e+07,3.247450e+07,-3.037953e+07,-6.970145e+07,9.737624e+07,-41766.346437,6.363717e+07,...,6.468205e+07,-2.112614e+07,-6.662619e+07,6.058072e+07,-4.314437e+07,-5.541047e+07,3.512974e+06,2.446905e+08,-6.285015e+07,6.722356e+07
296,6.045362e+07,8.262963e+05,-3.731913e+09,-1.964624e+09,2.343160e+09,-2.185543e+09,-5.020553e+09,7.019141e+09,-49605.372772,4.589091e+09,...,4.662755e+09,-1.522740e+09,-4.799630e+09,4.368190e+09,-3.105999e+09,-3.989650e+09,2.446905e+08,1.762310e+10,-4.541662e+09,4.846833e+09
297,-1.558224e+07,1.023900e+06,9.617315e+08,5.062508e+08,-6.039645e+08,5.631763e+08,1.293863e+09,-1.809056e+09,-61239.349347,-1.182804e+09,...,-1.201749e+09,3.924558e+08,1.236942e+09,-1.125857e+09,8.004142e+08,1.028146e+09,-6.285015e+07,-4.541662e+09,1.170814e+09,-1.249214e+09
