In [7]:
from matplotlib.patches import Ellipse
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import adjusted_rand_score, silhouette_score
from sklearn.mixture import GaussianMixture
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
confile = list(pd.read_csv('../../dbcon.csv'))
postgres_db = 'heartdisease'
db_connection = 'postgresql://{}:{}@{}:{}/{}'.format(confile[0], confile[1], confile[2], confile[3], postgres_db)

query = '''
SELECT *
FROM heartdisease
;'''

heartdisease_df = pd.read_sql(query, db_connection)

X = heartdisease_df.iloc[:, :13]
y = heartdisease_df.iloc[:, 13]

X = X.replace(to_replace='?', value=0)
y = np.where(y > 0, 0, 1)

scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)

In [10]:
model = GaussianMixture(n_components=2)
y_pred = model.fit_predict(scaled_X)
display(
    "ARI Score: {n}".format(n=adjusted_rand_score(y, y_pred)),
    "Silhouette Score: {m}".format(m=silhouette_score(scaled_X, y_pred))
)

'ARI Score: 0.18389186035089963'

'Silhouette Score: 0.13628813153331445'

It's performing better than the other previous models

In [15]:
covar_types = ['full', 'tied', 'diag', 'spherical']
score_df = pd.DataFrame(
    columns=['ARI_Score', 'SIL_Score'],
    index=covar_types)
for covar in covar_types:
    model.covariance_type = covar
    y_pred = model.fit_predict(scaled_X)
    score_df.loc[covar, 'ARI_Score'] = adjusted_rand_score(y, y_pred)
    score_df.loc[covar, 'SIL_Score'] = silhouette_score(scaled_X, y_pred)
score_df

Unnamed: 0,ARI_Score,SIL_Score
full,0.420732,0.161186
tied,0.183892,0.136288
diag,0.183892,0.136288
spherical,0.207652,0.124688


Full covariance seems to be the better one, and I would say spherical is the second best