In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
%matplotlib inline
# ---- Configuration ----------------------------------------------------------
# Root directory of the project tree; a relative path, so it is resolved
# against the current working directory.
ROOT = ".."
# Column whose values are kept as per-sample labels (see ans_list below).
KEY_NAME = "polytype"
# Columns of the descriptor table that are used as features.
DESCRIPTOR_NAMES = [
    'a0.70_rp2.40',
    'a0.70_rp3.00',
    'a0.70_rp3.60',
    'a0.70_rp4.20',
    'a0.70_rp4.80',
    'a0.70_rp5.40',
]

# ---- Data -------------------------------------------------------------------
# One row per sample; must contain the descriptor columns and KEY_NAME.
df = pd.read_csv(f"{ROOT}/data_calculated/Fe2_descriptor.csv")
# Labels from the KEY_NAME column, passed to the comparison plots further down.
ans_list = df[KEY_NAME].values


In [None]:
# Shared output settings: "outputdir" and "prefix" are used to build the paths
# of the saved figures, and the whole mapping is handed to plot_X2_ystring.
METADATA = dict(
    outputdir="image_executed",
    prefix="clustering_Fe2",
    dataname="Fe2",
)

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardize the descriptor columns with a StandardScaler (default settings).
# The fitted scaler is kept so the same transformation can be reapplied later.
scaler = StandardScaler()
Xraw = df[DESCRIPTOR_NAMES].values
X = scaler.fit(Xraw).transform(Xraw)


In [None]:
from sklearn.cluster import KMeans
# K-means clustering of the standardized descriptors.
# nclusters is reused by the Gaussian-mixture model below.
nclusters = 3
# K-means starts from random centroids, so without a fixed seed the cluster
# labels (and every plot built from them) can change on each run.
# n_init is given explicitly because its default differs between
# scikit-learn releases; 10 restarts is the long-standing default.
km = KMeans(nclusters, n_init=10, random_state=0)
km.fit(X)
# Cluster index assigned to every sample.
yp_km = km.predict(X)


In [None]:
from sklearn.decomposition import PCA
# Project the standardized descriptors onto the first two principal components;
# X_PCA is the 2-column array used for the scatter plots below.
drd = PCA(n_components=2)
X_PCA = drd.fit(X).transform(X)


In [None]:
from clustering_misc import plot_X2_ystring
# Plot the 2-D PCA projection together with the polytype labels and the
# k-means cluster assignment, using the project helper plot_X2_ystring.
plot_X2_ystring(
    X_PCA,
    ans_list,
    yp_km,
    comment="km",
    metadata=METADATA,
)


In [None]:
from sklearn.mixture import GaussianMixture
# Gaussian-mixture clustering with the same number of components as k-means.
# The EM fit is randomly initialized, so the seed is fixed to make the
# component labels reproducible from run to run.
gmm = GaussianMixture(nclusters, random_state=0)
gmm.fit(X)
# Most probable component for every sample.
yp_gmm = gmm.predict(X)


In [None]:
# Same plot as for k-means, now showing the Gaussian-mixture assignment.
plot_X2_ystring(
    X_PCA,
    ans_list,
    yp_gmm,
    comment="gmm",
    metadata=METADATA,
)


In [None]:
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage
# Hierarchical clustering of the samples.
# metric: distance used between pairs of samples;
# method: linkage criterion used when merging clusters.
metric, method = 'euclidean', 'single'
# Condensed (1-D) vector of pairwise distances between the rows of X.
pairdistance = pdist(X, metric=metric)
# Linkage matrix consumed by the dendrogram cell below.
Z = linkage(pairdistance, method=method)


In [None]:
from scipy.cluster.hierarchy import dendrogram
import os
# Draw the dendrogram for the linkage matrix Z, one leaf per row of df
# labelled with the "key" column, and save it as a PNG.
label_list = df["key"].values
fig, ax = plt.subplots(figsize=(5, 10))
tree = dendrogram(Z, labels=label_list, orientation="left", ax=ax)
ax.invert_yaxis()
fig.tight_layout()

filename = "_".join([METADATA["prefix"], "dendrogram"]) + ".png"
print(filename)
# Create the output directory on demand so a fresh checkout does not fail
# with FileNotFoundError when the figure is written.
os.makedirs(METADATA["outputdir"], exist_ok=True)
# Save through the figure object rather than the implicit "current figure",
# matching how the descriptor dendrogram is saved.
fig.savefig(os.path.join(METADATA["outputdir"], filename))

In [None]:
from scipy.spatial.distance import squareform
# Hierarchical clustering of the descriptors themselves: two descriptors are
# close when the absolute value of their correlation is high.
corr = 1 - np.abs(df[DESCRIPTOR_NAMES].corr())
# squareform() validates its input by default and requires an exactly zero
# diagonal and exact symmetry; 1 - |corr| can miss that by floating-point
# round-off and raise ValueError. The matrix is a valid dissimilarity by
# construction, so the check is skipped and the upper triangle is used.
pairdistance = squareform(corr.values, checks=False)

Z = linkage(pairdistance)
fig, ax = plt.subplots(figsize=(5, 3))
tree = dendrogram(Z, labels=DESCRIPTOR_NAMES, orientation="left", ax=ax)
filename = "_".join([METADATA["prefix"], "descriptor", "dendrogram"]) + ".png"
print(filename)
fig.tight_layout()
# Create the output directory on demand so saving cannot fail on a fresh
# checkout.
os.makedirs(METADATA["outputdir"], exist_ok=True)
fig.savefig(os.path.join(METADATA["outputdir"], filename))