In [None]:
# for machine learning
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

# for data handling
import pandas as pd

# for visualization
import matplotlib.pyplot as plt

In [None]:
# Load the myopia dataset from the hosted course CSV.
myopia_df = pd.read_csv(
    "https://2u-data-curriculum-team.s3.amazonaws.com/dataviz-classroom/v1.1/21-Deep-Learning/myopia.csv"
)

# Split off the MYOPIC target column in one step: `pop` returns the column
# (kept as `labels` so later plots can be coloured by it) and removes it from
# `myopia_df`, leaving only the feature columns behind.
labels = myopia_df.pop("MYOPIC")

# Preview the remaining feature columns.
myopia_df.head()

In [None]:
# Standardize the feature columns. The scaler is fit on the same data it
# transforms, and the result is a plain array (column names are not kept).
scaler = StandardScaler()
myopia_scaled = scaler.fit_transform(myopia_df)

In [None]:
# Reduce the number of features with PCA. A fractional `n_components` tells
# scikit-learn to keep however many components are needed to explain that
# share of the variance (here 99%).
pca = PCA(n_components=0.99)
myopia_pca = pca.fit_transform(myopia_scaled)

# Wrap the component matrix in a DataFrame whose columns are pc0, pc1, ...
# (one per retained component).
pc_names = [f"pc{idx}" for idx in range(myopia_pca.shape[1])]
myopia_pca_df = pd.DataFrame(myopia_pca, columns=pc_names)
myopia_pca_df.head()

In [None]:
# Further reduce the PCA output with t-SNE so the data can be plotted.
#
# - `random_state` is fixed because t-SNE is stochastic: without a seed every
#   run (and every Restart & Run All) produces a different embedding, so the
#   plots below would not be reproducible.
# - The input is the PCA array `myopia_pca`, not `myopia_pca_df`. A later cell
#   adds the t-SNE "x"/"y" columns to that frame, so re-running this cell
#   against the frame would feed the previous embedding back in as features.
tsne = TSNE(learning_rate=35, random_state=1)
tsne_features = tsne.fit_transform(myopia_pca)

# Sanity check: expect (n_samples, 2) with the default n_components.
tsne_features.shape

In [None]:
# Attach the two t-SNE coordinates to the PCA frame as columns "x" and "y"
# so the plotting cells can read them by name.
myopia_pca_df = myopia_pca_df.assign(
    x=tsne_features[:, 0],
    y=tsne_features[:, 1],
)

In [None]:
# Plot the 2-D t-SNE embedding with no colouring, to check by eye whether
# the points fall into distinct groups.
plt.scatter(myopia_pca_df["x"], myopia_pca_df["y"])
plt.title("t-SNE embedding of PCA-reduced myopia features")
plt.xlabel("t-SNE x")
plt.ylabel("t-SNE y")
plt.show()

In [None]:
# Same t-SNE embedding, coloured by the held-out MYOPIC labels, to see
# whether the known classes separate in the embedding.
points = plt.scatter(myopia_pca_df["x"], myopia_pca_df["y"], c=labels)

# Build the legend from the scatter's colour mapping so each colour is tied
# to the label value it represents.
plt.legend(*points.legend_elements(), title="MYOPIC")
plt.title("t-SNE embedding coloured by MYOPIC label")
plt.xlabel("t-SNE x")
plt.ylabel("t-SNE y")
plt.show()

In [None]:
# Elbow curve: fit K-Means for a range of cluster counts and plot the inertia
# of each fit against k. A visible "elbow" suggests a reasonable cluster count.
#
# Changes from the previously commented-out version of this cell:
# - k starts at 1. The old `range(10)` began at 0, and K-Means cannot be
#   fitted with zero clusters.
# - The model is fit on the PCA components (`myopia_pca`) only. By this point
#   `myopia_pca_df` also carries the t-SNE "x"/"y" plotting columns, which
#   should not be used as clustering features.
inertia = []
k = list(range(1, 11))

for i in k:
    # n_init is set explicitly so results do not depend on the default of the
    # installed scikit-learn version; random_state makes the fit repeatable.
    km = KMeans(n_clusters=i, n_init=10, random_state=1)
    km.fit(myopia_pca)
    inertia.append(km.inertia_)

elbow_df = pd.DataFrame({"k": k, "inertia": inertia})
plt.plot(elbow_df["k"], elbow_df["inertia"])
plt.xticks(k)
plt.title("Elbow Curve")
plt.xlabel("Cluster #")
plt.ylabel("Inertia")
plt.show()