# Silhouettes !

## PCA

In [None]:
import numpy as np
import pandas as pd

from sklearn.decomposition import PCA

# Load the rev-sils-centered table directly from the course utils repository.
sils_df = pd.read_csv(
  "https://github.com/PSAM-5020-2025F-A/5020-utils/raw/refs/heads/main/datasets/image/rev-sils/rev-sils-centered.csv"
)

# Split the column names by prefix: names beginning with "x" or "y" go to
# xy_cols, every other column goes to nonxy_cols.
xy_cols = [name for name in sils_df.columns if name.startswith(("x", "y"))]
nonxy_cols = [name for name in sils_df.columns if name[:1] not in ("x", "y")]

In [None]:
# 128-component PCA; set_output(transform="pandas") makes fit_transform
# return a DataFrame instead of a bare array.
mpca = PCA(n_components=128)
mpca.set_output(transform="pandas")

# Per-row PC scores, rounded to 6 decimals.
sils_pcs_df = mpca.fit_transform(sils_df[xy_cols]).round(6)

# Components table: row 0 is the PCA mean, the remaining rows are the
# principal axes. Also rounded to 6 decimals.
sils_components_df = pd.DataFrame(
  np.concatenate((mpca.mean_[np.newaxis, :], mpca.components_), axis=0),
  columns=xy_cols,
).round(6)

# Number of fitted components and the total explained-variance ratio.
print(mpca.n_components_, sum(mpca.explained_variance_ratio_))

In [None]:
# Write the LR-variant PCA results: the PC scores table and the components
# table. Paths are relative to the working directory; assumes ../data
# already exists. Plain string literals are used because the paths contain
# no placeholders to interpolate.
sils_pcs_df.to_csv("../data/sils_LR_pca_pcs.csv", index=False)
sils_components_df.to_csv("../data/sils_LR_pca_components.csv", index=False)

## Cluster (align L/R) + PCA

In [None]:
import numpy as np
import pandas as pd

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Load the rev-sils-centered table directly from the course utils repository.
sils_df = pd.read_csv(
  "https://github.com/PSAM-5020-2025F-A/5020-utils/raw/refs/heads/main/datasets/image/rev-sils/rev-sils-centered.csv"
)

# Column-name groups, selected purely by name prefix.
x_cols = [name for name in sils_df.columns if name.startswith("x")]
y_cols = [name for name in sils_df.columns if name.startswith("y")]
xy_cols = [name for name in sils_df.columns if name.startswith(("x", "y"))]
nonxy_cols = [name for name in sils_df.columns if name[:1] not in ("x", "y")]

In [None]:
# Split the rows into two groups on their x/y columns. A fixed random_state
# makes the clustering (and everything derived from it, including the
# exported RR files) reproducible from run to run.
clusters = KMeans(n_clusters=2, random_state=0).fit_predict(sils_df[xy_cols])
cs, cnts = np.unique(clusters, return_counts=True)

# The smaller cluster is the one that gets mirrored. Read its label from cs
# rather than assuming the position inside cnts equals the label.
c_to_flip = int(cs[np.argmin(cnts)])

# Build the row mask once and reuse it for every read and write below.
flip_mask = clusters == c_to_flip

# Mirror horizontally: negate x, and reverse the column order of both x and
# y (presumably so the mirrored outline keeps the same traversal direction
# as the un-mirrored ones; confirm against the dataset).
sils_df.loc[flip_mask, x_cols] = -sils_df.loc[flip_mask, x_cols].values[:, ::-1]
sils_df.loc[flip_mask, y_cols] = sils_df.loc[flip_mask, y_cols].values[:, ::-1]

In [None]:
def polar_angle(xy):
  """Return the polar angle of a point, shifted by pi.

  Args:
    xy: A pair that unpacks into exactly two values, ``(x, y)``.

  Returns:
    ``arctan2(y, x) + pi``. Since ``arctan2`` yields values in
    ``[-pi, pi]``, the result lies in ``[0, 2*pi]``.
  """
  px, py = xy
  theta = np.arctan2(py, px)
  return theta + np.pi

def shift_rotate(row):
  """Cyclically shift a row's x/y values by its ``"offset"`` entry.

  The values under the module-level ``xy_cols`` are rolled left by
  ``2 * row["offset"]`` positions, so the value at index ``2 * offset``
  moves to the front (two values per point, matching how the offset is
  computed from a ``(..., 2)`` reshape elsewhere in this notebook).

  Args:
    row: A row (Series) holding the ``xy_cols`` values and an "offset" entry.

  Returns:
    The same ``row`` object, modified in place.
  """
  first_point = row["offset"]
  coords = row[xy_cols].values
  row[xy_cols] = np.roll(coords, shift=-2 * first_point, axis=0)
  return row

# View every row as a sequence of 2-value points: (rows, points, 2).
points = sils_df[xy_cols].values.reshape(-1, len(x_cols), 2)

# For each row, find the index of the point with the smallest polar_angle;
# that index is stored temporarily in an "offset" column.
sils_df["offset"] = np.apply_along_axis(polar_angle, 2, points).argmin(axis=1)

# Rotate each row so that point comes first, then discard the helper column.
sils_df = sils_df.apply(shift_rotate, axis="columns")
sils_df = sils_df.drop(columns="offset")

In [None]:
# 128-component PCA; set_output(transform="pandas") makes fit_transform
# return a DataFrame instead of a bare array.
mpca = PCA(n_components=128)
mpca.set_output(transform="pandas")

# Per-row PC scores, rounded to 6 decimals.
sils_pcs_df = mpca.fit_transform(sils_df[xy_cols]).round(6)

# Components table: row 0 is the PCA mean, the remaining rows are the
# principal axes. Also rounded to 6 decimals.
sils_components_df = pd.DataFrame(
  np.concatenate((mpca.mean_[np.newaxis, :], mpca.components_), axis=0),
  columns=xy_cols,
).round(6)

# Number of fitted components and the total explained-variance ratio.
print(mpca.n_components_, sum(mpca.explained_variance_ratio_))

In [None]:
# Write the RR-variant PCA results: the PC scores table and the components
# table. Paths are relative to the working directory; assumes ../data
# already exists. Plain string literals are used because the paths contain
# no placeholders to interpolate.
sils_pcs_df.to_csv("../data/sils_RR_pca_pcs.csv", index=False)
sils_components_df.to_csv("../data/sils_RR_pca_components.csv", index=False)

## Test PC files

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Load the rev-sils-centered table directly from the course utils repository.
sils_df = pd.read_csv(
  "https://github.com/PSAM-5020-2025F-A/5020-utils/raw/refs/heads/main/datasets/image/rev-sils/rev-sils-centered.csv"
)

# Split the column names by prefix: names beginning with "x" or "y" go to
# xy_cols, every other column goes to nonxy_cols.
xy_cols = [name for name in sils_df.columns if name.startswith(("x", "y"))]
nonxy_cols = [name for name in sils_df.columns if name[:1] not in ("x", "y")]

In [None]:
# Which exported PCA variant to inspect; it selects the file names below.
variant = "RR"

# Read back the two exported tables for that variant.
sils_components_df = pd.read_csv(f"../data/sils_{variant}_pca_components.csv")
sils_pcs_df = pd.read_csv(f"../data/sils_{variant}_pca_pcs.csv")

# PC score columns are the ones whose name begins with "pca". Everything
# else in the scores table is collected in nonxy_cols (note: this rebinds
# the nonxy_cols name to columns of the scores table).
pc_cols = [name for name in sils_pcs_df.columns if name[:3] == "pca"]
nonxy_cols = [name for name in sils_pcs_df.columns if name not in pc_cols]

In [None]:
# Plain NumPy arrays for the matrix arithmetic in the cells below.
# Components table: used later as row 0 (offset) plus rows 1+ (basis).
sils_components_np = sils_components_df.values
# Scores table with every non-PC column removed.
sils_pcs_np = sils_pcs_df.drop(nonxy_cols, axis="columns").values

In [None]:
# Row to inspect.
idx = 1777

# Points of the loaded row, two values per point. The second coordinate is
# negated for plotting (presumably because the data uses a y-down image
# convention; confirm).
points = sils_df.loc[idx, xy_cols].values.reshape(-1, 2)
xs, ys = points[:, 0], -points[:, 1]

# Reconstruction from the PC files: scores @ component rows (rows 1+) plus
# row 0 of the components table.
ppoints = (sils_components_np[0] + sils_pcs_np[idx] @ sils_components_np[1:]).reshape(-1, 2)
pxs, pys = ppoints[:, 0], -ppoints[:, 1]

plt.axis("equal")
# Loaded row in blue, reconstruction in red shifted right by 0.6 so the two
# sit side by side. The first two points of each are overdrawn with black
# "x" markers.
for shape_xs, shape_ys, shape_color in ((xs, ys, "blue"), (pxs + 0.6, pys, "red")):
  plt.plot(shape_xs, shape_ys, marker="o", markersize=2, linestyle="", alpha=1, color=shape_color)
  plt.plot(shape_xs[:2], shape_ys[:2], marker="x", markersize=4, linestyle="", alpha=1, color="black")
plt.show()

In [None]:
# A PC score vector with every entry set to the same constant (0.0 here),
# the same length as one row of scores.
avgPCs = np.full(sils_pcs_np[0].shape, 0.0)

# Map the scores back to points: scores @ component rows (rows 1+) plus
# row 0 of the components table. With all-zero scores this is just row 0.
points = (sils_components_np[0] + avgPCs @ sils_components_np[1:]).reshape(-1, 2)
xs, ys = points[:, 0], -points[:, 1]

plt.axis("equal")
plt.plot(xs, ys, marker="o", markersize=4, linestyle="", alpha=0.5)
plt.show()