# Fonts!

## PCA

### PCA and components per character

In [None]:
import numpy as np
import pandas as pd

from sklearn.decomposition import PCA

# Load the ordered glyph table: coordinate columns (names starting with "x" or
# "y") plus the remaining non-coordinate columns, one of which is "char".
fonts_ordered_df = pd.read_csv("./csv/fonts_480_ordered.csv")

# Split the columns in a single pass, preserving their order in the file.
xy_cols = []
nonxy_cols = []
for col in fonts_ordered_df.columns:
    if col.startswith(("x", "y")):
        xy_cols.append(col)
    else:
        nonxy_cols.append(col)

# Sorted list of the distinct characters present in the table.
unique_chars = fonts_ordered_df["char"].unique()
char_list = np.sort(unique_chars).tolist()

nrows = len(fonts_ordered_df)
# Floor division: exact only if every character has the same number of rows.
nfonts = nrows // len(char_list)
# Half the coordinate-column count (presumably one x and one y column per point).
npoints = len(xy_cols) // 2

In [None]:
# Fit one PCA per character and gather the scores and the bases.
#
# The per-character frames are accumulated in lists and concatenated once after
# the loop. Growing a DataFrame with pd.concat inside the loop re-copies every
# previously collected row on each iteration, and starting from an empty
# DataFrame triggers pandas' empty-concat deprecation warning.
char_pca_frames = []
char_components_frames = []

for c in char_list:
    mpca = PCA(n_components=min(nfonts, npoints)).set_output(transform="pandas")
    char_df = fonts_ordered_df[fonts_ordered_df["char"] == c]

    # Scores for this character. With pandas output the result is indexed like
    # its input, so the axis=1 concat lines up with the non-coordinate columns.
    char_pca_df = mpca.fit_transform(char_df.drop(columns=nonxy_cols))
    char_pca_frames.append(pd.concat((char_df[nonxy_cols], char_pca_df), axis=1))

    # Basis for this character: row 0 is the PCA mean, the rows after it are
    # the principal components, all expressed over the coordinate columns.
    char_components_df = pd.DataFrame(
        np.concatenate(([mpca.mean_], mpca.components_)), columns=xy_cols
    )
    char_components_df.insert(0, "char", c)
    char_components_frames.append(char_components_df)

# Single concatenation; row indices are left as produced inside the loop.
fonts_pca_df = pd.concat(char_pca_frames, axis=0)
fonts_components_df = pd.concat(char_components_frames, axis=0)

In [None]:
# Sort the per-character scores by row index and round every column that is
# not one of the non-coordinate columns (i.e. the PCA score columns) to 6 decimals.
score_decimals = {col: 6 for col in fonts_pca_df.columns if col not in nonxy_cols}
fonts_pca_df = fonts_pca_df.sort_index().round(score_decimals)

# Round the coordinate columns of the mean/component rows to 6 decimals and
# replace the repeated per-character indices with a fresh 0..n-1 range.
coord_decimals = dict.fromkeys(xy_cols, 6)
fonts_components_df = fonts_components_df.round(coord_decimals)
fonts_components_df = fonts_components_df.reset_index(drop=True)

In [None]:
# Write the per-character scores and bases to CSV; the point count is embedded
# in the file names and the row index is not written.
char_pcs_csv = f"./csv/fonts_{npoints}_pca_char_pcs.csv"
char_components_csv = f"./csv/fonts_{npoints}_pca_char_components.csv"

fonts_pca_df.to_csv(path_or_buf=char_pcs_csv, index=False)
fonts_components_df.to_csv(path_or_buf=char_components_csv, index=False)

### All PCA and components at once

In [None]:
import numpy as np
import pandas as pd

from sklearn.decomposition import PCA

# Load the ordered glyph table again so this section can be run on its own.
fonts_ordered_df = pd.read_csv("./csv/fonts_480_ordered.csv")

# Coordinate columns start with "x" or "y"; everything else is non-coordinate.
xy_cols = []
nonxy_cols = []
for col in fonts_ordered_df.columns:
    if col.startswith(("x", "y")):
        xy_cols.append(col)
    else:
        nonxy_cols.append(col)

# Sorted list of the distinct characters present in the table.
unique_chars = fonts_ordered_df["char"].unique()
char_list = np.sort(unique_chars).tolist()

nrows = len(fonts_ordered_df)
# Floor division: exact only if every character has the same number of rows.
nfonts = nrows // len(char_list)
# Half the coordinate-column count (presumably one x and one y column per point).
npoints = len(xy_cols) // 2

In [None]:
# One PCA fitted over all rows at once (every character together), keeping
# 640 components and returning the scores as a DataFrame.
mpca = PCA(n_components=640).set_output(transform="pandas")

# Scores: project the coordinate columns, round to 6 decimals, then prepend
# the non-coordinate columns.
all_scores_df = mpca.fit_transform(fonts_ordered_df[xy_cols]).round(6)
fonts_pca_df = pd.concat((fonts_ordered_df[nonxy_cols], all_scores_df), axis=1)

# Basis: row 0 is the PCA mean, the rows after it are the principal components.
mean_row = mpca.mean_[np.newaxis, :]
mean_and_components = np.concatenate((mean_row, mpca.components_), axis=0)
fonts_components_df = pd.DataFrame(mean_and_components, columns=xy_cols).round(6)

In [None]:
fonts_pca_df.to_csv(f"./csv/fonts_{npoints}_pca_pcs.csv", index=False)
fonts_components_df.to_csv(f"./csv/fonts_{npoints}_pca_components.csv", index=False)

### Test Chars

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Per-character PCA scores and bases (the file names match what the
# per-character export writes when npoints is 480).
fonts_pca_df = pd.read_csv("./csv/fonts_480_pca_char_pcs.csv")
fonts_components_df = pd.read_csv("./csv/fonts_480_pca_char_components.csv")

# Split the basis table's columns: coordinates ("x"/"y" prefix) vs. the rest.
xy_cols = []
nonxy_cols = []
for col in fonts_components_df.columns:
    if col.startswith(("x", "y")):
        xy_cols.append(col)
    else:
        nonxy_cols.append(col)

# Sorted list of the characters that have a stored basis.
unique_chars = fonts_components_df["char"].unique()
char_list = np.sort(unique_chars).tolist()

In [None]:
# Map each character to a 2-D array holding only the coordinate columns of its
# rows in the basis table (the non-coordinate columns are left out).
char_components = {
    ch: fonts_components_df.loc[fonts_components_df["char"] == ch, xy_cols].to_numpy()
    for ch in char_list
}

In [None]:
ml = "A"

# One weight per principal component of `ml`. Row 0 of the stored array is the
# mean, so the component count is the row count minus one. It was previously
# hard-coded to 41, which breaks the matrix product for any other count.
n_pcs = char_components[ml].shape[0] - 1

# All-zero weights reconstruct the mean shape; change the 0.0 factor to explore.
avgPCs = 0.0 * np.ones((1, n_pcs))

# Reconstruction = weights @ components + mean, regrouped into two columns.
# NOTE(review): the reshape assumes x and y values are interleaved — confirm
# against the column order of the CSV.
points = (avgPCs @ char_components[ml][1:] + char_components[ml][0]).reshape(-1, 2)
xs = points[:, 0]
# y is negated for display (presumably the source coordinates have y pointing down).
ys = -points[:, 1]

plt.axis("equal")
plt.plot(xs, ys, marker="o", markersize=4, linestyle="", alpha=0.5)
plt.show()

### Test PCs

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Scores and basis of the all-characters PCA (the file names match what the
# all-at-once export writes when npoints is 480).
fonts_pcs_df = pd.read_csv("./csv/fonts_480_pca_pcs.csv")
fonts_components_df = pd.read_csv("./csv/fonts_480_pca_components.csv")

# Coordinate columns of the basis table.
xy_cols = [name for name in fonts_components_df.columns if name.startswith(("x", "y"))]

# Split the score table's columns: PCA score columns ("pca" prefix) vs. the rest.
# Despite its name, nonxy_cols here holds the non-score columns.
pc_cols = []
nonxy_cols = []
for name in fonts_pcs_df.columns:
    if name.startswith("pca"):
        pc_cols.append(name)
    else:
        nonxy_cols.append(name)

# Sorted list of the distinct characters present in the score table.
unique_chars = fonts_pcs_df["char"].unique()
char_list = np.sort(unique_chars).tolist()

In [None]:
# Plain numpy views for the matrix products below: the score columns only, and
# the whole basis table (row 0 is used as the mean, the rest as components).
fonts_pcs_np = fonts_pcs_df[pc_cols].to_numpy()
fonts_components_np = fonts_components_df.to_numpy()

In [None]:
ml = "A"
# Which font block to look at; 0 selects the first one.
font_idx = 0

# Row of the score table for (font_idx, ml). The per-font stride is the number
# of distinct characters, replacing the previously hard-coded 62.
# NOTE(review): assumes rows are grouped per font with characters in the same
# sorted order as char_list — confirm against how the CSV was built.
idx = char_list.index(ml) + len(char_list) * font_idx

# Reconstruction = scores @ components + mean, regrouped into two columns.
points = (fonts_pcs_np[idx] @ fonts_components_np[1:] + fonts_components_np[0]).reshape(-1, 2)
xs = points[:, 0]
# y is negated for display (presumably the source coordinates have y pointing down).
ys = -points[:, 1]

plt.axis("equal")
plt.plot(xs, ys, marker="o", markersize=4, linestyle="", alpha=0.5)
plt.show()

In [None]:
# Zero weight on every principal component, so the reconstruction below is just
# the stored mean row. Change the 0.0 factor to explore other weights.
avgPCs = np.ones_like(fonts_pcs_np[0]) * 0.0

mean_row = fonts_components_np[0]
pc_basis = fonts_components_np[1:]

# Reconstruction = weights @ components + mean, regrouped into two columns.
points = (avgPCs @ pc_basis + mean_row).reshape(-1, 2)
xs, ys = points[:, 0], -points[:, 1]  # y is negated for display

plt.axis("equal")
plt.plot(xs, ys, linestyle="", marker="o", markersize=4, alpha=0.5)
plt.show()

In [None]:
ml = "T"

# Mean score vector over every row whose character is `ml`.
is_ml = fonts_pcs_df["char"] == ml
mlAvg = fonts_pcs_df.loc[is_ml, pc_cols].mean(axis=0).values

mean_row = fonts_components_np[0]
pc_basis = fonts_components_np[1:]

# Reconstruct the shape for the averaged scores, regrouped into two columns.
points = (mlAvg @ pc_basis + mean_row).reshape(-1, 2)
xs, ys = points[:, 0], -points[:, 1]  # y is negated for display

plt.axis("equal")
plt.plot(xs, ys, linestyle="", marker="o", markersize=4, alpha=0.5)
plt.show()

In [None]:
mf = "ibmserif"

# Mean score vector over every row belonging to font `mf`.
is_mf = fonts_pcs_df["font"] == mf
mfAvg = fonts_pcs_df.loc[is_mf, pc_cols].mean(axis=0).values

mean_row = fonts_components_np[0]
pc_basis = fonts_components_np[1:]

# Reconstruct the shape for the averaged scores, regrouped into two columns.
points = (mfAvg @ pc_basis + mean_row).reshape(-1, 2)
xs, ys = points[:, 0], -points[:, 1]  # y is negated for display

plt.axis("equal")
plt.plot(xs, ys, linestyle="", marker="o", markersize=4, alpha=0.5)
plt.show()