# Face classifier

## Setup

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## Load data

In [None]:
# Read the dataset CSV into a DataFrame; the path is relative to the
# notebook's working directory.
csv_path = "../data/age_gender.csv"
data = pd.read_csv(csv_path)

## EDA

In [None]:
# Dimensions of the loaded DataFrame as (n_rows, n_columns).
data.shape

In [None]:
# Preview the first rows (pandas shows 5 by default) to see the columns
# and how their values are encoded.
data.head()

In [None]:
# Print per-column dtypes and non-null counts, plus memory usage.
data.info()

In [None]:
# Switch seaborn to its white-background-with-grid style.
sns.set_style("whitegrid")

# One row, three panels: age histogram, then ethnicity and gender counts.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
age_ax, ethnicity_ax, gender_ax = axs

# Age: 30 equal-width bins over the range 0-120.
sns.histplot(data=data, x="age", bins=30, binrange=(0, 120), ax=age_ax)
age_ax.set_title("Age distribution")

# Ethnicity and gender: one bar per distinct value in the column.
count_panels = (
    ("ethnicity", "Ethnicity distribution", ethnicity_ax),
    ("gender", "Gender distribution", gender_ax),
)
for column, title, panel in count_panels:
    sns.countplot(data=data, x=column, ax=panel)
    panel.set_title(title)

plt.tight_layout()
plt.show()

In [None]:
def pxlstring2pxlvec(df: pd.DataFrame, idx: int) -> np.ndarray:
    """
    Parse one row's pixel string into a 1D array of integer pixel values.

    The string stored at ``df["pixels"][idx]`` is expected to hold
    whitespace-separated integers describing a square image that has
    been flattened into a vector.

    Args:
        df: DataFrame with a "pixels" column of whitespace-separated
            integer strings.
        idx: Index label of the row to convert.

    Returns:
        A 1D integer array with one entry per pixel.

    Raises:
        ValueError: If the string holds no pixel values, if the number
            of pixels is not a perfect square, or if a token cannot be
            converted to an integer.
    """
    pxl_str = df["pixels"][idx]
    # split() with no argument tolerates leading/trailing or repeated
    # whitespace, which split(" ") would turn into empty tokens.
    pxl_list = pxl_str.split()
    n_pxls = len(pxl_list)
    if n_pxls == 0:
        raise ValueError(f"Row {idx!r} contains no pixel values")

    # Validate with an explicit raise rather than `assert`, which is
    # removed when Python runs with -O. Comparing integers also avoids
    # relying on float equality.
    img_dim = int(round(np.sqrt(n_pxls)))
    if img_dim * img_dim != n_pxls:
        raise ValueError(
            f"Row {idx!r} has {n_pxls} pixel values, "
            "which does not form a square image"
        )

    return np.array(pxl_list, dtype=int)
    
def pxlvec2pxlarray(pxlvec: np.ndarray) -> np.ndarray:
    """
    Reshape a flattened pixel vector into a square 2D pixel array.

    The side length is the truncated square root of the vector's first
    dimension, so the vector is expected to hold a perfect-square number
    of pixels; otherwise the reshape raises ``ValueError``.
    """
    n_pixels = pxlvec.shape[0]
    side = int(np.sqrt(n_pixels))
    square_shape = (side, side)
    return pxlvec.reshape(square_shape)

In [None]:
# Show a 3x5 grid of sample images: each grid row displays five
# consecutive dataset rows, starting at index 0, 2000 and 4000.
n_rows, n_cols, row_stride = 3, 5, 2000
fig, axs = plt.subplots(n_rows, n_cols, figsize=(10, 6))

for i, axes_row in enumerate(axs):
    for j, ax in enumerate(axes_row):
        sample_idx = i * row_stride + j
        # Pixel string -> flat vector -> square array for imshow.
        img = pxlvec2pxlarray(pxlstring2pxlvec(data, sample_idx))
        ax.imshow(img, interpolation="nearest", cmap="gray")
        ax.axis("off")
        ax.set_title(f"Image #{sample_idx}")

fig.suptitle("Example images")
plt.show()