# Code to generate the yearbook embeddings

* First, download data from https://people.eecs.berkeley.edu/~shiry/projects/yearbooks/yearbooks.html
* Untar into a folder called ``yearbook``
* Rename the folder ``faces_aligned_small_mirrored_co_aligned_cropped_cleaned`` to ``faces``.

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
from PIL import Image
from img2vec_pytorch import Img2Vec
from sklearn.metrics.pairwise import rbf_kernel

In [3]:

info = pd.read_csv("yearbook/yb_info.csv")
image_dir = 'yearbook/faces/F'
image_list = [os.path.join(image_dir, name) for name in os.listdir(image_dir) if name.endswith('.png')]

In [4]:
year_list = []
for name in image_list:
    year_list.append(int(name[17:21]))

In [5]:
info = pd.DataFrame(
    {'filename': image_list,
     'year': year_list
    })

In [6]:
info['decade'] = (10*np.floor(info.year/10)).astype(int)
info

Unnamed: 0,filename,year,decade
0,yearbook/faces/F/1980_Missouri_Kansas-City_Pas...,1980,1980
1,yearbook/faces/F/1997_Vermont_Rutland_Rutland_...,1997,1990
2,yearbook/faces/F/1970_Virginia_Arlington_Washi...,1970,1970
3,yearbook/faces/F/1984_California_Grass-Valley_...,1984,1980
4,yearbook/faces/F/1948_Pennsylvania_Pittsburgh_...,1948,1940
...,...,...,...
20243,yearbook/faces/F/1967_Virginia_Arlington_Washi...,1967,1960
20244,yearbook/faces/F/1944_Utah_Millard-County_Topa...,1944,1940
20245,yearbook/faces/F/1938_New-York_Brooklyn_Samuel...,1938,1930
20246,yearbook/faces/F/1937_California_Brentwood_Lib...,1937,1930


In [10]:
img2vec = Img2Vec(cuda=False)
yb_embeddings = np.zeros((len(info), 512))
for i in range(len(info)):
    img = Image.open(info.filename[i])
    # Get a vector from img2vec, returned as a torch FloatTensor
    yb_embeddings[i, :] = img2vec.get_vec(img, tensor=False)

In [11]:
np.savetxt('yb_embeddings_all.csv', yb_embeddings, delimiter=",")