In [2]:
from google_photo_api import GooglePhotoHelper
import json
from PIL import Image
from io import BytesIO
import piexif
import requests
import matplotlib.pyplot as plt
import re
%matplotlib inline 

In [32]:
# methods for reading image from google photo API
def generate_face_dataset_from_google_album(album_name, size):
    '''
    return a list of url / name tuple
    '''
    helper = GooglePhotoHelper()
    album_list = helper.find_albums_by_name('Ada')
    if len(album_list) != 1:
        raise Exception(f'There should be only one album named {album_name}!')
    url_list = helper.list_face_download_urls_from_album(album_list[0]['id'], size=size)
    return url_list
    # data_json_str_list = [json.dumps({"image_url": url, "label": album_name}) for url in url_list]
    # # write json lines to file:
    # with open(album_name + '_face_dataset.json', 'w') as f:
    #     f.writelines(data_json_str_list)

def read_image_info_from_url(url):
    r = requests.get(url)
    image = Image.open(BytesIO(r.content))
    exif_dict = piexif.load(image.info['exif'])
    d = r.headers['content-disposition']
    fname = re.findall("filename=\"(.+)\"", d)[0]
    return image, fname, exif_dict['Exif'][piexif.ExifIFD.UserComment].decode('utf-8')

# read_image_info_from_url('https://lh3.googleusercontent.com/lr/AGiIYOVkJAGcLO64zWnjDlUsmGiCPXis3GPh12ddfPl2xVkoe66w2ZBCRhn1ssWb2lYcAAwg2sQU8IZ6korV0rKYaQGMmV83HR41W2OdIveNXOQXiSLv0-ropDMYRVzcqcWLj-mrbsvh-xmCHU3yrJHyjWeSkRx-WI1oDYv1Urdx2RKBdLq8P6AtfqMl1-pY3rRuh6rSryHCm985IUqzkFEVzNaScgcnhuRUbTssxI1uMmqAFpIdeB_AlhMzNIjjSzsK8nNMTcFbCV1tWWxNmRbBuvPmm89O12mDwX48RrL7TamdqdiscVta7awGPHTA0e6mKj1EFTtcDTDmFTqaz1FczpbrAyYd3fd9bcInsCMFkVaVAVIhRt2xEFmmPS2IqR7JpaRA1AMZLxCJx8Fh8BwO6Gy5XEyrRQ7n1Wlf--_1eZxPJUUUyFjuZsVNV_aN7ALij4EfwxyohxfQLPghTmRVGsdeNW5cpW2zOhiNlW66YObVYs9sttzIbX72iTkJ2khcdey92_f5fJaaDYPxDRL_RRsIRVqZm-Wznt5HvdQFJtGF63GjVcmp7kEfg4kl650golimA5DpytpSNkLQgrxFhkk8SFW5PsuUdfh89hgFy0ETEEe0sxlXm071uL3pUm70hMMuRSGAFZBl8IIy0I_Q-Y7akpZpYfGEt3vw9TSBDpsmHlhZtQLmEEDX1a47aHaZty_BCy8sGWwW6G2s3v00kqhY3VJVYNPETMIztlwWlt8bvLg0MLWLj3V5d_ypBdFDme73kn-q9EtT3JfngVcv8heqVEOOAD4OKF8Iy0j3VlbCspgCXfaNFz8ZPW-Iwb68HfO7DS_awX7FM5R_hB_6DHTzEkH0CvQzxGjv6L0hEDA4t8rLr4WkWnNm5IGkMkU2Xs4_wxgVHN24vjxvpUMKYTBJIbWefcH6muxDT1JuCp6OOIpMAprL527H4Buv2Wgzp4dlDVpba7pFROKGFTfwdYXeF7SOxvZU1KCRplFwDgPuwIDBwMZ09rqHZ-EAnggaU2Qmuu0N7EaQG2pmf9NlGpRBCJjAa4-U__avpagJpHmvbtZ0jJM=d')

In [31]:
import os
from datetime import datetime
import numpy as np

# image preprocessing
# get the median hight/width ratio of the images
def get_median_height_width_ratio(image_paths):
    height_width_ratios = []
    for image_path in image_paths:
        image = Image.open(image_path)
        width, height = image.size
        height_width_ratios.append(height / width)
    return np.median(height_width_ratios)

def get_roll_pan_tilt_from_image(file_path):
    '''
    read roll angle, pan angle, tilt angle from image object
    '''
    image = Image.open(file_path)
    try:
        exif_dict = piexif.load(image.info['exif'])
        user_comment = exif_dict['Exif'][piexif.ExifIFD.UserComment].decode('utf-8')
        face_json = json.loads(user_comment)
        return face_json['rollAngle'], face_json['panAngle'], face_json['tiltAngle']
    except KeyError:
        return None, None, None
    

def is_file_valid_face_image(file_path):
    if file_path.endswith('.jpg') and 'auto_detected_face_image_' in file_path:
        image = Image.open(file_path)
        width, height = image.size
        if abs(height / width - 1.162) > 0.1:
            return False
        # parse date from the file name
        date_string = re.findall("auto_detected_face_image_(.+)_\d+_", file_path)[0]
        # image hight/width ratio should be around 1.162
        # print(f"median height/width ratio: {get_median_height_width_ratio(image_paths)}")
        try:
            datetime.strptime(date_string, '%Y:%m:%d')
            roll, pan, tilt = get_roll_pan_tilt_from_image(file_path)
            if roll is None or pan is None or tilt is None:
                return False
            return -10 < roll < 10 and -10 < pan < 10 and -10 < tilt < 10
        except ValueError:
            return False

def get_date_from_image_path(path):
    date_string = re.findall("auto_detected_face_image_(.+)_\d+_", path)[0]
    return datetime.strptime(date_string, '%Y:%m:%d')

def read_image_data_from_path(path):
    '''
    Return the image object, the date object parsed from the name, and the user comment in the Exif
    '''
    image = Image.open(path)
    try:
        exif_dict = piexif.load(image.info['exif'])
        user_comment = exif_dict['Exif'][piexif.ExifIFD.UserComment].decode('utf-8')
    except KeyError:
        user_comment = '{}'
    return image, get_date_from_image_path(path), user_comment

In [29]:
# valid image selection
from mpl_toolkits.axes_grid1 import ImageGrid

# def show_face_image_grid(dir):
dir = '/Users/lingxiao/Documents/ada_faces'
# fig = plt.figure(figsize=(columns, rows))
# # set the size of the image
# fig.set_size_inches(image_w * columns, image_h * rows)
# read image paths from directory and open with pillow
image_paths = [os.path.join(dir, f) for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f)) and is_file_valid_face_image(os.path.join(dir, f))]
print(f"total eligible images: {len(image_paths)}")

# print(image_paths)

def show_image_in_grids(image_paths):
    image_w = 3
    image_h = 3
    columns = 5
    rows = 10
    fig = plt.figure(figsize=(image_w*rows, image_h*columns))
    grid = ImageGrid(fig, 111,  # similar to subplot(111)
                    nrows_ncols=(rows, columns),  # creates grid of axes
                    axes_pad=0.1,  # pad between axes in inch.
                    )

    for i, ax in enumerate(grid):
        image, date, user_comment = read_image_data_from_path(image_paths[i])
        ax.imshow(image)
        face_json = json.loads(user_comment)
        face_json.pop('boundingPoly')
        face_json.pop('fdBoundingPoly')
        face_json.pop('landmarks')
        anno_string = f"r: {face_json['rollAngle']}\np: {face_json['panAngle']}\nt: {face_json['tiltAngle']}"
        ax.text(10, 200, anno_string, color='black', fontsize=10, bbox = dict(facecolor = 'red', alpha = 0.5))
        ax.set_title(str(date)[:10])
    plt.show()



total eligible images: 646


In [40]:
import imageio
from PIL import ImageDraw
from datetime import timedelta

sample_size = 100
sorted_list_dates = sorted(list([get_date_from_image_path(path) for path in image_paths]))
sorted_path = sorted(image_paths, key=lambda path: get_date_from_image_path(path))

diff_in_day = (sorted_list_dates[-1] - sorted_list_dates[0]).days
diff_per_image = diff_in_day / sample_size
print(f"diff in day: {diff_in_day}, diff per image: {diff_per_image}")

cur_date = sorted_list_dates[0]
sampled_image_paths = []
image_count = 0
i = 0
while i < len(sorted_list_dates) and cur_date < sorted_list_dates[-1] and image_count < sample_size:
    # get the date in the list that is closest to the current date
    i = min(range(i, len(sorted_list_dates)), key=lambda i: abs(sorted_list_dates[i] - cur_date))
    sampled_image_paths.append(sorted_path[i])
    cur_date += timedelta(days=diff_per_image)
    image_count += 1
    i += 1


# create a list of imge objects
sampled_images = [Image.open(path) for path in sampled_image_paths]
# resize the images to 300:(300*1.162)
width = 300
height = int(width * 1.162)
sampled_images = [image.resize((width, height), Image.ANTIALIAS) for image in sampled_images]

# add file name as text to the image for debugging purpose
for i, image in enumerate(sampled_images):
    draw = ImageDraw.Draw(image)
    draw.text((0, 0), sampled_image_paths[i].split('/')[-1], fill=(255, 255, 255))

# return a list of numpy arrays of the image data
sampled_images = [np.array(image) for image in sampled_images]
imageio.mimsave('test.gif', sampled_images, duration=0.05)


diff in day: 486, diff per image: 4.86


  sampled_images = [image.resize((width, height), Image.ANTIALIAS) for image in sampled_images]
