In [2]:
from google_photo_api import GooglePhotoHelper
import json
from PIL import Image
from io import BytesIO
import piexif
import requests
import matplotlib.pyplot as plt
import re
%matplotlib inline 

In [32]:
# methods for reading image from google photo API
def generate_face_dataset_from_google_album(album_name, size):
    '''
    Return the face download URLs of the single album called ``album_name``.

    album_name: album title passed to ``GooglePhotoHelper.find_albums_by_name``.
    size: forwarded unchanged as ``size=`` to
        ``GooglePhotoHelper.list_face_download_urls_from_album``.

    Returns whatever ``list_face_download_urls_from_album`` returns
    (per the original note: a list of url / name tuples -- TODO confirm).
    Raises Exception when the lookup does not yield exactly one album.
    '''
    helper = GooglePhotoHelper()
    # Search for the requested album. This used to be hard-coded to 'Ada',
    # which silently ignored the album_name argument.
    album_list = helper.find_albums_by_name(album_name)
    if len(album_list) != 1:
        raise Exception(f'There should be only one album named {album_name}!')
    url_list = helper.list_face_download_urls_from_album(album_list[0]['id'], size=size)
    return url_list
    # Disabled export of the result as a JSON-lines dataset file:
    # data_json_str_list = [json.dumps({"image_url": url, "label": album_name}) for url in url_list]
    # # write json lines to file:
    # with open(album_name + '_face_dataset.json', 'w') as f:
    #     f.writelines(data_json_str_list)

def read_image_info_from_url(url, timeout=30):
    '''
    Download an image and return ``(image, file_name, user_comment)``.

    url: address fetched with ``requests.get``.
    timeout: seconds passed to ``requests.get`` so a stalled connection
        cannot hang the notebook forever.

    image is the PIL image decoded from the response body, file_name is taken
    from the ``filename="..."`` part of the content-disposition header, and
    user_comment is the Exif UserComment decoded as UTF-8.

    Raises requests.HTTPError on a 4xx/5xx response, KeyError when the image
    has no Exif data / UserComment or the header is missing, and IndexError
    when the header carries no quoted filename.
    '''
    r = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to PIL.
    r.raise_for_status()
    image = Image.open(BytesIO(r.content))
    exif_dict = piexif.load(image.info['exif'])
    d = r.headers['content-disposition']
    # [^"]+ stops at the closing quote; a greedy (.+) would run on into any
    # later quoted parameter in the same header.
    fname = re.findall(r'filename="([^"]+)"', d)[0]
    return image, fname, exif_dict['Exif'][piexif.ExifIFD.UserComment].decode('utf-8')

# read_image_info_from_url('https://lh3.googleusercontent.com/lr/AGiIYOVkJAGcLO64zWnjDlUsmGiCPXis3GPh12ddfPl2xVkoe66w2ZBCRhn1ssWb2lYcAAwg2sQU8IZ6korV0rKYaQGMmV83HR41W2OdIveNXOQXiSLv0-ropDMYRVzcqcWLj-mrbsvh-xmCHU3yrJHyjWeSkRx-WI1oDYv1Urdx2RKBdLq8P6AtfqMl1-pY3rRuh6rSryHCm985IUqzkFEVzNaScgcnhuRUbTssxI1uMmqAFpIdeB_AlhMzNIjjSzsK8nNMTcFbCV1tWWxNmRbBuvPmm89O12mDwX48RrL7TamdqdiscVta7awGPHTA0e6mKj1EFTtcDTDmFTqaz1FczpbrAyYd3fd9bcInsCMFkVaVAVIhRt2xEFmmPS2IqR7JpaRA1AMZLxCJx8Fh8BwO6Gy5XEyrRQ7n1Wlf--_1eZxPJUUUyFjuZsVNV_aN7ALij4EfwxyohxfQLPghTmRVGsdeNW5cpW2zOhiNlW66YObVYs9sttzIbX72iTkJ2khcdey92_f5fJaaDYPxDRL_RRsIRVqZm-Wznt5HvdQFJtGF63GjVcmp7kEfg4kl650golimA5DpytpSNkLQgrxFhkk8SFW5PsuUdfh89hgFy0ETEEe0sxlXm071uL3pUm70hMMuRSGAFZBl8IIy0I_Q-Y7akpZpYfGEt3vw9TSBDpsmHlhZtQLmEEDX1a47aHaZty_BCy8sGWwW6G2s3v00kqhY3VJVYNPETMIztlwWlt8bvLg0MLWLj3V5d_ypBdFDme73kn-q9EtT3JfngVcv8heqVEOOAD4OKF8Iy0j3VlbCspgCXfaNFz8ZPW-Iwb68HfO7DS_awX7FM5R_hB_6DHTzEkH0CvQzxGjv6L0hEDA4t8rLr4WkWnNm5IGkMkU2Xs4_wxgVHN24vjxvpUMKYTBJIbWefcH6muxDT1JuCp6OOIpMAprL527H4Buv2Wgzp4dlDVpba7pFROKGFTfwdYXeF7SOxvZU1KCRplFwDgPuwIDBwMZ09rqHZ-EAnggaU2Qmuu0N7EaQG2pmf9NlGpRBCJjAa4-U__avpagJpHmvbtZ0jJM=d')

In [154]:
import os
from datetime import datetime
import numpy as np

# image preprocessing
# get the median height/width ratio of the images
def get_median_height_width_ratio(image_paths):
    '''
    Return the median of height / width over the images at ``image_paths``.

    image_paths: iterable of paths openable by ``PIL.Image.open``.
    Returns NaN when no paths are given.
    '''
    height_width_ratios = []
    for image_path in image_paths:
        # Only the header is needed for .size; the context manager releases
        # the file handle that Image.open would otherwise keep open.
        with Image.open(image_path) as image:
            width, height = image.size
        height_width_ratios.append(height / width)
    if not height_width_ratios:
        # np.median([]) also yields NaN but emits RuntimeWarnings on the way.
        return np.float64('nan')
    return np.median(height_width_ratios)

def get_roll_pan_tilt_from_image(file_path):
    '''
    Read the roll, pan and tilt angles stored in an image's Exif UserComment.

    The UserComment is decoded as UTF-8 and parsed as JSON; the values under
    'rollAngle', 'panAngle' and 'tiltAngle' are returned as a 3-tuple.
    Returns (None, None, None) when the image has no Exif block, no
    UserComment, or the JSON lacks one of the angle keys.
    A UserComment that is not valid UTF-8 / JSON raises ValueError.
    '''
    # Only the metadata is needed, so close the file as soon as it is read.
    with Image.open(file_path) as image:
        exif_bytes = image.info.get('exif')
    if exif_bytes is None:
        return None, None, None
    try:
        exif_dict = piexif.load(exif_bytes)
        user_comment = exif_dict['Exif'][piexif.ExifIFD.UserComment].decode('utf-8')
        face_json = json.loads(user_comment)
        return face_json['rollAngle'], face_json['panAngle'], face_json['tiltAngle']
    except KeyError:
        return None, None, None
    

def is_file_valid_face_image(file_path, target_ratio=1.162, ratio_tolerance=0.08, max_angle=6):
    '''
    Decide whether ``file_path`` is a usable face image. Always returns a bool.

    A file qualifies when all of the following hold:
    - the path ends with '.jpg' and contains 'auto_detected_face_image_';
    - height / width is within ``ratio_tolerance`` of ``target_ratio``;
    - the name carries a date in '%Y:%m:%d' format before a '_<digits>_' part;
    - roll, pan and tilt angles are all present in the Exif UserComment;
    - pan and tilt lie strictly between -max_angle and +max_angle.

    The defaults reproduce the thresholds that were previously hard-coded.
    '''
    if not (file_path.endswith('.jpg') and 'auto_detected_face_image_' in file_path):
        # Explicit False instead of falling off the end and returning None.
        return False

    # Only the size is needed; close the handle straight away.
    with Image.open(file_path) as image:
        width, height = image.size
    # image height/width ratio should be around target_ratio
    if abs(height / width - target_ratio) > ratio_tolerance:
        return False

    # parse date from the file name
    date_strings = re.findall(r"auto_detected_face_image_(.+)_\d+_", file_path)
    if not date_strings:
        # The marker is present but the '<date>_<digits>_' tail is not.
        return False

    try:
        datetime.strptime(date_strings[0], '%Y:%m:%d')
        roll, pan, tilt = get_roll_pan_tilt_from_image(file_path)
    except ValueError:
        # Bad date, or a UserComment that is not valid UTF-8 / JSON.
        return False
    if roll is None or pan is None or tilt is None:
        return False
    return -max_angle < pan < max_angle and -max_angle < tilt < max_angle

def get_date_from_image_path(path):
    '''
    Parse the date embedded in a face-image path.

    The path must contain 'auto_detected_face_image_<date>_<digits>_' where
    <date> is in '%Y:%m:%d' format. Returns the corresponding datetime.
    Raises IndexError when the pattern is absent and ValueError when the
    captured text is not a valid date.
    '''
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    match = re.search(r"auto_detected_face_image_(.+)_\d+_", path)
    if match is None:
        # Same exception type as the former findall(...)[0], with a clear message.
        raise IndexError(f"no 'auto_detected_face_image_<date>_<n>_' part in path: {path!r}")
    return datetime.strptime(match.group(1), '%Y:%m:%d')

def read_image_data_from_path(path):
    '''
    Open the image at ``path`` and return a 3-tuple:
    the PIL image, the date parsed from the path, and the Exif UserComment
    decoded as UTF-8 ('{}' when the Exif block or the comment is missing).
    '''
    image = Image.open(path)
    # Default used whenever the metadata lookup below hits a missing key.
    user_comment = '{}'
    try:
        exif_bytes = image.info['exif']
        raw_comment = piexif.load(exif_bytes)['Exif'][piexif.ExifIFD.UserComment]
        user_comment = raw_comment.decode('utf-8')
    except KeyError:
        pass
    date = get_date_from_image_path(path)
    return image, date, user_comment

In [149]:
# valid image selection
from mpl_toolkits.axes_grid1 import ImageGrid

# def show_face_image_grid(dir):
# Folder holding the downloaded face crops.
# NOTE: machine-specific absolute path -- point this at your own image folder.
# (Named face_image_dir so the builtin dir() is no longer shadowed.)
face_image_dir = '/Users/lingxiao/Documents/ada_faces'
# read image paths from the directory, keeping regular files that pass the
# face-image filter; sorted() makes the order reproducible, since os.listdir
# returns entries in arbitrary order
candidate_paths = [os.path.join(face_image_dir, name) for name in sorted(os.listdir(face_image_dir))]
image_paths = [path for path in candidate_paths if os.path.isfile(path) and is_file_valid_face_image(path)]
print(f"total eligible images: {len(image_paths)}")

# print(image_paths)


total eligible images: 397


In [156]:
import imageio
from PIL import ImageDraw, ImageFont
from datetime import timedelta
import math

def uniform_sample_from_image_paths(image_paths, sample_size):
    '''
    Sample up to ``sample_size`` paths spread evenly over the date range.

    The paths are ordered by the date parsed from their names. Target dates
    are then stepped from the earliest date in increments of
    (date span in days / sample_size); for each target the not-yet-passed
    image with the closest date is taken.

    Returns the sampled paths in date order. The result may hold fewer than
    ``sample_size`` entries (the walk stops once the image list is exhausted).
    Returns [] for empty input or a non-positive ``sample_size``; when every
    image has the same date, the first ``sample_size`` paths are returned.
    '''
    if sample_size <= 0 or len(image_paths) == 0:
        # Avoids the ZeroDivisionError / IndexError these inputs used to cause.
        return []

    # Parse each date once; the sort is stable, so ties keep their input order.
    dated_paths = sorted(
        ((get_date_from_image_path(path), path) for path in image_paths),
        key=lambda pair: pair[0],
    )
    sorted_list_dates = [date for date, _ in dated_paths]
    sorted_path = [path for _, path in dated_paths]

    diff_in_day = (sorted_list_dates[-1] - sorted_list_dates[0]).days
    diff_per_image = diff_in_day / sample_size
    print(f"diff in day: {diff_in_day}, diff per image: {diff_per_image}")

    if sorted_list_dates[0] == sorted_list_dates[-1]:
        # Zero date span (e.g. a single image): the stepping loop below would
        # never start, so hand back the leading images directly.
        return sorted_path[:sample_size]

    last_date = sorted_list_dates[-1]
    total = len(sorted_list_dates)
    cur_date = sorted_list_dates[0]
    sampled_image_paths = []
    next_index = 0
    while next_index < total and cur_date < last_date and len(sampled_image_paths) < sample_size:
        # get the date in the remaining list that is closest to the current date
        closest = min(range(next_index, total), key=lambda idx: abs(sorted_list_dates[idx] - cur_date))
        sampled_image_paths.append(sorted_path[closest])
        cur_date += timedelta(days=diff_per_image)
        # Never reuse an image: continue after the one just taken.
        next_index = closest + 1
    return sampled_image_paths

def read_landmark_x_y_from_user_comment(user_comment, landmark_type):
    '''
    Return the (x, y) of one face landmark relative to the bounding box.

    user_comment: JSON string with a 'landmarks' list (entries holding 'type'
        and 'position' with 'x'/'y') and 'boundingPoly' -> 'vertices'.
    landmark_type: value compared against each landmark's 'type',
        e.g. 'LEFT_EYE'.

    The first bounding-poly vertex is subtracted from the landmark position;
    a vertex coordinate that is absent counts as 0.
    Returns (None, None) when the landmark type is not found, or when the
    comment has no landmarks / bounding-poly vertices (e.g. the '{}' fallback
    used for images without Exif data).
    '''
    face_json = json.loads(user_comment)
    landmarks = face_json.get('landmarks')
    vertices = face_json.get('boundingPoly', {}).get('vertices')
    if not landmarks or not vertices:
        # Nothing to measure against -- report "not found" instead of raising.
        return None, None
    boundingpoly_x = vertices[0].get('x', 0)
    boundingpoly_y = vertices[0].get('y', 0)
    for landmark in landmarks:
        if landmark['type'] == landmark_type:
            position = landmark['position']
            return position['x'] - boundingpoly_x, position['y'] - boundingpoly_y
    return None, None

def align_image(image, user_comment, file_path):
    '''
    Level, crop and resize a face image so the eyes land at a fixed position.

    image: PIL image of the face.
    user_comment: JSON string holding the face landmarks (see
        read_landmark_x_y_from_user_comment).
    file_path: only its base name is used, for the debug text overlay.

    Steps: rotate about the midpoint between the eyes so the eye line is
    horizontal, crop a window around that midpoint, resize to 300 x 348, then
    draw the file name and the remaining face JSON in the top-left corner.

    Returns the new image, or None when either eye landmark is missing or the
    two eye landmarks coincide.
    '''
    left_eye_x, left_eye_y = read_landmark_x_y_from_user_comment(user_comment, 'LEFT_EYE')
    right_eye_x, right_eye_y = read_landmark_x_y_from_user_comment(user_comment, 'RIGHT_EYE')
    if left_eye_x is None or left_eye_y is None or right_eye_x is None or right_eye_y is None:
        return None

    eye_dx = right_eye_x - left_eye_x
    eye_dy = right_eye_y - left_eye_y
    eye_distance = (eye_dx**2 + eye_dy**2)**0.5
    if eye_distance == 0:
        # Coincident eyes give no direction to level and a zero-width crop.
        return None
    eye_center_x = (right_eye_x + left_eye_x) / 2
    eye_center_y = (left_eye_y + right_eye_y) / 2

    # rotate the image so the eye line becomes horizontal
    if eye_dx == 0:
        # Eyes stacked vertically: atan(dy / dx) would divide by zero.
        angle = math.copysign(90.0, eye_dy)
    else:
        angle = math.atan(eye_dy / eye_dx) * 180 / math.pi
    # Rotate BEFORE cropping: the eye centre is expressed in the coordinates
    # of the uncropped image, so it is only a valid pivot at this point. It
    # also lets the crop use real pixels instead of blank rotated corners.
    image = image.rotate(angle, expand=False, center=(eye_center_x, eye_center_y))

    # crop the image so the eyes are in the same relative position
    crop_left = eye_center_x - eye_distance * 1.42
    crop_right = eye_center_x + eye_distance * 1.42
    image_h = image.size[1]
    crop_up = eye_center_y - image_h * 0.45
    crop_down = eye_center_y + image_h * 0.48
    image = image.crop((crop_left, crop_up, crop_right, crop_down))

    # resize the image to the same size
    width = 300
    height = int(width * 1.162)
    image = image.resize((width, height))

    # add file name as text to the image for debugging purpose
    draw = ImageDraw.Draw(image)
    comment_json = json.loads(user_comment)
    for bulky_key in ('landmarks', 'boundingPoly', 'fdBoundingPoly'):
        # Default of None: a missing key must not abort the alignment.
        comment_json.pop(bulky_key, None)
    file_name = os.path.basename(file_path)
    position_json_str = file_name + '\n' + json.dumps(comment_json, indent=4)
    try:
        font = ImageFont.truetype("Apple Symbols.ttf", 15)
    except OSError:
        # The font is not installed here; fall back to PIL's built-in one.
        font = ImageFont.load_default()
    draw.text((0, 0), position_json_str,
              fill=(255, 255, 255),
              font=font)

    return image

def generate_gif_from_image_paths(image_paths, gif_path, duration):
    '''
    Build an animated GIF at ``gif_path`` from the images at ``image_paths``.

    Every image is loaded, aligned on its eye landmarks with align_image,
    and written as one frame; images that cannot be aligned are skipped.
    ``duration`` is handed to ``imageio.mimsave`` unchanged.
    '''
    # Phase 1: load every image together with its Exif user comment.
    loaded = [read_image_data_from_path(path) for path in image_paths]

    # Phase 2: align the images based on the position of the eyes.
    aligned = [
        align_image(image, user_comment, path)
        for (image, _, user_comment), path in zip(loaded, image_paths)
    ]

    # Phase 3: drop the failures and convert the rest to numpy frames.
    frames = [np.array(frame) for frame in aligned if frame is not None]
    imageio.mimsave(gif_path, frames, duration=duration)


# Number of frames requested; the sampler may return fewer.
sample_size = 100
sampled_image_paths = uniform_sample_from_image_paths(image_paths, sample_size)
print(f"sampled images: {len(sampled_image_paths)}")
# NOTE(review): the unit of the duration argument (0.1) depends on the
# installed imageio version -- confirm before tuning the frame rate.
generate_gif_from_image_paths(sampled_image_paths, 'test.gif', 0.1)

diff in day: 498, diff per image: 4.98
sampled images: 99


In [155]:
# data analysis on image landmarks

def show_image_in_grids(columns, rows, image_paths):
    '''
    Show a random sample of images in a ``rows`` x ``columns`` grid.

    Each cell shows one image, titled with its date (YYYY-MM-DD) and
    annotated with the LEFT_EYE / RIGHT_EYE landmark positions relative to
    the bounding box. When fewer images than cells are available, every
    image is shown once and the leftover cells are blanked.
    '''
    image_w = 3
    image_h = 3
    # figsize is (width, height): width scales with columns, height with rows.
    fig = plt.figure(figsize=(image_w * columns, image_h * rows))
    grid = ImageGrid(fig, 111,  # similar to subplot(111)
                     nrows_ncols=(rows, columns),  # creates grid of axes
                     axes_pad=0.1,  # pad between axes in inch.
                     )

    # sample images from paths based on grid size; sampling without
    # replacement cannot draw more items than exist, so cap the count
    cell_count = min(rows * columns, len(image_paths))
    if cell_count > 0:
        sample_image_paths = np.random.choice(image_paths, size=cell_count, replace=False)
    else:
        sample_image_paths = []

    for i, ax in enumerate(grid):
        if i >= cell_count:
            # No image left for this cell.
            ax.axis('off')
            continue
        image, date, user_comment = read_image_data_from_path(sample_image_paths[i])
        ax.imshow(image)
        left_eye = read_landmark_x_y_from_user_comment(user_comment, 'LEFT_EYE')
        right_eye = read_landmark_x_y_from_user_comment(user_comment, 'RIGHT_EYE')
        anno_string = json.dumps([left_eye, right_eye], indent=2)
        ax.text(10, 200, anno_string, color='black', fontsize=10, bbox = dict(facecolor = 'red', alpha = 0.5))
        ax.set_title(str(date)[:10])
    plt.show()


In [127]:
# _, _, user_comment = read_image_data_from_path(image_paths[0])

# print(user_comment)

# show_image_in_grids(1, 1, image_paths)



diff in day: 497, diff per image: 9.94


  return image.resize((width, height), Image.LANCZOS)
