In [1]:
from glob import glob
from os.path import join
import pandas as pd

from modules.metadata import PhotoMetadata
from modules.utilities import str_to_datetime
from templating.post import Post

# Root data directories, given relative to the notebook's working directory.
location_data_dirpath, photo_metadata_dirpath = (
    '../../data/locations/',
    '../../data/photo_metadata',
)

# HDF file that lives in the locations directory.
pings_data_path = join(location_data_dirpath, 'pings.hdf')

# HDF files that live in the photo-metadata directory.
(
    photo_metadata_path,
    photo_gps_path,
    photo_semantic_locations_path,
    imgur_data_path,
) = (
    join(photo_metadata_dirpath, hdf_name)
    for hdf_name in (
        'photo_metadata.hdf',
        'photo_gps.hdf',
        'photo_semantic_locations.hdf',
        'imgur_data.hdf',
    )
)

# User-curated spreadsheets (captions, locations, hidden photos).
(
    user_captions_path,
    user_locations_path,
    user_exclusions_path,
) = (
    join(photo_metadata_dirpath, xlsx_name)
    for xlsx_name in (
        'user_captions.xlsx',
        'user_gps.xlsx',
        'user_hidden_photos.xlsx',
    )
)

%reload_ext autoreload
%autoreload 2

# Load photo metadata

In [2]:
# photo metadata, wrapped in the project's PhotoMetadata class
photo_metadata = PhotoMetadata(photo_metadata_path)

# HDF-backed tables, each read from the 'data' key of its file:
# image dimensions, imgur URLs, photo GPS data, matched semantic locations
photo_dimensions = pd.read_hdf(
    join(photo_metadata_dirpath, 'photo_dimensions.hdf'),
    key='data',
)
imgur_data = pd.read_hdf(imgur_data_path, key='data')
photo_gps = pd.read_hdf(photo_gps_path, key='data')
photo_semantic_locations = pd.read_hdf(photo_semantic_locations_path, key='data')

# user-curated spreadsheets, all re-indexed on PhotoMetadata.INDEX
user_captions, user_locations, user_exclusions = (
    pd.read_excel(xlsx_path).set_index(PhotoMetadata.INDEX)
    for xlsx_path in (
        user_captions_path,
        user_locations_path,
        user_exclusions_path,
    )
)

# Generate post data

In [3]:
# Build the `posts` table step by step: start from the photo metadata, then attach
# locations, dimensions, captions, user overrides and imgur data. The steps are
# order-dependent (later steps read columns created by earlier ones).

# append ping-based location data where GPS tag is missing
# (columns present in both frames are disambiguated as `*_native` / `*_ping`;
# the native value wins and the ping value only fills gaps)
posts = photo_metadata.data.join(photo_gps, on='timestamp', lsuffix='_native', rsuffix='_ping')
posts['latitude'] = posts.latitude_native.fillna(posts.latitude_ping)
posts['longitude'] = posts.longitude_native.fillna(posts.longitude_ping)

# append dimensions
posts = posts.join(photo_dimensions)

# exclude unreliable pings: rows that are not geotagged and whose `dt_ping` is at or
# above the threshold get their coordinates blanked
# NOTE(review): 3600/8 evaluates to 450. If dt_ping is in seconds that is 7.5 minutes,
# not the 15 minutes the previous comment claimed (15 minutes would be 3600/4) —
# confirm which threshold is intended.
max_dt_ping = 3600/8 # 450
no_gps_data = (~posts.geotagged & (posts.dt_ping >= max_dt_ping))
posts.loc[no_gps_data, ['latitude', 'longitude']] = float('nan')

# append manually-curated captions
posts = posts.join(user_captions)

# append nearest semantic location and use it to fill missing captions
# (the manual caption is kept when present; `caption_auto` only fills gaps)
posts = posts.join(photo_semantic_locations, how='left', rsuffix='_auto')
posts['caption'] = posts.caption.fillna(posts.caption_auto)

# append manually-curated location data
# (fillna only touches values that are still missing, so existing values are not overridden;
# presumably matched on the shared index and on column names — verify index alignment)
posts = posts.fillna(user_locations[['city', 'country', 'latitude', 'longitude']])

# manually-excluded photos
# NOTE(review): with pandas' default `errors='raise'`, this fails with KeyError if an
# excluded entry is not present in `posts`.
posts = posts.drop(index=user_exclusions.index)

# append imgur URLs
posts = posts.join(imgur_data)

  raw_cell, store_history, silent, shell_futures)


In [5]:
class Writer:
    """Write one post per album from a table of photo records.

    `posts` is expected to be a DataFrame that can be grouped by 'album',
    indexed with `.loc[album]` and `.loc[(album, filename, source)]`, and that
    has 'timestamp' and 'imgur_id' columns (these are the only accesses this
    class makes). Rendering is delegated to the project's `Post` class.
    """

    # Post titles for albums whose title differs from the album name;
    # albums not listed here use the album name itself.
    titles = {
        'Spain': 'Catalonia',
        'Germany': 'Bavaria',
        'CA Highway 1': 'Highway 1',
    }

    # Cover photo per album, stored as the key used to look the photo up in
    # `posts` — presumably (album, filename, source), matching the posts index.
    covers = {
        'CA Highway 1': ('CA Highway 1', 'IMG_1888.jpg', 'iPhone 7'),
        'France': ('France', 'IMG_3750.jpg', 'Canon PowerShot G9 X Mark II'),
        'Italy': ('Italy', 'IMG_3797.jpg', 'iPhone 7'),
        'England': ('England', 'IMG_3259.jpg', 'Canon PowerShot G9 X Mark II'),
        'Czech Republic': ('Czech Republic', 'IMG_1610.jpg', 'Canon PowerShot G9 X Mark II'),
        'Germany': ('Germany', 'IMG_1710.jpg', 'Canon PowerShot G9 X Mark II'),
        'Austria': ('Austria', 'IMG_1993.jpg', 'Canon PowerShot G9 X Mark II'),
        'Slovenia': ('Slovenia', 'IMG_2210.jpg', 'Canon PowerShot G9 X Mark II'),
        'Croatia': ('Croatia', 'IMG_3093.jpg', 'Canon PowerShot G9 X Mark II'),
        'Spain': ('Spain', 'IMG_3491.jpg', 'Canon PowerShot G9 X Mark II'),
        'Morocco': ('Morocco', 'IMG_7514.jpg', 'iPhone 7'),
        'Italy 2': ('Italy 2', 'IMG_3460.jpg', 'iPhone SE'),
        'Lake Tahoe': ('Lake Tahoe', 'IMG_8674.jpg', 'iPhone 7'),
        'Palm Springs': ('Palm Springs', 'IMG_4472.jpg', 'iPhone SE'),
        'Rocky Mountains': ('Rocky Mountains', 'IMG_4760.jpg', 'Canon PowerShot G9 X Mark II')
    }

    def __init__(self, posts):
        """Store the table of photo records that the posts are built from."""
        self.posts = posts

    @staticmethod
    def datetime_to_str(ts, fmt='%Y:%m:%d %H:%M:%S'):
        """Format `ts` (any object with `strftime`) using `fmt`."""
        return ts.strftime(fmt)

    @property
    def post_order(self):
        """Series of mean photo timestamp per album, sorted ascending."""
        # The explicit lambda is kept as-is rather than swapped for the
        # built-in groupby mean, so the aggregation path is unchanged.
        mean_timestamps = self.posts.groupby('album')['timestamp'].aggregate(lambda x: x.mean())
        return mean_timestamps.sort_values()

    def write_post(self, album, date='2020-01-01'):
        """Build and write the post for a single album.

        Args:
            album: album name; must be a key of `covers` and of `posts`.
            date: date string used as the prefix of the post filename.

        Raises:
            KeyError: if `album` has no entry in `covers` or is not in `posts`.
        """
        # filename is "<date>-<album lower-cased, whitespace replaced by dashes>"
        name_str = '-'.join(album.lower().split())
        filename = '{:s}-{:s}'.format(date, name_str)

        # fall back to the album name when no custom title is configured
        title = self.titles.get(album, album)

        cover_id = self.posts.loc[self.covers[album]].imgur_id

        records = self.posts.loc[album].sort_values(by='timestamp')

        post = Post(filename, title, records, cover_id=cover_id)
        post.write()

    def write(self, **kwargs):
        """Write every album's post, visiting albums in `post_order`.

        Each post is dated with the album's mean timestamp (YYYY-MM-DD).
        Extra keyword arguments are forwarded to `write_post`.
        """
        # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
        for album, timestamp in self.post_order.items():
            date = self.datetime_to_str(timestamp, fmt='%Y-%m-%d')
            self.write_post(album, date=date, **kwargs)
        

In [6]:
# Write one post per album from the assembled `posts` table,
# visiting albums in the order given by Writer.post_order.
writer = Writer(posts)
writer.write()

In [None]:
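# NOTE: disabled one-off helper, kept for reference only. When uncommented it opens
# every image listed in `posts.path`, records its width, height and aspect ratio, and
# writes the result to photo_dimensions.hdf (the file read by the loading cell above).
# Before re-enabling: it needs an `Image` object with `.open` (presumably PIL.Image,
# which this notebook does not import), and `iteritems()` must become `items()` on
# pandas >= 2.0.
# INDEX = ['album', 'filename', 'source']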
# records = []
# for idx, p in posts.path.iteritems():
#     record = dict(zip(INDEX, idx))
#     im = Image.open(p)
#     record['image_width'] = im.width
#     record['image_height'] = im.height
#     record['image_aspect'] = im.width/im.height
#     records.append(record)
    
# records = pd.DataFrame(records).set_index(INDEX)
# records['is_horizontal'] = records.image_aspect > 1
# records.to_hdf(join(photo_metadata_dirpath, 'photo_dimensions.hdf'), 'data')