In [1]:
from glob import glob
from os.path import join
import pandas as pd

from templating.post import Post

# Directory of location data (not referenced again in the cells shown here).
location_data_path = '../../data/locations/'
# Directory holding the photo metadata tables read by the cells below.
photo_metadata_dirpath = '../../data/photo_metadata'
# HDF file from which the `metadata` table is loaded.
photo_metadata_path = join(photo_metadata_dirpath, 'metadata.hdf')

%reload_ext autoreload
%autoreload 2

In [2]:
# read the 'data' table from the metadata HDF file, with rows ordered by timestamp
metadata = pd.read_hdf(photo_metadata_path, 'data').sort_values('timestamp')

In [3]:
# set index
# INDEX is the three-column key; the spreadsheet/HDF tables loaded in later
# cells are indexed by the same columns so they align with `metadata`.
INDEX = ['album', 'filename', 'source']
# NOTE: not re-runnable — once these columns have moved into the index,
# running this cell again raises KeyError.
metadata = metadata.set_index(INDEX)

In [4]:
# append GPS data
photo_gps_path = join(photo_metadata_dirpath, 'photo_gps.hdf')
photo_gps = pd.read_hdf(photo_gps_path, 'data')

# Match photo_gps rows to photos through the 'timestamp' column. Columns present
# in both tables are disambiguated: '_native' = already in metadata,
# '_ping' = from photo_gps.
metadata = metadata.join(photo_gps, on='timestamp', lsuffix='_native', rsuffix='_ping')

# assign latitude/longitude: keep the native value, fall back to the ping value
for coord in ('latitude', 'longitude'):
    metadata[coord] = metadata[coord + '_native'].fillna(metadata[coord + '_ping'])

# exclude unreliable pings
# NOTE(review): 3600/8 evaluates to 450. The previous comment here said
# "15 minutes", but if dt_ping is in seconds this is 7.5 minutes — confirm
# which cutoff is intended (15 minutes would be 3600/4).
max_dt_ping = 3600/8
is_geotagged = metadata.geotagged
ping_too_far = metadata.dt_ping >= max_dt_ping
# rows that are not geotagged and whose ping offset reaches the cutoff
no_gps_data = ~is_geotagged & ping_too_far
metadata.loc[no_gps_data, ['latitude', 'longitude']] = float('nan')

  raw_cell, store_history, silent, shell_futures)


In [5]:
# load manually-curated captions
user_captions_path = join(photo_metadata_dirpath, 'user_captions.xlsx')
user_captions = pd.read_excel(user_captions_path).set_index(INDEX)

# index-on-index left join: every metadata row is kept and gains the
# spreadsheet's columns where its (album, filename, source) key matches
metadata = metadata.join(user_captions, how='left')

In [6]:
# load ping-based location data
photo_locations = (
    pd.read_hdf(join(photo_metadata_dirpath, 'photo_locations.hdf'), 'data')
    .set_index(INDEX)
)

# columns that already exist in metadata arrive with an '_auto' suffix
metadata = metadata.join(photo_locations, how='left', rsuffix='_auto')

# keep an existing caption; otherwise use the one from photo_locations
metadata['caption'] = metadata['caption'].fillna(metadata['caption_auto'])

In [7]:
# load manually-curated location data
user_locations_path = join(photo_metadata_dirpath, 'user_gps.xlsx')
user_locations = pd.read_excel(user_locations_path).set_index(INDEX)

# fill only the missing cells of these columns from the spreadsheet;
# values already present in metadata are left as they are
location_columns = ['city', 'country', 'latitude', 'longitude']
metadata = metadata.fillna(user_locations[location_columns])

In [8]:
# load manually-excluded photos
user_exclusions_path = join(photo_metadata_dirpath, 'user_hidden_photos.xlsx')
user_exclusions = pd.read_excel(user_exclusions_path)
exclusions = user_exclusions.set_index(INDEX)

# remove every row whose (album, filename, source) key is listed in the spreadsheet
hidden_keys = exclusions.index
metadata = metadata.drop(index=hidden_keys)

In [19]:
class Writer:
    """
    Write one post per album from a table of photo records.

    `posts` must support `groupby('album')` and `.loc[album]` and provide a
    'timestamp' column whose values have a `strftime` method; in this notebook
    it is the `metadata` frame.
    """

    # Post titles for albums whose title differs from the album name.
    titles = {
        'Spain': 'Catalonia'
    }

    # Cover image URL per album name.
    covers = {'Germany': 'https://i.imgur.com/rbmZHXf.jpg',
         'Italy': 'https://i.imgur.com/XZ6Lz5l.jpg',
         'Czech Republic': 'https://i.imgur.com/2DB7yPR.jpg',
         'Slovenia': 'https://i.imgur.com/G2GzEPJ.jpg',
         'Palm Springs': 'https://i.imgur.com/fdcEkox.jpg',
         'Spain': 'https://i.imgur.com/DANYgWx.jpg',
         'Morocco': 'https://i.imgur.com/YTDqFPU.jpg',
         'Croatia': 'https://i.imgur.com/xvwSpqZ.jpg',
         'Austria': 'https://i.imgur.com/ZkyFsMM.jpg',
         'Italy 2': 'https://i.imgur.com/tPViG2v.jpg',
         'France': 'https://i.imgur.com/5Usf1GT.jpg',
         'England': 'https://i.imgur.com/HfAchQr.jpg',
         'Tahoe': 'https://i.imgur.com/WDecwZB.jpg',
         'Rocky Mountains': 'https://i.imgur.com/s8pTyWx.jpg'}

    def __init__(self, posts):
        """Store the table of photo records to write posts from."""
        self.posts = posts

    @staticmethod
    def datetime_to_str(ts, fmt='%Y:%m:%d %H:%M:%S'):
        """Format `ts` with `strftime` using `fmt`."""
        return ts.strftime(fmt)

    @property
    def post_order(self):
        """Mean timestamp per album, sorted ascending (earliest album first)."""
        mean_timestamps = self.posts.groupby('album')['timestamp'].aggregate(lambda x: x.mean())
        return mean_timestamps.sort_values()

    def write_post(self, album, date='2020-01-01', cover=None):
        """
        Build and write the post for a single album.

        Args:
            album: album label; selects the album's rows from `self.posts`.
            date: date string used as the prefix of the post filename.
            cover: cover value handed to `Post` (None when there is none).
        """
        # filename is '<date>-<album words, lower-cased, joined by dashes>'
        name_str = '-'.join(word.lower() for word in album.split())
        filename = '{:s}-{:s}'.format(date, name_str)

        # use the override title when one is registered, else the album name
        title = self.titles.get(album, album)

        records = self.posts.loc[album].sort_values(by='timestamp')
        post = Post(filename, title, records, cover=cover)
        post.write()

    def write(self):
        """Write a post for every album, in `post_order`."""
        # Series.items() replaces iteritems(), which was removed in pandas 2.0.
        for album, timestamp in self.post_order.items():
            date = self.datetime_to_str(timestamp, fmt='%Y-%m-%d')
            # An album without a registered cover is written with cover=None
            # (write_post's default) instead of aborting the run with KeyError.
            self.write_post(album, date=date, cover=self.covers.get(album))
        

In [20]:
# build a Writer over the assembled metadata table and write a post for every album
writer = Writer(metadata)
writer.write()