In [4]:
from glob import glob
from os.path import join
import pandas as pd

from modules.imgur import Client
from templating.post import Post

%reload_ext autoreload
%autoreload 2

In [5]:
# Load the photo metadata table stored under the 'data' key.
metadata = pd.read_hdf(Client.metadata_path, 'data')

# Exact-match album title corrections (old title -> new title). None of the
# new titles is itself an old title, so one pass equals the chained replaces.
album_renames = {
    'Czech (2019)': 'Czechia (2019)',
    'Home (2020)': 'Tahoe (2020)',
    'Desert (2020)': 'Palm Springs (2020)',
    'Roadtrip (2020)': 'Rocky Mountains (2020)',
    'Italy v2 (2019)': 'Italy 2 (2019)',
}
metadata['album'] = metadata.album.replace(album_renames)

# Keep only the text before the last space, e.g. 'Tahoe (2020)' -> 'Tahoe'.
metadata['album'] = metadata.album.str.rsplit(' ', n=1).apply(lambda parts: parts[0])

# Index by (album, filename) and order the rows by the raw shot time.
metadata = metadata.set_index(['album', 'filename']).sort_values('time_shot')

# Google Maps API

In [59]:
import googlemaps
from datetime import datetime

# Create the Google Maps client used for the reverse-geocoding calls below.
# NOTE(review): `key` is not defined in any visible cell, so this fails on a
# fresh kernel unless it is supplied elsewhere — TODO confirm where it comes
# from, and keep the API key out of the notebook itself.
gmaps = googlemaps.Client(key=key)

In [61]:
# Reverse-geocode every record whose `gps` value contains no None, storing the
# raw API response under that record's `path`.
responses = {}
for _, row in metadata.iterrows():
    if None not in row.gps:
        responses[row.path] = gmaps.reverse_geocode(row.gps)

In [63]:
import json

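# One-off export of `responses` to disk, left disabled.
# NOTE(review): this writes './location_data.json', while the load cell below
# reads './data/location_data.json' — confirm which path is intended.
# with open('./location_data.json', 'w') as file:
#     json.dump(responses, file)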

# Parse responses

In [1]:
import json
# Read the `responses` mapping back from the JSON file on disk.
with open('./data/location_data.json') as fh:
    responses = json.loads(fh.read())

In [6]:
import pandas as pd

def parse_response(url, response):
    """Flatten one reverse-geocoding response into a single flat record.

    Every entry of ``response`` is scanned, and within it every item of the
    entry's ``'address_components'`` list. For each wanted component type the
    ``'short_name'`` of the first component carrying that type is kept; later
    components with the same type are ignored.

    Args:
        url: value stored under the ``'url'`` key of the returned record.
        response: iterable of dicts, each holding an ``'address_components'``
            list whose items have ``'types'`` and ``'short_name'`` keys.

    Returns:
        dict with ``'url'`` plus one key per wanted type found in the response.

    Raises:
        KeyError: if an entry or component lacks one of the keys named above.
    """
    wanted_keys = (
        'country',
        'administrative_area_level_1',
        'administrative_area_level_2',
        'administrative_area_level_3',
        'administrative_area_level_4',
        'locality',
        'route',
    )

    acquired = {'url': url}
    for entry in response:
        for component in entry['address_components']:
            types = component['types']
            for key in wanted_keys:
                # "First match wins" is tracked through `acquired` rather than
                # by popping from the list being iterated, which used to skip
                # the key that followed each match for the same component.
                if key not in acquired and key in types:
                    acquired[key] = component['short_name']

    return acquired

def fmt_address(record):
    """Build a short caption for one parsed location record.

    Args:
        record: object exposing ``locality``, ``administrative_area_level_1``
            and ``country`` attributes (for example a DataFrame row).

    Returns:
        ``record.country`` unchanged when the locality is missing;
        ``'<locality>, <administrative_area_level_1>'`` when both are present;
        ``'<locality>, <country>'`` when only the level-1 area is missing;
        the locality alone when neither the area nor the country is present.
    """
    # pd.isna treats None, NaN and pd.NA alike, so the caption no longer
    # depends on missing values having been converted to None beforehand.
    if pd.isna(record.locality):
        return record.country

    state = record.administrative_area_level_1
    # `country` is only read when the state is missing, as before.
    region = record.country if pd.isna(state) else state

    if pd.isna(region):
        # Nothing to pair the locality with; formatting a missing value with
        # '{:s}' would raise instead of producing a caption.
        return record.locality

    return '{:s}, {:s}'.format(record.locality, region)

In [7]:
# One flat record per entry of `responses`; keys absent from a record become
# missing values in the frame.
parsed_records = [parse_response(*item) for item in responses.items()]
df = pd.DataFrame(parsed_records)

# Replace missing-value markers with None before building the captions.
df[df.isna()] = None
df['address'] = df.apply(fmt_address, axis=1)

# Shorter aliases for the two columns carried into `metadata`.
df['city'] = df['locality']
df['state'] = df['administrative_area_level_1']

In [52]:
# Attach the geocoded fields to each photo by matching its `path` to the
# response `url`; photos without a response keep missing values (left join).
#
# A column-on-column merge ignores the left frame's index, so named index
# levels (album / filename) are moved into ordinary columns first; otherwise
# they would be dropped from the result. The merged frame has a fresh
# RangeIndex either way.
location_cols = df[['url', 'address', 'country', 'state', 'city']]
if any(level is not None for level in metadata.index.names):
    metadata = metadata.reset_index()
metadata = pd.merge(metadata, location_cols, how='left', left_on='path', right_on='url')

# Shift timestamps and interpolate locations

In [131]:
from datetime import datetime, timedelta

# Whole-hour offsets subtracted from a shot's timestamp by correct_time,
# keyed by country code. NOTE(review): fixed offsets take no account of
# daylight-saving differences — confirm that is acceptable.
country_tshifts = {'IT': 9, 'FR': 9, 'GB': 8, 'CZ': 9, 'DE': 9, 'AT': 9, 'SI': 9, 'HR': 9, 'ES': 9, 'MA': 8}
# Same, keyed by state code; used when the record's country is 'US'.
state_tshifts = {'CA': 0, 'NV': 0, 'UT': 1, 'WY': 1, 'MT': 1, 'ID': 1}

def correct_time(record, fmt='%Y:%m:%d %H:%M:%S'):
    """Shift a record's shot time by the offset configured for its location.

    Args:
        record: object with ``time_shot`` (string in ``fmt``), ``country`` and,
            for US records, ``state`` attributes.
        fmt: strptime/strftime pattern used to parse and re-emit the timestamp.

    Returns:
        The shifted timestamp, formatted with ``fmt``.

    Raises:
        ValueError: if ``time_shot`` does not match ``fmt``, or the country is
            not 'US' and has no entry in ``country_tshifts``.
        KeyError: if the country is 'US' and the state has no entry in
            ``state_tshifts``.
    """
    time_shot = datetime.strptime(record.time_shot, fmt)

    if record.country == 'US':
        if record.state not in state_tshifts:
            raise KeyError(
                'no time shift configured for US state {!r}'.format(record.state))
        hours = state_tshifts[record.state]
    else:
        if record.country not in country_tshifts:
            # The offending record goes into the message instead of being
            # printed separately before a bare raise.
            raise ValueError(
                'no time shift configured for country {!r}; record:\n{}'.format(
                    record.country, record))
        hours = country_tshifts[record.country]

    us_time_shot = time_shot - timedelta(hours=hours)
    return us_time_shot.strftime(fmt)
    
def get_us_time(record):
    """Return the record's shot time, shifted via correct_time when gps is complete.

    When ``record.gps`` contains a None, ``record.time_shot`` is returned
    unchanged; otherwise the result of ``correct_time(record)`` is returned.
    """
    has_full_gps = None not in record.gps
    return correct_time(record) if has_full_gps else record.time_shot

In [None]:
# Compute the shifted timestamp row by row, store it as a new column, then
# order the whole table by it.
pacific_times = metadata.apply(get_us_time, axis=1)
metadata['time_shot_pacific'] = pacific_times
metadata = metadata.sort_values('time_shot_pacific')

In [171]:
import numpy as np

# Give each non-geotagged shot the location fields of a geotagged shot chosen
# by timestamp: the first geotagged shot at or after it in time_shot_pacific
# order (tagged.time_shot_pacific is sorted because metadata was sorted on it).
tagged, untagged = metadata[metadata.geotagged], metadata[~metadata.geotagged]

# With no geotagged shots there is nothing to copy from, and with no untagged
# shots nothing to fill; indexing an empty `tagged` would raise.
if len(tagged) and len(untagged):
    idxs = np.searchsorted(tagged.time_shot_pacific.values, untagged.time_shot_pacific.values)
    # searchsorted returns len(tagged) for shots later than every geotagged
    # one, which is out of bounds for iloc; fall back to the last geotagged shot.
    idxs = np.minimum(idxs, len(tagged) - 1)
    for attr in ['city', 'state', 'country', 'address']:
        metadata.loc[untagged.index, attr] = tagged.iloc[idxs][attr].values