In [None]:
#| default_exp date_compare

# date_compare
> attempt to find corresponding dates from location history and image exif data

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from regps.json_explorer import *
from regps.exif_explorer import *

Use exif explorer to build up a list of image metadata

In [None]:
import glob
path = 'sample-data/vegas/*.jpg'
images = glob.glob(path)
len(images)

101

In [None]:
image_list = extract_exif(images)

assert len(image_list) == 98

In [None]:
image_list[0].exif.get("datetime_original")
# note that some of these can be None, since some images are created by google photos

'2018:11:26 19:45:41'

Now we need to convert the dates from the EXIF data to timestamps

In [None]:
#| export
from datetime import datetime
def to_timestamp(date):
    """Convert an EXIF-style date string into integer epoch seconds.

    `date` must be a string in the form ``YYYY:MM:DD HH:MM:SS`` (for example
    ``'2018:11:26 19:45:41'``). The string carries no time zone, so the parsed
    value is a naive datetime and `datetime.timestamp()` interprets it in the
    local time zone of the machine running the code.

    Raises `ValueError` if the string does not match the format and
    `TypeError` if `date` is not a string (e.g. `None`).
    """
    parsed = datetime.strptime(date, "%Y:%m:%d %H:%M:%S")
    epoch_seconds = parsed.timestamp()
    # drop the fractional part; the format has no sub-second field anyway
    return int(epoch_seconds)

In [None]:
# testing to make sure we get the right value back
timestamp = to_timestamp(image_list[0].exif.get("datetime_original"))
timestamp

1543275941

In [None]:
timestamp2 = to_timestamp(image_list[-1].exif.get("datetime_original"))
timestamp2

1543504923

In [None]:
#| export

def delta_to_minutes(delta):
    """Return the length of a `timedelta` in whole minutes.

    The value is floored (not truncated), so a negative delta of 30 seconds
    yields -1 rather than 0.
    """
    whole_minutes, _leftover_seconds = divmod(delta.total_seconds(), 60)
    return int(whole_minutes)

def get_time_delta(a, b):
    """Return the absolute gap between two epoch timestamps in whole minutes.

    `a` and `b` are POSIX timestamps (seconds). The result does not depend on
    argument order and is never negative.
    """
    moment_a = datetime.fromtimestamp(a)
    moment_b = datetime.fromtimestamp(b)
    # abs() on the timedelta gives the same non-negative gap whichever is later
    return delta_to_minutes(abs(moment_a - moment_b))

In [None]:
results = get_time_delta(timestamp2, timestamp)
print(results)

3816


In [None]:
# sanity check: the first and last sample images are 3816 minutes apart
assert get_time_delta(timestamp2, timestamp) == 3816

Now let's pull some information from the location history to compare against

In [None]:
file_to_open = "sample-data/Records.json"

locations = get_locations(file_to_open)
assert len(locations) == 672293

In [None]:
locations_w_gps = build_location_history(locations)
assert len(locations_w_gps) == 672291

In [None]:
locations_w_gps[0]

Location(timestamp=1467216494, latitude=446549411, longitude=-635836042, accuracy=41)

Now let's come up with a brute-force solution for finding the closest matching dates between the two datasets

In [None]:
#| export
from collections import defaultdict

# This implementation takes 25mins~ to process 100 images X 300mbs of json
# bruteforce implementation, this can be optimized later since it's operating on sorted lists
def get_smallest_deltas2(image_list, locations):
    """Brute-force match of every image to its closest location record.

    For each image the whole of `locations` is scanned and the index of the
    entry with the smallest time gap (in whole minutes, via `get_time_delta`)
    is kept. On ties the earliest index wins.

    Parameters:
        image_list: sequence of objects exposing `.exif.get("datetime_original")`.
        locations: iterable of objects exposing a `.timestamp` (epoch seconds).

    Returns:
        dict mapping image index -> index into `locations`. Images get no
        entry when `locations` is empty.
    """
    d = {}
    for image_index, image in enumerate(image_list):
        # The image timestamp is the same for every location, so parse it once
        # per image instead of once per (image, location) pair.
        image_timestamp = to_timestamp(image.exif.get("datetime_original"))
        current_delta = float("inf")
        # Iterate the `locations` argument, not the notebook-level global, so the
        # function works on whatever data the caller passes in.
        for location_index, location in enumerate(locations):
            delta = get_time_delta(image_timestamp, location.timestamp)
            if delta < current_delta:
                current_delta = delta
                d[image_index] = location_index
    return d

# this implementation takes under a second
from bisect import bisect
def get_smallest_deltas(image_list, locations):
    """Match every image to the location record closest to it in time.

    `locations` must be sorted by ascending `.timestamp` (a requirement of
    `bisect`). For each image the two records surrounding its timestamp are
    compared and the nearer one is chosen; on a tie the earlier record wins.

    Parameters:
        image_list: sequence of objects exposing `.exif.get("datetime_original")`.
        locations: sequence of objects exposing a `.timestamp` (epoch seconds).

    Returns:
        dict mapping image index -> index into `locations`. Empty when
        `locations` is empty.
    """
    d = {}
    if not locations:
        return d
    location_timestamps = [location.timestamp for location in locations]
    last_index = len(location_timestamps) - 1
    for image_index, image in enumerate(image_list):
        image_timestamp = to_timestamp(image.exif.get("datetime_original"))
        # bisect returns the insertion point: everything before it is <= the
        # image timestamp and everything from it onwards is strictly later, so
        # the two candidates are `after - 1` and `after`.
        after = bisect(location_timestamps, image_timestamp)
        if after == 0:
            # image predates the whole history
            index = 0
        elif after > last_index:
            # image is at or past the final record
            index = last_index
        else:
            before = after - 1
            delta_before = get_time_delta(image_timestamp, location_timestamps[before])
            delta_after = get_time_delta(image_timestamp, location_timestamps[after])
            index = before if delta_before <= delta_after else after
        d[image_index] = index
    return d
    

In [None]:
d = get_smallest_deltas(image_list, locations_w_gps)  
d

{0: 434262,
 1: 434771,
 2: 434781,
 3: 434745,
 4: 434772,
 5: 434771,
 6: 434781,
 7: 434745,
 8: 434222,
 9: 434781,
 10: 434779,
 11: 434229,
 12: 434744,
 13: 434779,
 14: 434799,
 15: 434779,
 16: 434781,
 17: 434772,
 18: 434742,
 19: 434779,
 20: 434286,
 21: 434779,
 22: 434772,
 23: 434232,
 24: 434778,
 25: 434753,
 26: 434781,
 27: 434745,
 28: 434771,
 29: 434781,
 30: 434206,
 31: 434221,
 32: 434799,
 33: 434222,
 34: 434799,
 35: 434232,
 36: 434446,
 37: 434222,
 38: 434771,
 39: 434745,
 40: 434261,
 41: 434781,
 42: 434779,
 43: 434222,
 44: 434219,
 45: 434237,
 46: 434232,
 47: 434781,
 48: 434781,
 49: 434799,
 50: 434778,
 51: 434262,
 52: 434745,
 53: 434222,
 54: 434779,
 55: 434232,
 56: 434222,
 57: 434745,
 58: 434781,
 59: 434224,
 60: 434222,
 61: 434771,
 62: 434222,
 63: 434781,
 64: 434799,
 65: 434442,
 66: 434771,
 67: 434447,
 68: 434219,
 69: 434222,
 70: 434772,
 71: 434745,
 72: 434799,
 73: 434222,
 74: 434222,
 75: 434771,
 76: 434779,
 77: 4342

Now we can take a peek at how close the date ranges between the image timestamps and location history timestamps are.

In [None]:
#| export
# de-google lat/long

def convert_to_decimal(lat, long):
    """Scale a latitudeE7/longitudeE7 pair down to decimal degrees.

    Returns a `(latitude, longitude)` tuple of floats.
    """
    # the E7 fields store degrees multiplied by ten million
    e7_scale = 1e7
    decimal_lat = lat / e7_scale
    decimal_long = long / e7_scale
    return decimal_lat, decimal_long

# Record pairing an image with the GPS coordinates matched to it.
#   image_path -- path of the image file
#   gps        -- coordinates for that image; de_google_gps_info stores a
#                 (latitude, longitude) tuple in decimal degrees here
from collections import namedtuple
ImageGPS = namedtuple("ImageGPS", ["image_path", "gps"])

def de_google_gps_info(d, images=None, locations=None):
    """Build `ImageGPS` records from an image-index -> location-index mapping.

    Parameters:
        d: dict mapping an index into `images` to an index into `locations`
           (the shape returned by `get_smallest_deltas`).
        images: sequence of objects exposing `.image_path`. Defaults to the
           notebook-level `image_list` so existing one-argument calls keep
           working.
        locations: sequence of objects exposing `.latitude` / `.longitude` in
           E7 form. Defaults to the notebook-level `locations_w_gps`.

    Returns:
        list of `ImageGPS(image_path, (lat, long))` with coordinates in decimal
        degrees, in the iteration order of `d`.

    Pass `images` and `locations` explicitly when calling from the exported
    module, where the notebook globals do not exist.
    """
    if images is None:
        images = image_list
    if locations is None:
        locations = locations_w_gps

    imgs_w_data = []
    for image_index, location_index in d.items():
        location = locations[location_index]
        gps = convert_to_decimal(location.latitude, location.longitude)
        imgs_w_data.append(ImageGPS(images[image_index].image_path, gps))
    return imgs_w_data

In [None]:
imgs_w_data = de_google_gps_info(d)
imgs_w_data[0]

ImageGPS(image_path='sample-data/vegas/MVIMG_20181126_194541.jpg', gps=(36.1230049, -115.1653861))

From here we can start associating the GPS data from the location history with the images whose GPS metadata Google Photos has stripped.

In [None]:
#| export

from GPSPhoto import gpsphoto

def write_gps_info_to_images(image_gps=None, output_path='output/newFile.jpg'):
    """Write a copy of an image with GPS coordinates embedded in its EXIF data.

    Parameters:
        image_gps: an `ImageGPS`-like record with `.image_path` and `.gps`
           (a `(latitude, longitude)` tuple in decimal degrees). Defaults to
           the first entry of the notebook-level `imgs_w_data`, so the
           zero-argument call keeps working.
        output_path: where the modified copy is written.

    The original referenced an undefined name (`images_and_data`) and always
    wrote one hard-coded coordinate pair; the coordinates now come from the
    record itself.
    """
    if image_gps is None:
        image_gps = imgs_w_data[0]
    info = gpsphoto.GPSInfo(tuple(image_gps.gps))
    photo = gpsphoto.GPSPhoto(image_gps.image_path)
    photo.modGPSData(info, output_path)


In [None]:
# no longer needed

def decdeg_to_dms(coord):
    """Split a decimal-degree coordinate into (degrees, minutes, seconds).

    For a negative coordinate the minus sign is carried by the first non-zero
    component only: degrees if non-zero, otherwise minutes, otherwise seconds.
    """
    is_negative = coord < 0
    total_minutes, seconds = divmod(abs(coord) * 3600, 60)
    degrees, minutes = divmod(total_minutes, 60)

    if not is_negative:
        return (degrees, minutes, seconds)
    if degrees > 0:
        return (-degrees, minutes, seconds)
    if minutes > 0:
        return (degrees, -minutes, seconds)
    return (degrees, minutes, -seconds)

In [None]:
value = decdeg_to_dms(imgs_w_data[0].gps[0])
value

(36.0, 7.0, 22.817639999993844)

In [None]:
value = decdeg_to_dms(imgs_w_data[0].gps[1])
value

(-115.0, 9.0, 55.38996000000043)

# apple exif fields

```
exif:GPSAltitude=94940/11161
exif:GPSAltitudeRef=.
exif:GPSDestBearing=227653/2182
exif:GPSDestBearingRef=T
exif:GPSHPositioningError=33479/4096
exif:GPSImgDirection=227653/2182
exif:GPSImgDirectionRef=T
exif:GPSInfo=2272
exif:GPSLatitude=45/1, 30/1, 5110/100
exif:GPSLatitudeRef=N
exif:GPSLongitude=73/1, 31/1, 3981/100
exif:GPSLongitudeRef=W
exif:GPSSpeed=4744/18627
exif:GPSSpeedRef=K
```

In [None]:
from nbdev.doclinks import nbdev_export
nbdev_export()