In [None]:
#| default_exp date_compare

## date_compare
> Attempt to find corresponding dates between location history and image EXIF data

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from regps.json_explorer import *
from regps.exif_explorer import *

Use the EXIF explorer to build up a list of image metadata.

In [None]:
import glob
# Collect the paths of all JPEGs in the Vegas sample folder.
# NOTE(review): glob.glob() returns matches in arbitrary (filesystem-dependent) order,
# and later cells index image_list[0] / image_list[-1], so the values recorded in
# this notebook may differ on another machine.
path = 'sample-data/vegas/*.jpg'
images = glob.glob(path)
images

['sample-data/vegas/MVIMG_20181126_194541.jpg',
 'sample-data/vegas/IMG_20181129_162952.jpg',
 'sample-data/vegas/IMG_20181129_225115.jpg',
 'sample-data/vegas/IMG_20181129_130709.jpg',
 'sample-data/vegas/MVIMG_20181129_163316.jpg',
 'sample-data/vegas/MVIMG_20181129_162454.jpg',
 'sample-data/vegas/IMG_20181129_232355.jpg',
 'sample-data/vegas/MVIMG_20181129_130619.jpg',
 'sample-data/vegas/MVIMG_20181126_101615.jpg',
 'sample-data/vegas/IMG_20181129_231241.jpg',
 'sample-data/vegas/IMG_20181129_205739.jpg',
 'sample-data/vegas/MVIMG_20181126_123022.jpg',
 'sample-data/vegas/MVIMG_20181129_123837.jpg',
 'sample-data/vegas/00000IMG_00000_BURST20181129201757840_COVER.jpg',
 'sample-data/vegas/IMG_20181130_075721.jpg',
 'sample-data/vegas/IMG_20181129_194332.jpg',
 'sample-data/vegas/MVIMG_20181129_231250.jpg',
 'sample-data/vegas/IMG_20181129_163324.jpg',
 'sample-data/vegas/IMG_20181129_115616.jpg',
 'sample-data/vegas/MVIMG_20181129_210937.jpg',
 'sample-data/vegas/00012IMG_00012_BUR

In [None]:
# Read the EXIF metadata for every image path collected by the glob cell.
image_list = extract_exif(images)

# sanity check: the sample folder is expected to produce 98 entries
assert len(image_list) == 98

In [None]:
image_list[0].exif.get("datetime_original")
# Note: `datetime_original` can be None for some images, since some images are created by Google Photos

'2018:11:26 19:45:41'

Now we need to convert the dates from the EXIF data to timestamps.

In [None]:
#| export
from datetime import datetime
def to_timestamp(date, tz=None):
    """Convert an EXIF date string to a Unix timestamp in whole seconds.

    Parameters
    ----------
    date : str
        Date in the EXIF layout ``"YYYY:MM:DD HH:MM:SS"``.
    tz : datetime.tzinfo, optional
        Timezone the photo was taken in (e.g. a ``datetime.timezone`` or
        ``zoneinfo.ZoneInfo`` instance). EXIF date strings carry no UTC
        offset, so when ``tz`` is omitted the string is interpreted in the
        local timezone of the machine running the code.

    Returns
    -------
    int
        Seconds since the Unix epoch, truncated to a whole second.

    Raises
    ------
    TypeError
        If ``date`` is not a string (e.g. ``None`` for an image without the tag).
    ValueError
        If ``date`` does not match the expected layout.
    """
    date_format = "%Y:%m:%d %H:%M:%S"
    parsed = datetime.strptime(date, date_format)
    if tz is not None:
        # Attach the capture timezone so the result no longer depends on
        # where the code happens to run.
        parsed = parsed.replace(tzinfo=tz)
    return int(parsed.timestamp())

In [None]:
# Testing to make sure we get the right value back for the first image.
# The number shown depends on the local timezone of the machine running the
# notebook, because the EXIF string is parsed without any UTC offset.
timestamp = to_timestamp(image_list[0].exif.get("datetime_original"))
timestamp


1543275941

In [None]:
# Same conversion for the last image in the list.
timestamp2 = to_timestamp(image_list[-1].exif.get("datetime_original"))
timestamp2

1543504923

In [None]:
#| export

def delta_to_minutes(delta):
    """Return the number of whole minutes in a `timedelta`, rounded down."""
    # divmod splits the duration into full minutes and the leftover seconds
    whole_minutes, _leftover_seconds = divmod(delta.total_seconds(), 60)
    return int(whole_minutes)

def get_time_delta(a, b):
    """Return the absolute gap between two Unix timestamps in whole minutes.

    Parameters
    ----------
    a, b : int or float
        Seconds since the Unix epoch. The order of the arguments does not
        matter.

    Returns
    -------
    int
        Elapsed time between ``a`` and ``b`` in minutes, rounded down.
    """
    # Subtract the epoch values directly instead of going through naive local
    # datetimes: wall-clock subtraction is off by an hour across a DST change,
    # and datetime.fromtimestamp() rejects epochs outside its supported range.
    return int(abs(a - b) // 60)

In [None]:
# Minutes between the first and the last image timestamps
# (the argument order does not matter, the result is an absolute difference).
results = get_time_delta(timestamp2, timestamp)
print(results)

3816


In [None]:
# quick test: the gap between the two sample timestamps is 3816 minutes
# (`assert` is a statement, so no call-style parentheses)
assert get_time_delta(timestamp2, timestamp) == 3816

Now let's pull some information from the location history to compare.

In [None]:
# Location history sample file.
file_to_open = "sample-data/Records.json"

# Load the raw location entries and check the expected count for this sample.
locations = get_locations(file_to_open)
assert len(locations) == 672293

In [None]:
# Build the location records from the raw entries.
# The result is two entries shorter than the raw list — presumably entries
# without usable GPS coordinates are dropped (TODO confirm in json_explorer).
locations_w_gps = build_location_history(locations)
assert len(locations_w_gps) == 672291

In [None]:
# Peek at the first record; latitude/longitude are E7 integers (degrees * 1e7).
locations_w_gps[0]

Location(timestamp=1467216494, latitude=446549411, longitude=-635836042, accuracy=41)

Now let's come up with a brute-force solution for finding the closest-matching dates across both datasets.

In [None]:
#| export
from collections import defaultdict

# bruteforce implementation, this can be optimized later since it's operating on sorted lists
def get_smallest_deltas(image_list, locations):
    """Map each image to the location record that is closest to it in time.

    Brute force: every image is compared against every location.

    Parameters
    ----------
    image_list : sequence
        Image records whose ``exif.get("datetime_original")`` yields an EXIF
        date string, or ``None`` when the image has no capture time.
    locations : sequence
        Location records exposing a ``timestamp`` attribute (Unix seconds).

    Returns
    -------
    dict
        ``{image_index: location_index}`` for the location with the smallest
        time delta in minutes; on a tie the earliest location wins. Images
        without a capture time are left out, and the dict is empty when
        ``locations`` is empty.
    """
    closest = {}
    for image_index, image in enumerate(image_list):
        captured = image.exif.get("datetime_original")
        if not captured:
            # Some images carry no capture time; there is nothing to match on.
            continue

        # Parse once per image instead of once per (image, location) pair.
        image_timestamp = to_timestamp(captured)

        smallest_delta = None
        # Iterate the `locations` argument, not a notebook-level global.
        for location_index, location in enumerate(locations):
            delta = get_time_delta(image_timestamp, location.timestamp)
            if smallest_delta is None or delta < smallest_delta:
                smallest_delta = delta
                closest[image_index] = location_index
    return closest

In [None]:
# Map each image index to the index of its closest-in-time location record.
d = get_smallest_deltas(image_list, locations_w_gps)
d

{0: 434262,
 1: 434771,
 2: 434780,
 3: 434745,
 4: 434771,
 5: 434771,
 6: 434781,
 7: 434744,
 8: 434222,
 9: 434780,
 10: 434779,
 11: 434229,
 12: 434744,
 13: 434779,
 14: 434799,
 15: 434778,
 16: 434780,
 17: 434771,
 18: 434741,
 19: 434779,
 20: 434286,
 21: 434779,
 22: 434771,
 23: 434232,
 24: 434778,
 25: 434753,
 26: 434781,
 27: 434745,
 28: 434771,
 29: 434780,
 30: 434206,
 31: 434221,
 32: 434799,
 33: 434221,
 34: 434799,
 35: 434232,
 36: 434445,
 37: 434221,
 38: 434771,
 39: 434744,
 40: 434260,
 41: 434780,
 42: 434779,
 43: 434221,
 44: 434219,
 45: 434236,
 46: 434232,
 47: 434781,
 48: 434780,
 49: 434799,
 50: 434778,
 51: 434262,
 52: 434744,
 53: 434221,
 54: 434779,
 55: 434232,
 56: 434221,
 57: 434744,
 58: 434780,
 59: 434223,
 60: 434221,
 61: 434771,
 62: 434221,
 63: 434781,
 64: 434799,
 65: 434442,
 66: 434771,
 67: 434447,
 68: 434219,
 69: 434222,
 70: 434771,
 71: 434744,
 72: 434799,
 73: 434221,
 74: 434221,
 75: 434771,
 76: 434779,
 77: 4342

Now we can take a peek at how close the image timestamps are to their matched location history timestamps.

In [None]:
#| de-google lat/long

def convert_to_decimal(lat, long):
    """Convert latitudeE7/longitudeE7 values to decimal degrees.

    Returns a `(latitude, longitude)` tuple of floats.
    """
    # the E7 fields hold degrees multiplied by 1e7
    e7_scale = 1e7
    decimal_lat = lat / e7_scale
    decimal_long = long / e7_scale
    return decimal_lat, decimal_long

In [None]:
# Report, for every matched image, how many minutes separate it from its
# closest location record and where that record is.
for image_index, location_index in d.items():
    print(f"{image_index=}, {location_index=}")
    image_time = to_timestamp(image_list[image_index].exif.get("datetime_original"))
    # look the matched record up once and reuse it below
    matched_location = locations_w_gps[location_index]
    location_time = matched_location.timestamp
    delta = get_time_delta(image_time, location_time)
    print(delta)
    lat, long = convert_to_decimal(
        matched_location.latitude,
        matched_location.longitude,
    )
    print(f"{lat=}, {long=}")

image_index=0, location_index=434262
17
lat=36.1230049, long=-115.1653861
image_index=1, location_index=434771
1
lat=36.1294693, long=-115.1646271
image_index=2, location_index=434780
59
lat=36.1242643, long=-115.1668683
image_index=3, location_index=434745
9
lat=36.1294664, long=-115.164631
image_index=4, location_index=434771
2
lat=36.1294693, long=-115.1646271
image_index=5, location_index=434771
6
lat=36.1294693, long=-115.1646271
image_index=6, location_index=434781
78
lat=36.13149, long=-115.1649832
image_index=7, location_index=434744
10
lat=36.1294662, long=-115.1646327
image_index=8, location_index=434222
9
lat=36.1294643, long=-115.1646844
image_index=9, location_index=434780
80
lat=36.1242643, long=-115.1668683
image_index=10, location_index=434779
52
lat=36.1242643, long=-115.1668683
image_index=11, location_index=434229
5
lat=36.1294594, long=-115.1646513
image_index=12, location_index=434744
17
lat=36.1294662, long=-115.1646327
image_index=13, location_index=434779
92
lat

From here we can start associating the GPS data from the location history with the images whose GPS data Google Photos has stripped.

In [None]:
# Run nbdev's export step, which writes the cells marked `#| export`
# out to the library module.
from nbdev.doclinks import nbdev_export
nbdev_export()