In [1]:
import os
import re
from collections import Counter, OrderedDict
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from skimage import io, transform

from keras.applications.inception_v3 import InceptionV3, decode_predictions

import flickrapi as flickr
import folium

Using Theano backend.


In [2]:
# URL template for a photo's static JPEG; the placeholders are filled from
# the farm / server / id / secret fields of a search result.
BASE_URL = 'http://farm{farm}.static.flickr.com/{server}/{pid}_{secret}.jpg'

# Field names of one photo record, in the exact order the values are
# assembled before being zipped into a dict.
RECORD_KEYS = (
    'photo_id',
    'user_id',
    'title',
    'date_taken',
    'camera',
    'latitude',
    'longitude',
    'altitude',
    'photo_url',
)

In [3]:
def parse_dms(dms):
    """Convert a degrees/minutes/seconds string to unsigned decimal degrees.

    The numeric fields are pulled out of ``dms`` in order and read as
    degrees, minutes and seconds; any surrounding text (``deg``, ``'``,
    ``"``, a hemisphere letter, ...) is ignored. Missing minutes or seconds
    count as zero. The sign / hemisphere is NOT applied here.

    Parameters
    ----------
    dms : str
        Coordinate text such as ``25 deg 31' 12.00"``.

    Returns
    -------
    float
        ``degrees + minutes / 60 + seconds / 3600``.

    Raises
    ------
    ValueError
        If ``dms`` contains no numeric field or more than three of them.
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    fields = re.findall(r'\d+(?:\.\d*)?|\.\d+', dms)
    if not 1 <= len(fields) <= 3:
        raise ValueError('cannot parse DMS coordinate: {!r}'.format(dms))
    # Pad so that "deg only" and "deg + min" inputs are accepted as well.
    d, m, s = (fields + ['0', '0'])[:3]
    return float(d) + float(m)/60 + float(s)/3600


def parse_alt(alt):
    """Parse an altitude string such as ``'1203 m'`` into a float.

    Parameters
    ----------
    alt : str or None
        Altitude text, optionally carrying a leading/trailing ``m``.

    Returns
    -------
    float
        The numeric altitude, or ``np.nan`` when ``alt`` is ``None``, is not
        a string, or does not reduce to a number once the ``m`` is stripped.
    """
    if alt is None:
        return np.nan
    try:
        return float(alt.strip('m'))
    except (AttributeError, TypeError, ValueError):
        # Only parse failures map to NaN; a bare ``except`` would also
        # swallow KeyboardInterrupt / SystemExit.
        return np.nan


def get_exif_data(f_api, photo_id):
    """Fetch a photo's EXIF block and extract its date, camera and GPS fix.

    Parameters
    ----------
    f_api : object
        Flickr client exposing ``photos.getExif(photo_id=...)`` and returning
        a dict with ``['photo']['exif']`` (list of tag entries) and
        ``['photo']['camera']``.
    photo_id : str
        Identifier of the photo to look up.

    Returns
    -------
    tuple or None
        ``(date, camera, lat, lon, alt)`` with signed decimal-degree
        coordinates, or ``None`` when the API call raises ``FlickrError``,
        when either GPS coordinate tag is absent, or when a coordinate
        cannot be parsed.
    """
    try:
        exif_data = f_api.photos.getExif(photo_id=photo_id)
    except flickr.FlickrError:
        return None

    exif_tags = {e['tag']: e['raw']['_content']
                 for e in exif_data['photo']['exif']}
    if 'GPSLatitude' not in exif_tags or 'GPSLongitude' not in exif_tags:
        return None

    try:
        lat = parse_dms(exif_tags['GPSLatitude'])
        lon = parse_dms(exif_tags['GPSLongitude'])
    except ValueError:
        # An unparseable coordinate is treated like a missing geotag so a
        # single odd photo does not abort a whole crawl.
        return None

    # Apply the hemisphere from the *Ref tags. When a tag is absent, fall
    # back to the previous hard-coded behaviour (south / east).
    lat_ref = str(exif_tags.get('GPSLatitudeRef', 'S')).strip().upper()
    lon_ref = str(exif_tags.get('GPSLongitudeRef', 'E')).strip().upper()
    if not lat_ref.startswith('N'):
        lat = -lat
    if lon_ref.startswith('W'):
        lon = -lon

    date = exif_tags.get('CreateDate', 'NA')
    alt = parse_alt(exif_tags.get('GPSAltitude', None))
    camera = exif_data['photo']['camera']
    return (date, camera, lat, lon, alt)


def get_geotagged_photos(f_api, max_photos=500,
                         start_page=1, per_page=250, **kwargs):
    """Yield one record dict per search hit that has usable EXIF GPS data.

    Pages of ``f_api.photos.search`` results are walked starting at
    ``start_page``; every hit is passed to ``get_exif_data`` and hits for
    which that returns ``None`` are skipped. Because of the skipping, fewer
    than ``max_photos`` records may be produced.

    Parameters
    ----------
    f_api : object
        Flickr client exposing ``photos.search(...)``.
    max_photos : int
        Upper bound on the number of records yielded; also used to derive
        how many pages are requested. Non-positive values yield nothing.
    start_page : int
        First result page to request.
    per_page : int
        Page size requested from the API (capped at ``max_photos``).
    **kwargs
        Forwarded unchanged to ``photos.search`` (e.g. ``bbox``).

    Yields
    ------
    dict
        Mapping of ``RECORD_KEYS`` to the photo's values.
    """
    if max_photos <= 0:
        # Without this guard the cap below could never trigger.
        return

    if max_photos < per_page:
        per_page = max_photos
        n_pages = 1
    else:
        n_pages = int(np.ceil(max_photos / per_page))

    n_yielded = 0
    for page in range(start_page, start_page + n_pages):
        res = f_api.photos.search(
                accuracy=16,
                content_type=1,
                per_page=per_page,
                page=page,
                **kwargs)
        photos = res['photos']['photo']
        if not photos:
            # Ran out of results; later pages would be empty as well.
            return

        for photo in photos:
            exif_data = get_exif_data(f_api, photo['id'])
            if exif_data is None:
                continue

            url = BASE_URL.format(farm=photo['farm'],
                                  server=photo['server'],
                                  pid=photo['id'],
                                  secret=photo['secret'])
            record_vals = (photo['id'], photo['owner'], photo['title'],
                           *exif_data, url)
            yield dict(zip(RECORD_KEYS, record_vals))

            n_yielded += 1
            if n_yielded >= max_photos:
                # `return`, not `break`: a break would only leave the inner
                # loop and the remaining pages would still be fetched.
                return

In [4]:
# Credentials come from the environment so real keys are never saved with the
# notebook; the original 'XXXX' placeholders remain as the fallback.
API_KEY = os.environ.get('FLICKR_API_KEY', 'XXXX')
API_SECRET = os.environ.get('FLICKR_API_SECRET', 'XXXX')

# BASE_URL is already defined in the configuration cell; it is not repeated
# here.
f_api = flickr.FlickrAPI(API_KEY, API_SECRET, format='parsed-json')

# Flickr's bbox is "min_lon,min_lat,max_lon,max_lat". Bounding-box searches
# return at most 250 results per page, so asking for 500 per page would
# silently cap the crawl at a single page of 250 hits.
data = pd.DataFrame(list(get_geotagged_photos(
    f_api,
    bbox='30.92,-25.52,32.01,-22.36',
    min_taken_date=datetime.strptime('2017-10-12', "%Y-%m-%d").timestamp(),
    max_photos=500,
    per_page=250,
    start_page=1)))

In [5]:
# index=False: the default RangeIndex carries no information and would come
# back as a spurious "Unnamed: 0" column when the file is read again.
data.to_csv('./kruger.csv', index=False)

In [6]:
# Load the photo table from ./kruger.csv, replacing the in-memory `data`.
data = pd.read_csv('./kruger.csv')

In [7]:
# Download every photo and resize it to 299x299 RGB. Rows are never skipped,
# so image i always corresponds to row i of `data`.
all_imgs = []
for url in data.photo_url:
    img = io.imread(url)
    # Normalise to exactly three channels first; otherwise resize() would
    # interpolate *across* the channel axis to reach the requested shape.
    if img.ndim == 2:
        img = np.stack([img] * 3, axis=-1)      # grayscale -> RGB
    elif img.ndim == 3 and img.shape[-1] == 4:
        img = img[..., :3]                      # drop the alpha channel
    # `mode` is spelled out to keep the current 'constant' padding and to
    # silence the warning about the default changing to 'reflect'.
    all_imgs.append(transform.resize(img, (299, 299, 3), mode='constant'))
all_imgs = np.asarray(all_imgs)
# NOTE: ndarray.dump writes a pickle; only load this file from trusted
# sources.
all_imgs.dump('./flickr_imgs.pk')

  warn("The default mode, 'constant', will be changed to 'reflect' in "


In [8]:
# Build the InceptionV3 network with its default arguments.
model = InceptionV3()

In [9]:
# InceptionV3 expects pixel values scaled to [-1, 1]. `all_imgs` comes from
# skimage's resize(), which yields floats in [0, 1] for 8-bit images, so it
# is rescaled here before prediction. Only the top guess per image is kept.
preds = decode_predictions(model.predict(all_imgs * 2.0 - 1.0), top=1)

In [10]:
# Every entry of `preds` is a list holding one (id, label, score) tuple for
# an image; pull the label and the score out into two new columns.
classes = [image_preds[0][1] for image_preds in preds]
probs = [image_preds[0][2] for image_preds in preds]
data['class'], data['prob'] = classes, probs

In [11]:
# The crawl stores coordinates under 'latitude' / 'longitude'; rename them to
# the short 'lat' / 'long' names selected here (rename is a no-op for columns
# that are absent, so a table that already uses the short names still works).
kruger_data = (
    data
    .rename(columns={'latitude': 'lat', 'longitude': 'long'})
    [['date_taken', 'lat', 'long', 'photo_id', 'photo_url',
      'title', 'user_id', 'class', 'prob']]
)
kruger_data.to_csv('./kruger_classified.csv')

In [12]:
# Popup template: a heading plus the photo itself. The closing quote of the
# `alt` attribute was missing, which swallowed the `style` attribute.
html = '''
<html>
  <body>
    <h2>{photo_title}</h2>
    <img src="{photo_url}" alt="{photo_title}" style="width:400px;">
  </body>
</html>
'''

# Marker colours, consumed one per plotted class.
icon_color = iter([
    'red', 'blue', 'green', 'purple', 'orange', 'darkred',
    'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue',
    'darkpurple', 'white', 'pink', 'lightblue', 'lightgreen',
    'gray', 'black', 'lightgray'
])

m = folium.Map(location=[-25.52, 30.92],
               zoom_start=4,
               tiles='Stamen Terrain')

# Keep only confident predictions.
high_prob = kruger_data[kruger_data.prob > 0.75]

# One toggleable layer per class, for the ten most frequent classes.
for class_name, _count in Counter(high_prob['class']).most_common(10):
    class_data = high_prob[high_prob['class'] == class_name]
    group = folium.FeatureGroup(name=class_name, overlay=True)
    c = next(icon_color)
    for _, record in class_data.iterrows():
        # The popup heading shows the predicted class, not the Flickr title.
        iframe = folium.IFrame(html=html.format(photo_title=record['class'],
                                                photo_url=record['photo_url']),
                               width=500,
                               height=500)
        popup = folium.Popup(html=iframe, max_width=600)
        icon = folium.Icon(color=c)
        # folium expects [latitude, longitude], the same order as the map
        # centre above.
        marker = folium.Marker(location=[record['lat'],
                                         record['long']],
                               popup=popup, icon=icon)
        group.add_child(marker)
    m.add_child(group)

lc = folium.LayerControl()
lc.add_to(m)
m