In [35]:
import fiona
import os,os.path
from shapely.geometry import shape,mapping
from shapely.ops import transform
from functools import partial
import pyproj
from fiona.crs import from_epsg
import osmnx as ox
import matplotlib.pyplot as plt
import geopandas as gpd
import glob
import json
import numpy as np
import requests as rq
import collections
import logging
import time
from PIL import Image
from io import BytesIO
import pandas as pd
import dateutil.parser
from tqdm import tqdm
import os
%matplotlib inline

In [36]:
# Create a custom logger
logger = logging.getLogger(__name__)

# Create handlers
f_handler = logging.FileHandler('Mapillary_download.log')
f_handler.setLevel(logging.DEBUG)

# Create formatters and add it to handlers
f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
f_handler.setFormatter(f_format)

# Add handlers to the logger
logger.addHandler(f_handler)

In [37]:
root= '/datasets_1/sagarj/BellLabs/CitySV_Data/city_centers_sampling/'
imageDownloadPath = '/datasets_1/sagarj/BellLabs/CitySV_Data/CITY_MAPPILARY_IMAGES/'
cities = os.listdir(root)
mapillary_Image_url = 'https://images.mapillary.com/' #<image_key>/thumb-640.jpg

In [38]:
cities

['Helsinki, Helsinki sub-region, Uusimaa, Southern Finland, Mainland Finland, Finland',
 'Madrid, Área metropolitana de Madrid y Corredor del Henares, Community of Madrid, 28001, Spain',
 'Berlin, Germany',
 'Moscow, Central Federal District, Russia',
 'London, Greater London, England, United Kingdom',
 'Munich, Bavaria, Germany',
 'Zurich, District Zurich, Zurich, Switzerland',
 'Paris, Ile-de-France, Metropolitan France, France',
 'Vienna, Austria',
 'Stockholms kommun, Stockholm County, Sweden',
 'Manhattan, New York County, New York, United States',
 'San Francisco, California, United States',
 'Chicago, Cook County, Illinois, United States',
 'Los Angeles, Los Angeles County, California, United States',
 'Toronto, Golden Horseshoe, Ontario, Canada',
 'Hamburg, Germany',
 'Dusseldorf, North Rhine-Westphalia, Germany',
 'Tokyo, Japan',
 'Houston, Harris County, Texas, United States',
 'Frankfurt, Hesse, Germany',
 'New York, United States']

In [39]:
jsons = glob.glob(root+'*/*/*.json')

In [40]:
jsons[10000:20000]

['/datasets_1/sagarj/BellLabs/CitySV_Data/city_centers_sampling/Munich, Bavaria, Germany/pointsJson/Munich484.json',
 '/datasets_1/sagarj/BellLabs/CitySV_Data/city_centers_sampling/Munich, Bavaria, Germany/pointsJson/Munich996.json',
 '/datasets_1/sagarj/BellLabs/CitySV_Data/city_centers_sampling/Munich, Bavaria, Germany/pointsJson/Munich1389.json',
 '/datasets_1/sagarj/BellLabs/CitySV_Data/city_centers_sampling/Munich, Bavaria, Germany/pointsJson/Munich2005.json',
 '/datasets_1/sagarj/BellLabs/CitySV_Data/city_centers_sampling/Munich, Bavaria, Germany/pointsJson/Munich638.json',
 '/datasets_1/sagarj/BellLabs/CitySV_Data/city_centers_sampling/Munich, Bavaria, Germany/pointsJson/Munich725.json',
 '/datasets_1/sagarj/BellLabs/CitySV_Data/city_centers_sampling/Munich, Bavaria, Germany/pointsJson/Munich336.json',
 '/datasets_1/sagarj/BellLabs/CitySV_Data/city_centers_sampling/Munich, Bavaria, Germany/pointsJson/Munich1614.json',
 '/datasets_1/sagarj/BellLabs/CitySV_Data/city_centers_sampli

In [41]:
Final_images = {} 
for f in jsons:
    ImageDict = json.load(open(f,'r'))
    for image in ImageDict:
        if image['properties']['key'] not in Final_images:
            Final_images[image['properties']['key']] = image


In [42]:
print(len(Final_images.keys()))
img_keys = list(Final_images.keys())

600197


In [43]:
def FilterByTime(imageDict , ThreshYear, ThreshMonth ):
    filtered = {}
    for k in imageDict:
        yourdate = dateutil.parser.parse(imageDict[k]['properties']['captured_at'])
        if yourdate.year > ThreshYear and yourdate.month > ThreshMonth:
            filtered[k] = imageDict[k]
    return filtered

def getYearDict(imageDict):
    yearFreq = {}
    for k in imageDict:
        yourdate = dateutil.parser.parse(imageDict[k]['properties']['captured_at'])
        if yourdate.year in yearFreq:
            yearFreq[yourdate.year]+=1
        else:
            yearFreq[yourdate.year]=1
    return yearFreq
        

In [44]:
Final_images[img_keys[10]]

{'type': 'Feature',
 'properties': {'ca': 61.705896614195865,
  'camera_make': 'samsung',
  'camera_model': 'SM-G930F',
  'captured_at': '2017-03-24T11:48:28.010Z',
  'key': 'IuWoSanUEv3R1bRw6xqwiQ',
  'pano': False,
  'sequence_key': 'ThtfC1OOOBamB6PdazArZA',
  'user_key': 'NGhN-InWNjyIjPkg3uEWNw',
  'username': 'jaakkoh',
  'quality_score': 5},
 'geometry': {'type': 'Point', 'coordinates': [24.9299466, 60.1644196]}}

In [45]:
yearFreq = getYearDict(Final_images)

In [46]:
yearFreq

{2017: 72845,
 2016: 62721,
 2019: 135751,
 2020: 168977,
 2014: 9606,
 2018: 64856,
 2015: 38278,
 2021: 45346,
 2008: 177,
 2012: 1126,
 2013: 21,
 2010: 109,
 2009: 27,
 1970: 6,
 1913: 3,
 2004: 1,
 1994: 8,
 1901: 2,
 2003: 8,
 2011: 279,
 2006: 41,
 2002: 8,
 1921: 1}

In [47]:
pandas_dict = {'Imgkey':[] , 'long': [] , 'lat': [], 'isPano':[] , 'captureTime':[], 'long_lat_string' : [] }  
for k in Final_images:
    pandas_dict['Imgkey'].append(Final_images[k]['properties']['key'])
    pandas_dict['isPano'].append(Final_images[k]['properties']['pano'])
    pandas_dict['captureTime'].append(Final_images[k]['properties']['captured_at'])
    pandas_dict['long'].append(Final_images[k]['geometry']['coordinates'][0])
    pandas_dict['lat'].append(Final_images[k]['geometry']['coordinates'][1])
    coords = Final_images[k]['geometry']['coordinates']
    pandas_dict['long_lat_string'].append(str(format(coords[0], '.4f')) + '_' + str(format(coords[1], '.4f')))
mapillary_df = pd.DataFrame.from_dict(pandas_dict)

In [49]:
def getDownloaded(path):
    filenames = glob.glob(path+'*.jpg')
    imagekeys = [f.split('/')[-1].split('.')[0] for f in filenames]
    return set(imagekeys)

In [51]:
downloaded = getDownloaded(imageDownloadPath)
print(len(downloaded))

0


In [None]:
for k in tqdm(img_keys):
    ImageKey = Final_images[k]['properties']['key']
    if ImageKey not in downloaded:
        url = mapillary_Image_url + ImageKey + "/thumb-640.jpg"
        logger.debug("Downloading: %s"%(url))
        response = rq.get(url)
        if response.status_code == 200:
            try:
                img = Image.open(BytesIO(response.content))
                filename = '%s/%s.%s' % (imageDownloadPath, ImageKey, 'jpg')
                img.save(filename)
            except:
                logger.debug("Something bad happened while downloading: %s"%(url))
        else:
            logger.debug("Download failed: %s"%(url))

  0%|          | 819/600197 [02:03<27:26:01,  6.07it/s]