In [3]:
import pandas as pd
import numpy as np
import argparse
import os
import random
import shapefile
import sys
import requests
import time

random.seed('street view')

# Ler o arquivo CSV com os códigos dos países

paises = pd.read_csv("UNSD — Methodology.csv")
paises = paises[paises['Region Name'].isin(['Europe', 'Oceania']) | 
       paises['Country or Area'].isin(['South Africa','Botswana','Ghana','Senegal',
                                       'Sri Lanka','Bangladesh','Japan', 'South Korea','Mexico']) |
       paises['Intermediate Region Name'].isin(['South America','Northern America','South-eastern Asia'])]

shape_file = "TM_WORLD_BORDERS-0.3.shp"
if not os.path.exists(shape_file):
    print("Cannot find " + shape_file + ". Please download it from "
    "http://thematicmapping.org/downloads/world_borders.php "
    "and try again.")
    sys.exit()

sf = shapefile.Reader(shape_file)

records = pd.DataFrame([record for record in sf.records()])
shapes = pd.DataFrame([record for record in sf.shapes()])
shapes = shapes[records[2].isin(list(paises["ISO-alpha3 Code"]))]
records = records[records[2].isin(list(paises["ISO-alpha3 Code"]))]

records['perc_area'] = (records[5] - min(records[5]))/(sum(records[5]))
records
#borders = shapes.iloc[0][0].points
#borders

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,perc_area
3,AL,AL,ALB,8,Albania,2740,3153731,150,39,20.068,41.143,5.204397e-04
6,AQ,AS,ASM,16,American Samoa,20,64051,9,61,-170.730,-14.318,3.798830e-06
7,AR,AR,ARG,32,Argentina,273669,38747148,19,5,-65.167,-35.377,5.198110e-02
8,AS,AU,AUS,36,Australia,768230,20310208,9,53,136.189,-24.973,1.459188e-01
13,BG,BD,BGD,50,Bangladesh,13017,15328112,142,34,89.941,24.218,2.472468e-03
15,BK,BA,BIH,70,Bosnia and Herzegovina,5120,3915238,150,39,17.786,44.169,9.725005e-04
16,BL,BO,BOL,68,Bolivia,108438,9182015,19,5,-64.671,-16.715,2.059688e-02
19,BP,SB,SLB,90,Solomon Islands,2799,472419,9,54,160.109,-9.611,5.316462e-04
20,BR,BR,BRA,76,Brazil,845942,186830759,19,5,-53.089,-10.772,1.606795e-01
21,BU,BG,BGR,100,Bulgaria,11063,7744591,150,151,25.231,42.761,2.101323e-03


In [4]:
# Determine if a point is inside a given polygon or not
# Polygon is a list of (x,y) pairs.
# http://www.ariel.com.au/a/python-point-int-poly.html
def point_inside_polygon(x, y, poly):
	n = len(poly)
	inside = False
	p1x, p1y = poly[0]
	for i in range(n+1):
		p2x, p2y = poly[i % n]
		if y > min(p1y, p2y):
			if y <= max(p1y, p2y):
				if x <= max(p1x, p2x):
					if p1y != p2y:
						xinters = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x
					if p1x == p2x or x <= xinters:
						inside = not inside
		p1x, p1y = p2x, p2y
	return inside


def get_street_view_images(images_wanted, seed):

	paises = pd.read_csv("UNSD — Methodology.csv", usecols = ["ISO-alpha3 Code"])['ISO-alpha3 Code']
    
	# Google Street View Image API
	# 25,000 image requests per 24 hours
	# See https://developers.google.com/maps/documentation/streetview/
	API_KEY = "AIzaSyC4tTaiv-T9hjjyt2nQ-eb8cm0fHeDt2os"
	GOOGLE_URL = ("http://maps.googleapis.com/maps/api/streetview?sensor=false&"
			"size=640x640&key=" + API_KEY)

	IMG_PREFIX = "img_"
	IMG_SUFFIX = ".jpg"

	attempts, imagery_hits= 0, 0

	if not os.path.exists("Images"):
		os.makedirs("Images")
    
	random.seed(seed)
    
	start_time = time.time()   
	try:
		elapsed_time = 0
		while(elapsed_time < 1800):
			elapsed_time = time.time() - start_time
			i = int(np.random.choice(range(0,len(records)),1, p = records['perc_area']))
			#print(i)
			record = records.iloc[i]
			heading = str(random.random()*360)
			#print("Finding country")
			#print(record[2], record[4])
			#print(shapes.iloc[i][0].bbox)
			min_lon = shapes.iloc[i][0].bbox[0]
			min_lat = shapes.iloc[i][0].bbox[1]
			max_lon = shapes.iloc[i][0].bbox[2]
			max_lat = shapes.iloc[i][0].bbox[3]
			borders = shapes.iloc[i][0].points
			attempts += 1
			rand_lat = round(random.uniform(min_lat, max_lat),2)
			rand_lon = round(random.uniform(min_lon, max_lon),2)
			# print attempts, rand_lat, rand_lon
			# Is (lat,lon) inside borders?
			if point_inside_polygon(rand_lon, rand_lat, borders):
				#print("  In country")
				#country_hits += 1
				lat_lon = str(rand_lat) + "," + str(rand_lon)
				outfile = os.path.join("Images", IMG_PREFIX + lat_lon + IMG_SUFFIX)
				url = GOOGLE_URL + "&location=" + lat_lon
				if heading:
					url += "&heading=" + heading
				try:
					r = requests.get(url, stream=True)
					with open(outfile, 'wb') as f:
						for chunk in r:
							f.write(chunk)
				except KeyboardInterrupt:
					elapsed_time = time.time() - start_time
					#print("Keyboard interrupt")
				except:
					pass
				if os.path.isfile(outfile):
					#print(lat_lon)
					if os.path.getsize(outfile) < 9000:
						#print("    No imagery")
						#imagery_misses += 1
						os.remove(outfile)
					else:
						#print("    ========== Got one! ==========")
						imagery_hits += 1
						if imagery_hits == images_wanted:
							elapsed_time = time.time() - start_time
							break
	except KeyboardInterrupt:
		elapsed_time = time.time() - start_time
		print("Keyboard interrupt")

	print("Elapsed time:\t", elapsed_time)
	print("Number of hits:\t", imagery_hits)
	print("Imagery hit rate per second:\t", imagery_hits/elapsed_time)

	# End of file

In [None]:
get_street_view_images(250, 'street view')

In [5]:
def get_street_view_images_2(images_wanted, seed):

	paises = pd.read_csv("UNSD — Methodology.csv", usecols = ["ISO-alpha3 Code"])['ISO-alpha3 Code']
    
	# Google Street View Image API
	# 25,000 image requests per 24 hours
	# See https://developers.google.com/maps/documentation/streetview/
	API_KEY = "AIzaSyC4tTaiv-T9hjjyt2nQ-eb8cm0fHeDt2os"
	GOOGLE_URL = ("http://maps.googleapis.com/maps/api/streetview?sensor=false&"
			"size=640x640&key=" + API_KEY)

	IMG_PREFIX = "img_"
	IMG_SUFFIX = ".jpg"

	attempts, imagery_hits= 0, 0

	if not os.path.exists("Images 2"):
		os.makedirs("Images 2")
    
	random.seed(seed)
    
	local_hit = 0
	local_start_time = 0

	start_time = time.time()
	try:
		elapsed_time = 0
		while(elapsed_time < 1800):
			elapsed_time = time.time() - start_time 
			attempts += 1
			if (local_hit == 0):
				i = int(np.random.choice(range(0,len(records)),1, p = records['perc_area']))
				print(i)
				record = records.iloc[i]
				heading = str(random.random()*360)
				print("Finding country")
				print(record[2], record[4])
				print(shapes.iloc[i][0].bbox)
				min_lon = shapes.iloc[i][0].bbox[0]
				min_lat = shapes.iloc[i][0].bbox[1]
				max_lon = shapes.iloc[i][0].bbox[2]
				max_lat = shapes.iloc[i][0].bbox[3]
				borders = shapes.iloc[i][0].points
				rand_lat = round(random.uniform(min_lat, max_lat),2)
				rand_lon = round(random.uniform(min_lon, max_lon),2)
				print(attempts, rand_lat, rand_lon)
			else:
				if (time.time() - local_start_time >= 100):
					local_hit = 0
					attempts -= 1
					continue
				rand_lat = round(random.uniform(rand_lat - rand_lat/60, rand_lat + rand_lat/60),2)
				rand_lon = round(random.uniform(rand_lon - rand_lon/60, rand_lon + rand_lon/60),2)
			# Is (lat,lon) inside borders?
			if point_inside_polygon(rand_lon, rand_lat, borders):
				print("  In country")
				#country_hits += 1
				lat_lon = str(rand_lat) + "," + str(rand_lon)
				print(lat_lon)
				outfile = os.path.join("Images 2", IMG_PREFIX + lat_lon + IMG_SUFFIX)
				url = GOOGLE_URL + "&location=" + lat_lon
				if heading:
					url += "&heading=" + heading
				try:
					r = requests.get(url, stream=True)
					with open(outfile, 'wb') as f:
						for chunk in r:
							f.write(chunk)
				except KeyboardInterrupt:
					elapsed_time = time.time() - start_time
					print("Keyboard interrupt")
				except:
					pass
				if os.path.isfile(outfile):
					print(lat_lon)
					if os.path.getsize(outfile) < 9000:
						print("    No imagery")
						#imagery_misses += 1
						os.remove(outfile)
					else:
						print("    ========== Got one! ==========")
						imagery_hits += 1
						local_hit = 1
						local_start_time = time.time()
						if imagery_hits == images_wanted:
							elapsed_time = time.time() - start_time
							break
	except KeyboardInterrupt:
		elapsed_time = time.time() - start_time
		print("Keyboard interrupt")

	print("Elapsed time:\t", elapsed_time)
	print("Number of hits:\t", imagery_hits)
	print("Imagery hit rate per second:\t", imagery_hits/elapsed_time)

	# End of file

In [None]:
get_street_view_images_2(250, 'street view')

3
Finding country
AUS Australia
[112.90721100000007, -54.75389100000001, 159.101898, -10.051390000000026]
1 -34.03 130.37
  In country
Keyboard interrupt
78
Finding country
ZAF South Africa
[16.48333000000008, -46.96972699999998, 37.98166700000013, -22.136391000000003]
2 -36.81 26.76
8
Finding country
BRA Brazil
[-74.01055899999994, -33.74389600000001, -29.839999999999975, 5.273888999999997]
3 -19.49 -56.24
  In country
