In [1]:
import pandas as pd
from pandas.io.json import json_normalize
from pandas import DataFrame
import os
from geopy import geocoders
from geopy.geocoders import GoogleV3
from config import gkey

import geopandas as gpd
from shapely import wkt
from shapely.wkt import loads
import re

In [2]:
# Pull in geojson address data from openaddresses
# (the file named below is the City of Evanston, IL address export)
fname = "resources/us_il_city_of_evanston-addresses-city.geojson"

# read file to dataframe
# gpd.read_file parses the file into `gdf`; the output displayed under this
# cell shows one row per address with a POINT geometry column
gdf = gpd.read_file(fname)

# Matches an unsigned decimal number: optional integer digits, a dot, then at
# least one fractional digit. A leading sign is not part of the match.
simpledec = re.compile(r"\d*\.\d+")
def mround(match):
    """Return the matched decimal text re-rendered with five decimal places.

    Written as a replacement callback for regex substitution: it receives a
    match object and returns the string that should replace the match.
    """
    value = float(match.group(0))
    return f"{value:.5f}"

def _round_geometry(geom):
    """Rebuild a geometry from its WKT text with each decimal rounded to 5 places."""
    rounded_wkt = simpledec.sub(mround, geom.wkt)
    return loads(rounded_wkt)


# Swap every geometry for its 5-decimal-place equivalent, then display the frame
gdf.geometry = gdf.geometry.apply(_round_geometry)
gdf

Unnamed: 0,id,unit,number,street,city,district,region,postcode,hash,geometry
0,,,811,CHICAGO AVE,EVANSTON,,IL,60202,0f1ae9d6dc3e261d,POINT (-87.67892 42.03261)
1,,,2730,CENTRAL ST,EVANSTON,,IL,60201,04d8292c6fe41f75,POINT (-87.71218 42.06420)
2,,,2726,CENTRAL ST,EVANSTON,,IL,60201,0f6aa09515f2553f,POINT (-87.71201 42.06420)
3,,,2720,CENTRAL ST,EVANSTON,,IL,60201,a9c1519f7bce1807,POINT (-87.71154 42.06419)
4,,,2722,CENTRAL ST,EVANSTON,,IL,60201,48834904e3e1b801,POINT (-87.71182 42.06420)
...,...,...,...,...,...,...,...,...,...,...
17130,,,1122,PITNER AVE,EVANSTON,,IL,60202,a06c3531387e9893,POINT (-87.70577 42.03833)
17131,,,1124,PITNER AVE,EVANSTON,,IL,60202,30eaae035c012e54,POINT (-87.70577 42.03839)
17132,,,1514,DEWEY AVE,EVANSTON,,IL,60201,c5f8058ef65e857f,POINT (-87.69672 42.04531)
17133,,,1141,DEWEY AVE,EVANSTON,,IL,60202,ba9fd44f5993932b,POINT (-87.69571 42.03897)


In [3]:
# Re-wrap the GeoDataFrame as a plain pandas DataFrame
addressdf = pd.DataFrame(gdf)


def _geometry_to_wkt(geom):
    """Serialise one geometry to WKT text, keeping 5 decimal places."""
    return wkt.dumps(geom, rounding_precision=5)


# Turn the 'geometry' column from geometry objects into WKT strings
addressdf['geometry'] = addressdf['geometry'].apply(_geometry_to_wkt)


In [4]:
# Check dataframe: after the previous cell the geometry column should hold
# WKT text (e.g. "POINT (-87.67892 42.03261)") rather than geometry objects
addressdf

Unnamed: 0,id,unit,number,street,city,district,region,postcode,hash,geometry
0,,,811,CHICAGO AVE,EVANSTON,,IL,60202,0f1ae9d6dc3e261d,POINT (-87.67892 42.03261)
1,,,2730,CENTRAL ST,EVANSTON,,IL,60201,04d8292c6fe41f75,POINT (-87.71218 42.06420)
2,,,2726,CENTRAL ST,EVANSTON,,IL,60201,0f6aa09515f2553f,POINT (-87.71201 42.06420)
3,,,2720,CENTRAL ST,EVANSTON,,IL,60201,a9c1519f7bce1807,POINT (-87.71154 42.06419)
4,,,2722,CENTRAL ST,EVANSTON,,IL,60201,48834904e3e1b801,POINT (-87.71182 42.06420)
...,...,...,...,...,...,...,...,...,...,...
17130,,,1122,PITNER AVE,EVANSTON,,IL,60202,a06c3531387e9893,POINT (-87.70577 42.03833)
17131,,,1124,PITNER AVE,EVANSTON,,IL,60202,30eaae035c012e54,POINT (-87.70577 42.03839)
17132,,,1514,DEWEY AVE,EVANSTON,,IL,60201,c5f8058ef65e857f,POINT (-87.69672 42.04531)
17133,,,1141,DEWEY AVE,EVANSTON,,IL,60202,ba9fd44f5993932b,POINT (-87.69571 42.03897)


In [5]:
# Combine address columns to one column
# addressdf['address'] = addressdf['street'].str.cat(addressdf['city'].str.cat(addressdf['region'],sep=" ")

# Remove extra strings from the latitude and longitude in the geometry column,
# leaving just "<longitude> <latitude>".
# regex=False forces literal matching: '(' and ')' are regex metacharacters,
# and the default of `regex` differs between pandas versions, so relying on it
# either warns or depends on single-character special-casing.
addressdf['geometry'] = (
    addressdf['geometry']
    .str.replace('POINT ', '', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)
addressdf

Unnamed: 0,id,unit,number,street,city,district,region,postcode,hash,geometry
0,,,811,CHICAGO AVE,EVANSTON,,IL,60202,0f1ae9d6dc3e261d,-87.67892 42.03261
1,,,2730,CENTRAL ST,EVANSTON,,IL,60201,04d8292c6fe41f75,-87.71218 42.06420
2,,,2726,CENTRAL ST,EVANSTON,,IL,60201,0f6aa09515f2553f,-87.71201 42.06420
3,,,2720,CENTRAL ST,EVANSTON,,IL,60201,a9c1519f7bce1807,-87.71154 42.06419
4,,,2722,CENTRAL ST,EVANSTON,,IL,60201,48834904e3e1b801,-87.71182 42.06420
...,...,...,...,...,...,...,...,...,...,...
17130,,,1122,PITNER AVE,EVANSTON,,IL,60202,a06c3531387e9893,-87.70577 42.03833
17131,,,1124,PITNER AVE,EVANSTON,,IL,60202,30eaae035c012e54,-87.70577 42.03839
17132,,,1514,DEWEY AVE,EVANSTON,,IL,60201,c5f8058ef65e857f,-87.69672 42.04531
17133,,,1141,DEWEY AVE,EVANSTON,,IL,60202,ba9fd44f5993932b,-87.69571 42.03897


In [6]:
# Remove un-needed columns
# The result is re-bound (no inplace=True) and errors='ignore' is passed so the
# cell can be re-run safely: once 'unit' and 'district' are gone, a second run
# is a no-op instead of raising KeyError.
addressdf = addressdf.drop(columns=['unit', 'district'], errors='ignore')
addressdf

Unnamed: 0,id,number,street,city,region,postcode,hash,geometry
0,,811,CHICAGO AVE,EVANSTON,IL,60202,0f1ae9d6dc3e261d,-87.67892 42.03261
1,,2730,CENTRAL ST,EVANSTON,IL,60201,04d8292c6fe41f75,-87.71218 42.06420
2,,2726,CENTRAL ST,EVANSTON,IL,60201,0f6aa09515f2553f,-87.71201 42.06420
3,,2720,CENTRAL ST,EVANSTON,IL,60201,a9c1519f7bce1807,-87.71154 42.06419
4,,2722,CENTRAL ST,EVANSTON,IL,60201,48834904e3e1b801,-87.71182 42.06420
...,...,...,...,...,...,...,...,...
17130,,1122,PITNER AVE,EVANSTON,IL,60202,a06c3531387e9893,-87.70577 42.03833
17131,,1124,PITNER AVE,EVANSTON,IL,60202,30eaae035c012e54,-87.70577 42.03839
17132,,1514,DEWEY AVE,EVANSTON,IL,60201,c5f8058ef65e857f,-87.69672 42.04531
17133,,1141,DEWEY AVE,EVANSTON,IL,60202,ba9fd44f5993932b,-87.69571 42.03897


In [7]:

# fltraddress = addressdf[addressdf['addresses'].str.lower().str.contains('^6')]
# fltraddress

In [8]:
# adds_sml = pd.read_csv("static/data/sample/adds_sml.csv")
def format_coordinates(x):
    """Convert a "<longitude> <latitude>" string to "<latitude>, <longitude>".

    Parameters
    ----------
    x : str
        Two whitespace-separated coordinate tokens, longitude first. Any
        tokens after the second are ignored.

    Returns
    -------
    str
        The first two tokens, swapped and joined by ", ".

    Raises
    ------
    IndexError
        If ``x`` contains fewer than two tokens.
    """
    # split() with no argument splits on any run of whitespace, so repeated or
    # leading/trailing spaces cannot yield empty tokens and a blank coordinate
    # (which split(' ') silently allowed).
    coordinates = x.split()
    long, lat = coordinates[0], coordinates[1]
    return f"{lat}, {long}"
# Rewrite every geometry value via format_coordinates, then display the frame
geometry_column = addressdf['geometry']
addressdf['geometry'] = geometry_column.map(format_coordinates)
addressdf


Unnamed: 0,id,number,street,city,region,postcode,hash,geometry
0,,811,CHICAGO AVE,EVANSTON,IL,60202,0f1ae9d6dc3e261d,"42.03261, -87.67892"
1,,2730,CENTRAL ST,EVANSTON,IL,60201,04d8292c6fe41f75,"42.06420, -87.71218"
2,,2726,CENTRAL ST,EVANSTON,IL,60201,0f6aa09515f2553f,"42.06420, -87.71201"
3,,2720,CENTRAL ST,EVANSTON,IL,60201,a9c1519f7bce1807,"42.06419, -87.71154"
4,,2722,CENTRAL ST,EVANSTON,IL,60201,48834904e3e1b801,"42.06420, -87.71182"
...,...,...,...,...,...,...,...,...
17130,,1122,PITNER AVE,EVANSTON,IL,60202,a06c3531387e9893,"42.03833, -87.70577"
17131,,1124,PITNER AVE,EVANSTON,IL,60202,30eaae035c012e54,"42.03839, -87.70577"
17132,,1514,DEWEY AVE,EVANSTON,IL,60201,c5f8058ef65e857f,"42.04531, -87.69672"
17133,,1141,DEWEY AVE,EVANSTON,IL,60202,ba9fd44f5993932b,"42.03897, -87.69571"


In [9]:
def _with_trailing_comma(city):
    """Return ``city`` as text ending in a comma, adding one only if missing.

    Values that already end with "," are returned unchanged, so re-running
    this cell does not keep appending commas (e.g. "EVANSTON,,").
    """
    text = f"{city}"
    return text if text.endswith(",") else f"{text},"


# Give each city a trailing comma so the combined address reads "CITY, ST"
addressdf['city'] = addressdf['city'].apply(_with_trailing_comma)
addressdf

Unnamed: 0,id,number,street,city,region,postcode,hash,geometry
0,,811,CHICAGO AVE,"EVANSTON,",IL,60202,0f1ae9d6dc3e261d,"42.03261, -87.67892"
1,,2730,CENTRAL ST,"EVANSTON,",IL,60201,04d8292c6fe41f75,"42.06420, -87.71218"
2,,2726,CENTRAL ST,"EVANSTON,",IL,60201,0f6aa09515f2553f,"42.06420, -87.71201"
3,,2720,CENTRAL ST,"EVANSTON,",IL,60201,a9c1519f7bce1807,"42.06419, -87.71154"
4,,2722,CENTRAL ST,"EVANSTON,",IL,60201,48834904e3e1b801,"42.06420, -87.71182"
...,...,...,...,...,...,...,...,...
17130,,1122,PITNER AVE,"EVANSTON,",IL,60202,a06c3531387e9893,"42.03833, -87.70577"
17131,,1124,PITNER AVE,"EVANSTON,",IL,60202,30eaae035c012e54,"42.03839, -87.70577"
17132,,1514,DEWEY AVE,"EVANSTON,",IL,60201,c5f8058ef65e857f,"42.04531, -87.69672"
17133,,1141,DEWEY AVE,"EVANSTON,",IL,60202,ba9fd44f5993932b,"42.03897, -87.69571"


In [10]:
# Build the one-line address by joining the parts, in order, with single spaces
address_parts = ['number', 'street', 'city', 'region', 'postcode']
full_address = addressdf[address_parts[0]].astype(str)
for part in address_parts[1:]:
    full_address = full_address + " " + addressdf[part].astype(str)

# Attach the result as a new 'addresses' column and display the frame
addressdf = addressdf.assign(addresses=full_address)
addressdf

Unnamed: 0,id,number,street,city,region,postcode,hash,geometry,addresses
0,,811,CHICAGO AVE,"EVANSTON,",IL,60202,0f1ae9d6dc3e261d,"42.03261, -87.67892","811 CHICAGO AVE EVANSTON, IL 60202"
1,,2730,CENTRAL ST,"EVANSTON,",IL,60201,04d8292c6fe41f75,"42.06420, -87.71218","2730 CENTRAL ST EVANSTON, IL 60201"
2,,2726,CENTRAL ST,"EVANSTON,",IL,60201,0f6aa09515f2553f,"42.06420, -87.71201","2726 CENTRAL ST EVANSTON, IL 60201"
3,,2720,CENTRAL ST,"EVANSTON,",IL,60201,a9c1519f7bce1807,"42.06419, -87.71154","2720 CENTRAL ST EVANSTON, IL 60201"
4,,2722,CENTRAL ST,"EVANSTON,",IL,60201,48834904e3e1b801,"42.06420, -87.71182","2722 CENTRAL ST EVANSTON, IL 60201"
...,...,...,...,...,...,...,...,...,...
17130,,1122,PITNER AVE,"EVANSTON,",IL,60202,a06c3531387e9893,"42.03833, -87.70577","1122 PITNER AVE EVANSTON, IL 60202"
17131,,1124,PITNER AVE,"EVANSTON,",IL,60202,30eaae035c012e54,"42.03839, -87.70577","1124 PITNER AVE EVANSTON, IL 60202"
17132,,1514,DEWEY AVE,"EVANSTON,",IL,60201,c5f8058ef65e857f,"42.04531, -87.69672","1514 DEWEY AVE EVANSTON, IL 60201"
17133,,1141,DEWEY AVE,"EVANSTON,",IL,60202,ba9fd44f5993932b,"42.03897, -87.69571","1141 DEWEY AVE EVANSTON, IL 60202"


In [11]:
# Draw a random sample of addresses to work with.
# random_state pins the draw so a fresh Restart & Run All selects the same
# rows; min() keeps the cell working when fewer than SAMPLE_SIZE addresses
# are loaded (sample() raises if n exceeds the row count).
SAMPLE_SIZE = 100
SAMPLE_SEED = 42
adds_sml = addressdf.sample(n=min(SAMPLE_SIZE, len(addressdf)), random_state=SAMPLE_SEED)
adds_sml

Unnamed: 0,id,number,street,city,region,postcode,hash,geometry,addresses
4111,,1324,HARTREY AVE,"EVANSTON,",IL,60201,bd3bade5b025800e,"42.04201, -87.70408","1324 HARTREY AVE EVANSTON, IL 60201"
543,,1025,EMERSON ST,"EVANSTON,",IL,60201,009f0b324606ab42,"42.05232, -87.68607","1025 EMERSON ST EVANSTON, IL 60201"
6844,,2526,RIDGEWAY AVE,"EVANSTON,",IL,60201,4e6dc09d18d252fb,"42.06345, -87.72247","2526 RIDGEWAY AVE EVANSTON, IL 60201"
6594,,731,REBA PL,"EVANSTON,",IL,60202,8712f138d9a4ae06,"42.03001, -87.68184","731 REBA PL EVANSTON, IL 60202"
6210,,2402,PIONEER RD,"EVANSTON,",IL,60201,9c8a9fee47b62881,"42.06103, -87.70614","2402 PIONEER RD EVANSTON, IL 60201"
...,...,...,...,...,...,...,...,...,...
2504,,829,DODGE AVE,"EVANSTON,",IL,60202,f32d6402dc58292c,"42.03306, -87.69901","829 DODGE AVE EVANSTON, IL 60202"
7288,,816,SIMPSON ST,"EVANSTON,",IL,60201,e26437cf4c8e1d3a,"42.05558, -87.68265","816 SIMPSON ST EVANSTON, IL 60201"
14586,,2813,THAYER ST,"EVANSTON,",IL,60201,4013d30f75d7567b,"42.06811, -87.71334","2813 THAYER ST EVANSTON, IL 60201"
9796,,1926,DEMPSTER ST,"EVANSTON,",IL,60202,71a3403a3a75cb6c,"42.03929, -87.70162","1926 DEMPSTER ST EVANSTON, IL 60202"


In [12]:
import numpy as np

# Write the sampled addresses to a text file, one address per line
address_list_path = "static/data/raw/sample_address_list.txt"
# np.savetxt does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there)
os.makedirs(os.path.dirname(address_list_path), exist_ok=True)
address_list = adds_sml['addresses'].to_numpy()
# fmt='%5s' writes each entry as a string, left-padded only if shorter than 5 characters
np.savetxt(address_list_path, address_list, fmt='%5s')

In [13]:
# GRAB A SERIES OF STREET VIEW IMAGES FROM latlong PROVIDED
import json
# Import google_streetview for the api module
import google_streetview.api
import time
import glob
import streetview
import itertools 


In [15]:
# urllib.request fetches the images; urllib.parse URL-encodes the addresses.
# os (imported in the first cell) handles directory creation and path joining.
import urllib.parse
import urllib.request

# First part of the Street View URL, up to the location value;
# size=600x600 requests a 600x600px image
pre="https://maps.googleapis.com/maps/api/streetview?size=600x600&location="

# Path to a text file containing one address per line. Each address completes
# the URL and also provides the filename of the saved image.
text="static/data/raw/sample_address_list.txt"

# Last part of the URL, appended after the address: the API key (taken from
# config.gkey rather than hard-coded here) and the field of view
suf=f"&key={gkey}&fov=60"

# Directory that will store the Street View images.
# NOTE: the name shadows the built-in dir(); it is kept unchanged in case
# other cells refer to it.
dir=r"static/data/raw/locs/"

# Create the output directory if it is not present
os.makedirs(dir, exist_ok=True)

# Read the address list once through the managed handle so the file is closed
# as soon as the list is built; strip() drops the newline and any padding
with open(text,"r") as text_file:
    lines = [line.strip() for line in text_file]

for line in lines:
    # A blank line would request an image for an empty location - skip it
    if not line:
        continue
    # Filename-friendly form of the address: spaces become "+", commas are kept
    ln = line.replace(" " , "+")
    # Full request URL; quote_plus percent-encodes the address (spaces -> "+",
    # commas and other reserved characters -> %XX) so it cannot break the query.
    # URL embeds the API key: do not print or log it.
    URL = pre + urllib.parse.quote_plus(line) + suf
    # Local path the image for this address is saved to
    filename = os.path.join(dir, ln + ".jpg")
    # Fetch and save the Street View image for this address
    urllib.request.urlretrieve(URL, filename)

CLEANED UP ADDRESS LINE:
1324+HARTREY+AVE+EVANSTON,+IL+60201
URL FOR STREETVIEW IMAGE:
https://maps.googleapis.com/maps/api/streetview?size=600x600&location=1324+HARTREY+AVE+EVANSTON,+IL+60201&key=AIzaSyD9P1GXPv8rQG-xOI_8GfWaU5DZSnwP7Zg&fov=60
OUTPUT FILENAME:
static/data/raw/locs/1324+HARTREY+AVE+EVANSTON,+IL+60201.jpg
CLEANED UP ADDRESS LINE:
1025+EMERSON+ST+EVANSTON,+IL+60201
URL FOR STREETVIEW IMAGE:
https://maps.googleapis.com/maps/api/streetview?size=600x600&location=1025+EMERSON+ST+EVANSTON,+IL+60201&key=AIzaSyD9P1GXPv8rQG-xOI_8GfWaU5DZSnwP7Zg&fov=60
OUTPUT FILENAME:
static/data/raw/locs/1025+EMERSON+ST+EVANSTON,+IL+60201.jpg
CLEANED UP ADDRESS LINE:
2526+RIDGEWAY+AVE+EVANSTON,+IL+60201
URL FOR STREETVIEW IMAGE:
https://maps.googleapis.com/maps/api/streetview?size=600x600&location=2526+RIDGEWAY+AVE+EVANSTON,+IL+60201&key=AIzaSyD9P1GXPv8rQG-xOI_8GfWaU5DZSnwP7Zg&fov=60
OUTPUT FILENAME:
static/data/raw/locs/2526+RIDGEWAY+AVE+EVANSTON,+IL+60201.jpg
CLEANED UP ADDRESS LINE:
731+REBA+

In [None]:
# adds_sml.to_csv (r'static/data/sample/adds_sml.csv', index = True, header=True)

In [None]:
# # create a params dict that will be updated with new city each iteration
# params = {
#     'size': '640x640', # max 640x640 pixels
#     'heading': '90',
#     'location': '',
#     'pitch': '-0.76',
#     'fov': 120,
#     'key': gkey
# }

# # Loop through the cities_pd and run a lat/long search for each city
# geometry_list = adds_sml['geometry'].to_list()
# i = 0
# for latlong in geometry_list[50:60]:
    
#     # update address key value
#     params['location'] = latlong
    
#     # Create a results object
# #     results = google_streetview.api.results(params)
#     results = google_streetview.api.results([params])
    
    
     

#     # Download images to directory 'downloads'
#     results.download_links(f"static/data/raw/geo{i}",)
#     old_file_name = f"static/data/raw/geo{i}/gsv_0.jpg"
#     new_file_name = f"static/data/raw/geo{i}/gsv_{i}.jpg"

#     os.rename(old_file_name, new_file_name)
    
#     #pull .jpeg into main folder rename .jpeg{i}
    
#     i += 1
    
#     # Wait for 1 second
#     #time.sleep(1)

In [None]:
# # *** ORIGINAL PULL THAT WORKS ***
# # Define parameters for street view api
# params = [{
#     'size': '600x300', # max 640x640 pixels
#     'location': '41.9295582,-87.7159139',
#     'heading': '90',
#     'pitch': '-0.76',
#     'key': gkey
# }]

# # Create a results object
# results = google_streetview.api.results(params)

# # Download images to directory 'downloads'
# results.download_links('static/data/raw')

In [None]:
## PRACTICING IMAGE PRE-PROCESSING

# # load and show an image with Pillow

# # load the image
# image = Image.open('static/data/test/brickornot.jpg')
# # summarize some details about the image
# print(image.format)
# print(image.mode)
# print(image.size)
# # show the image
# image.show()

In [None]:
'''# load all images in a directory
loaded_images = list()
for filename in listdir('images'):
	# load image
	img_data = image.imread('images/' + filename)
	# store loaded image
	loaded_images.append(img_data)
	print('> loaded %s %s' % (filename, img_data.shape))'''


In [None]:
%matplotlib inline
import PIL
from PIL import Image
from os import listdir
from matplotlib import image
from matplotlib import pyplot

# example of saving a grayscale version of a loaded image
import os
from PIL import Image
# load the image
image = Image.open('static/data/raw/gsv_0.jpg')
# convert the image to grayscale ('L' mode); the result is held separately in
# gs_image, which is what gets saved at the end of this cell
gs_image = image.convert(mode='L')
# report the size of the image
print(image.size)
# create a thumbnail and preserve aspect ratio
# (called on `image` only, after gs_image was already created)
image.thumbnail((100,100))
# report the size of the thumbnail
print(image.size)
# make sure the output folder exists, otherwise save() fails on a fresh checkout
output_path = 'static/data/test_convert/gsv_0.jpg'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# save in jpeg format
gs_image.save(output_path)

In [None]:
# load the image again and show it
# (this is the same path the grayscale copy was saved to in the previous cell)
image2 = Image.open('static/data/test_convert/gsv_0.jpg')
# show the image
image2.show()

In [None]:
'''# save as PNG format
image.save('opera_house.png', format='PNG')
# load the image again and inspect the format
image2 = Image.open('opera_house.png')
print(image2.format)'''