# Explore Geocode Data for Toronto, Ontario, Canada

Prepare the information.
Retrieve the dataframe containing the Toronto neighborhood information from the earlier exercise using the stored CSV file.

In [7]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import geocoder # library for geocoding information

In [5]:
# Ensure the python geocoder module is available to this Jupyter notebook.
!conda install -c conda-forge geocoder -y

Collecting package metadata: done
Solving environment: \ 
The environment is inconsistent, please check the package plan carefully
The following packages are causing the inconsistency:

  - defaults/linux-64::anaconda==5.3.1=py37_0
  - defaults/linux-64::astropy==3.0.4=py37h14c3975_0
  - defaults/linux-64::bkcharts==0.2=py37_0
  - defaults/linux-64::blaze==0.11.3=py37_0
  - defaults/linux-64::bokeh==0.13.0=py37_0
  - defaults/linux-64::bottleneck==1.2.1=py37h035aef0_1
  - defaults/linux-64::dask==0.19.1=py37_0
  - defaults/linux-64::datashape==0.5.4=py37_1
  - defaults/linux-64::mkl-service==1.1.2=py37h90e4bf4_5
  - defaults/linux-64::numba==0.39.0=py37h04863e7_0
  - defaults/linux-64::numexpr==2.6.8=py37hd89afb7_0
  - defaults/linux-64::odo==0.5.1=py37_0
  - defaults/linux-64::pytables==3.4.4=py37ha205bf6_0
  - defaults/linux-64::pytest-arraydiff==0.2=py37h39e3cac_0
  - defaults/linux-64::pytest-astropy==0.4.0=py37_0
  - defaults/linux-64::pytest-doctestplus==0.1.3=py37_0
  - defaults

*Note: This notebook depends on the reuse of the postal codes dataframe from the prior exercise.*

In [4]:
# Load the data frame from the prior exercise
postalcodes_df = pd.read_csv('postalCodesToronto_dataframe.csv')
postalcodes_df.head()

# Load a second copy of the data frame from the prior exercise for using with the Google geocoder API
postalcodes_g_df = pd.read_csv('postalCodesToronto_dataframe.csv')

# Load a third copy of the data frame from the prior exercise for using with the ArcGIS geocoder API
postalcodes_arc_df = pd.read_csv('postalCodesToronto_dataframe.csv')

postalcodes_df.shape[0]

## Retrieve the geographic information using the Python library
Use the Geocoder package or the csv file to create a dataframe with the following columns:
* Postal Code
* Borough
* Neighborhood
* Latitude
* Longitude

#### This section would load the data using Google geocoder API
This section stores the information into the dataframe, postalcodes_g_df.
However, the requests to Google times out frequently.

In [8]:
Lat_g_list=[]
Lng_g_list=[]

# Use the postalcodes_g_df dataframe for the Google geocoder API
for p in range(postalcodes_g_df.shape[0]):
    # initialize the variable to None
    lat_lng_coords = None
    postal_code = postalcodes_g_df.iloc[p]['Postcode']
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]

    Lat_g_list.append(g.latlng[0])
    Lng_g_list.append(g.latlng[1])
    
    postalcodes_g_df.loc[postalcodes_g_df.index[postal_code], 'Latitude'] = latitude
    postalcodes_g_df.loc[postalcodes_g_df.index[postal_code], 'Longitude'] = longitude

Status code Unknown from https://maps.googleapis.com/maps/api/geocode/json: ERROR - HTTPSConnectionPool(host='maps.googleapis.com', port=443): Read timed out. (read timeout=5.0)


KeyboardInterrupt: 

In [9]:
postalcodes_g_df.shape
postalcodes_g_df.head()
print(len(Lat_list))
print(len(Lng_list))

NameError: name 'Lat_list' is not defined

#### This section would load the data using ArcGIS geocoder API
The information would be stored in the dataframe, postalcodes_arc_df.

In [10]:
# Alternate use: using arcgis
Lat_arc_list=[]
Lng_arc_list=[]
for i in range(postalcodes_arc_df.shape[0]):
    address='{}, Toronto, Ontario'.format(postalcodes_arc_df.at[i,'Postcode'])
    g = geocoder.arcgis(address)
    Lat_arc_list.append(g.latlng[0])
    Lng_arc_list.append(g.latlng[1])
    
    # find the index of the postal code and append the two columns to the dataframe
    postalcodes_arc_df.loc[postalcodes_arc_df.index[i], 'Latitude'] = g.latlng[0]
    postalcodes_arc_df.loc[postalcodes_arc_df.index[i], 'Longitude'] = g.latlng[1]

In [11]:
postalcodes_arc_df.shape[0]
postalcodes_arc_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.78573,-79.15875
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.76569,-79.175256
3,M1G,Scarborough,Woburn,43.768359,-79.21759
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944


#### Retrieve the information from the published data file
*For reference, the geocodes are available in file online:*
http://cocl.us/Geospatial_data/Geospatial_Coordinates.csv

In [12]:
# Download the data set
!wget -q -O 'Geospatial_Coordinates.csv' http://cocl.us/Geospatial_data/
print('Geospatial Coordinates Data downloaded!')

Geospatial Coordinates Data downloaded!


In [13]:
geospatial_df = pd.read_csv('Geospatial_Coordinates.csv')
geospatial_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Format the information
Add the latitude and longitude columns to the postal codes dataframe where the postal code is the index on both dataframes.

In [14]:
# merge the two dataframes
postalcodes_df.index
geospatial_df.index

for i in range(postalcodes_df.shape[0]):
    postalcodes_df.loc[postalcodes_df.index[i], 'Latitude'] = geospatial_df.loc[geospatial_df.index[i], 'Latitude']
    postalcodes_df.loc[postalcodes_df.index[i], 'Longitude'] = geospatial_df.loc[geospatial_df.index[i], 'Longitude']
postalcodes_df.shape[0]
postalcodes_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Display the final output

In [15]:
# Save the dataframe to a CSV file for later use.
export_csv = postalcodes_df.to_csv (r'postalCodesTorontoGeo_dataframe.csv', index = None, header=True)