In [None]:
# https://medium.com/dataexplorations/focus-generating-an-interactive-legend-in-altair-9a92b5714c55
# https://medium.com/@dougm_9851/the-battle-of-neighborhoods-coursera-ibm-capstone-project-52b4292ef410

# Cluster the neighborhoods of Toronto

Read the previously saved neighborhood data.

In [1]:
# Imports: standard library first, then third-party packages.
import time

import altair as alt
import folium
import geopandas as gpd
import numpy as np  # vectorized numerical arrays
import pandas as pd  # tabular data analysis
from fiona.crs import from_epsg

# Display options: None removes pandas' limit on how many columns / rows
# are rendered when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [None]:
from pyproj import CRS, Transformer

# Convert one Web-Mercator point (EPSG:3857, metres) to WGS84 (EPSG:4326, degrees).
# Transformer.from_crs replaces the deprecated Proj(init=...) + pyproj.transform()
# pair. always_xy=True keeps the (x, y) = (longitude, latitude) ordering that the
# old init= syntax produced, so x2 is the longitude and y2 the latitude.
inProj = CRS.from_epsg(3857)
outProj = CRS.from_epsg(4326)
mercator_to_wgs84 = Transformer.from_crs(inProj, outProj, always_xy=True)

x1, y1 = -11705274.6374, 4826473.6922
x2, y2 = mercator_to_wgs84.transform(x1, y1)
print(x2, y2)

In [None]:
import time

import pyproj  # imported here so the cell also runs on a fresh kernel

# Time a single inverse spherical-Mercator projection (metres -> lon/lat degrees).
# The measured interval covers building the Proj object and the conversion itself.
# perf_counter() is a monotonic, high-resolution clock intended for short intervals.
start = time.perf_counter()
x, y = -11705274.6374, 4826473.6922
p = pyproj.Proj("+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +a=6378137 +b=6378137 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
lon, lat = p(x, y, inverse=True)
end = time.perf_counter()

print(lat, lon)
print('%.7f' % (end - start))  # elapsed seconds

#### Initial analysis 
How does the data look and how large is it?

In [2]:
# Load the semicolon-separated neighbourhood table, report its size and
# preview the first two rows.
df = pd.read_csv('./data/toronto_neighborhoods.csv', sep=';')
print(f'Total rows: {len(df)}')
df.head(n=2)

Total rows: 103


Unnamed: 0,Postcode,Borough,Neighbourhood,latlng,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.653963;-79.387207,43.653963,-79.387207
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.653963;-79.387207,43.653963,-79.387207


#### We need neighborhood granularity, so split the 'Neighbourhood' column into separate values

Select only the columns needed for this (Postcode, Neighbourhood) and create an intermediate dataframe with one unique Postcode-Neighbourhood pair on each row.

Then join this intermediate dataframe with the initial dataframe so that each row also has the coordinates info.

In [None]:
# Build a (Postcode, Neighbourhood) table with exactly one neighbourhood per row.
# str.split turns each comma-separated cell into a list; explode() then gives every
# list element its own row and repeats the Postcode next to it.
df_1 = (
    df[['Postcode', 'Neighbourhood']]
    .assign(Neighbourhood=lambda frame: frame['Neighbourhood'].str.split(','))
    .explode('Neighbourhood')
    .dropna(subset=['Neighbourhood'])  # no row for a postcode without neighbourhoods
    .reset_index(drop=True)
)

# Attach borough and coordinates to every neighbourhood through the postcode.
# drop_duplicates() keeps the cell safe to re-run: after the first run `df` holds one
# row per neighbourhood, and merging those repeated postcodes again would multiply rows.
cols = ['Postcode', 'Borough', 'Latitude', 'Longitude']
df = pd.merge(df[cols].drop_duplicates(), df_1, on='Postcode', how='inner')
print('Total rows: {}'.format(df.shape[0]))
df.head()

In [None]:
# Load the CSV version of the Toronto shape data and preview its first record.
shapefile_csv = './data/toronto_shapefile.csv'
shape_df = pd.read_csv(shapefile_csv)
shape_df.head(n=1)

In [None]:
cols = ['AREA_NAME', 'geometry']
# .copy() yields an independent frame, so the column assignments below do not write
# into a slice of shape_df (which triggers pandas' SettingWithCopyWarning).
shape_json_df = shape_df[cols].copy()

# Step 1: single quotes -> double quotes.
# regex=False makes every pattern below a literal string ('(' alone is not a valid regex).
shape_json_df['geometry1'] = shape_json_df['geometry'].str.replace("'", '"', regex=False)

# Step 2: drop the u prefix in front of a quote, then turn parentheses into brackets.
# NOTE(review): the 'u"' replacement also removes a trailing "u" from any text that
# sits directly before a closing quote - confirm the geometry text has no such value.
shape_json_df['geometry2'] = (
    shape_json_df['geometry1']
    .str.replace('u"', '"', regex=False)
    .str.replace('(', '[', regex=False)
    .str.replace(')', ']', regex=False)
)

# Keep only the cleaned text, under the original column name.
shape_json_df1 = (
    shape_json_df[['AREA_NAME', 'geometry2']]
    .rename(columns={'geometry2': 'geometry'})
)

# Index by area name and serialise the frame to a JSON string.
shape_json_df2 = shape_json_df1.set_index('AREA_NAME')
shape_json_df3 = shape_json_df2.to_json()

shape_json_df2.head(2)

In [3]:
# Read the neighbourhood polygons from the shapefile with geopandas and preview two rows.
# Note: this rebinds shape_df, which an earlier cell filled from the CSV file.
shapefile_path = './data/toronto_neighbourhoods_shape/NEIGHBOURHOODS_UTM6.shp'
shape_df = gpd.read_file(shapefile_path)
shape_df.head(n=2)

Unnamed: 0,AREA_CODE,AREA_NAME,geometry
0,97,Yonge-St.Clair (97),"POLYGON ((629662.0703489644 4837487.488926678,..."
1,27,York University Heights (27),"POLYGON ((620308.0167449361 4846066.457439966,..."


In [None]:
import pyproj
import shapely
import shapely.ops as ops
from shapely.geometry.polygon import Polygon
from functools import partial


# Area of the first neighbourhood polygon, measured in an Albers equal-area projection.
geom = shape_df.iloc[0].geometry

# The layer stores projected coordinates (the preview shows values such as
# 629662, 4837487), not longitude/latitude degrees, so the source CRS must be the
# layer's own CRS rather than a hard-coded EPSG:4326.
if shape_df.crs is None:
    raise ValueError('shape_df has no CRS; set it before reprojecting')
source_crs = pyproj.CRS.from_user_input(shape_df.crs)

# The Albers standard parallels are latitudes in degrees, so read them from the
# polygon's bounds after converting it to WGS84. bounds = (minx, miny, maxx, maxy).
to_wgs84 = pyproj.Transformer.from_crs(source_crs, 'EPSG:4326', always_xy=True)
bounds_wgs84 = ops.transform(to_wgs84.transform, geom).bounds
lat_south, lat_north = bounds_wgs84[1], bounds_wgs84[3]

# PROJ names the standard parallels lat_1 / lat_2 (with underscore).
equal_area_crs = pyproj.CRS.from_dict({
    'proj': 'aea',
    'lat_1': lat_south,
    'lat_2': lat_north,
    'datum': 'WGS84',
    'units': 'm',
})
to_equal_area = pyproj.Transformer.from_crs(source_crs, equal_area_crs, always_xy=True)
geom_area = ops.transform(to_equal_area.transform, geom)

# Print the area in m^2
print(geom_area.area)


In [None]:
# Reproject a five-row sample of the neighbourhood layer to WGS84 (EPSG:4326) and plot it.
# `test` is derived from shape_df right here so the cell runs on a fresh kernel instead
# of depending on a variable that is only created further down the notebook.
test = shape_df.head().to_crs(epsg=4326)
test.plot()

In [None]:
# Five-row sample of the neighbourhood layer, reprojected to WGS84 (EPSG:4326).
# Reprojecting the whole frame (instead of assigning a converted geometry column into
# the head() slice) avoids pandas' SettingWithCopyWarning and keeps the frame's CRS
# consistent with its coordinates.
test = shape_df.head().to_crs(epsg=4326)

In [None]:
#shape_df['Neighbourhood'] = shape_df['AREA_NAME'].apply(lambda x: x.split(' (')[0])
import ast  # no longer used by this cell; kept in case other cells rely on the name
import json

# shape_df.to_json() returns JSON text. json.loads is the matching parser;
# ast.literal_eval only understands Python literals and fails on JSON's
# null / true / false.
shape_json = json.loads(shape_df.to_json())

shape_json

In [None]:
# Map centred on Toronto with the neighbourhood data added as an overlay.
# NOTE(review): shape_json_df3 is the output of DataFrame.to_json(), i.e. a
# {column: {index: value}} mapping rather than a GeoJSON document - confirm that
# folium.GeoJson can actually render it.
toronto_centre = [43.651070, -79.347015]
m = folium.Map(location=toronto_centre, zoom_start=12, control_scale=True)
neighbourhood_layer = folium.GeoJson(shape_json_df3)
neighbourhood_layer.add_to(m)
m

In [None]:
# Display the AREA_NAME-indexed frame holding the cleaned geometry text.
# NOTE(review): the imports cell sets display.max_rows to None, so this renders
# every row; consider .head() if the full dump is not needed.
shape_json_df2

In [None]:
import json
import requests


url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'
us_states = f'{url}/us-states.json'

# Download the example US-states GeoJSON.
# The timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error (e.g. 404) here instead of letting it
# surface later as a confusing JSON decoding failure.
response = requests.get(us_states, timeout=30)
response.raise_for_status()
geo_json_data = json.loads(response.text)

# Map centred on the continental US with the state outlines as an overlay.
m = folium.Map([43, -100], zoom_start=4)

folium.GeoJson(geo_json_data).add_to(m)

m

In [None]:
# Display the parsed US-states JSON that was downloaded in the previous cell.
geo_json_data

In [None]:
# Show the version of the `proj4` module installed in this environment.
# NOTE(review): the rest of the notebook uses `pyproj`; confirm that a module named
# `proj4` is actually installed, otherwise this import raises ImportError.
import proj4
proj4.__version__