In [13]:
import pandas as pd
import altair as alt
import geopandas as gpd
import json
from shapely.geometry import Point, Polygon
import shapely.wkt

In [14]:
geojson_path = '../data/vancouver.geojson'
# Read inside a context manager so the file handle is closed as soon as
# parsing finishes instead of being left open for the garbage collector.
with open(geojson_path) as geojson_file:
    van_json = json.load(geojson_file)

In [15]:
# GeoJSON names that must be rewritten so they match the crime data spelling;
# any name not listed here is kept unchanged.
name_fixes = {
    'Downtown': 'Central Business District',
    'Arbutus-Ridge': 'Arbutus Ridge',
}

# Rename in place (van_json is mutated) and collect the resulting names.
geojson_neiborhoods = set()
for feature in van_json['features']:
    props = feature['properties']
    props['Name'] = name_fixes.get(props['Name'], props['Name'])
    geojson_neiborhoods.add(props['Name'])

In [16]:
# Load the crime data CSV into a DataFrame
data_path = '../data/crimedata_csv_all_years.csv'
van_pd = pd.read_csv(filepath_or_buffer=data_path)

In [17]:
# Row filter for the crime data, assembled from its three conditions.
neighbourhood_filter = (
    # A value is never equal to itself when it is NaN, so this drops rows
    # with a missing neighbourhood.
    'NEIGHBOURHOOD == NEIGHBOURHOOD'
    # Exclude these two neighbourhoods by name.
    ' & NEIGHBOURHOOD != "Musqueam"'
    ' & NEIGHBOURHOOD != "Stanley Park"'
)
van_pd_less = van_pd.query(neighbourhood_filter)

# Distinct neighbourhood names that remain after filtering
van_pd_less_neighborhood = set(van_pd_less['NEIGHBOURHOOD'])

In [18]:
# Names present in the GeoJSON but absent from the filtered crime data
geojson_neiborhoods.difference(van_pd_less_neighborhood)

set()

In [19]:
# Names present in the filtered crime data but absent from the GeoJSON
van_pd_less_neighborhood.difference(geojson_neiborhoods)

set()

In [20]:
# Write van_json (including any in-place edits made to it in earlier cells)
# to a new GeoJSON file so it can be reloaded from disk.
geojson_filepath = '../data/our_geojson.geojson'

with open(geojson_filepath, 'w') as our_geojson:
    json.dump(van_json, our_geojson)

In [21]:
def open_geojson(path):
    """
    Opens a geojson file at "path" filepath and returns the parsed JSON.

    The file is read as UTF-8, the encoding required for GeoJSON by RFC 7946,
    rather than the platform's locale default, so non-ASCII property values
    decode the same way on every operating system.
    """
    with open(path, encoding='utf-8') as json_data:
        d = json.load(json_data)
    return d

def get_geopandas_df(path):
    """
    Builds a geopandas dataframe from the geojson file
    at "path" filepath
    """
    parsed_geojson = open_geojson(path)
    return gpd.GeoDataFrame.from_features(parsed_geojson)

# Create geopandas dataframe from the GeoJSON file at geojson_filepath,
# naming the neighbourhood column like the crime data and dropping the
# 'description' column.
gdf = (
    get_geopandas_df(geojson_filepath)
    .rename(columns = {'Name': 'NEIGHBOURHOOD'})
    .drop(columns = 'description')
)

In [22]:
# Display the GeoDataFrame to check its geometry and NEIGHBOURHOOD columns
gdf

Unnamed: 0,geometry,NEIGHBOURHOOD
0,"POLYGON ((-123.16408 49.25748, -123.16386 49.2...",Arbutus Ridge
1,"POLYGON ((-123.15528 49.23457, -123.15513 49.2...",Shaughnessy
2,"POLYGON ((-123.10003 49.27278, -123.10003 49.2...",Mount Pleasant
3,"POLYGON ((-123.12652 49.28084, -123.12600 49.2...",Central Business District
4,"POLYGON ((-123.13625 49.27584, -123.13623 49.2...",West End
5,"POLYGON ((-123.09993 49.27492, -123.09988 49.2...",Strathcona
6,"POLYGON ((-123.05649 49.29188, -123.05648 49.2...",Grandview-Woodland
7,"POLYGON ((-123.05660 49.26215, -123.05660 49.2...",Hastings-Sunrise
8,"POLYGON ((-123.10566 49.23316, -123.10628 49.2...",Riley Park
9,"POLYGON ((-123.11619 49.23354, -123.11713 49.2...",South Cambie


In [23]:
# Number of records per (neighbourhood, crime type) pair.
# 'count' tallies non-null MINUTE values, so rows with a missing MINUTE
# are not included in COUNT.
crime_cnt = (
    van_pd_less
    .groupby(['NEIGHBOURHOOD', 'TYPE'])
    .agg(COUNT=('MINUTE', 'count'))
    .reset_index()
)
crime_cnt

Unnamed: 0,NEIGHBOURHOOD,TYPE,COUNT
0,Arbutus Ridge,Break and Enter Commercial,351
1,Arbutus Ridge,Break and Enter Residential/Other,1842
2,Arbutus Ridge,Mischief,1041
3,Arbutus Ridge,Other Theft,385
4,Arbutus Ridge,Theft from Vehicle,2119
...,...,...,...
193,West Point Grey,Theft from Vehicle,2349
194,West Point Grey,Theft of Bicycle,445
195,West Point Grey,Theft of Vehicle,482
196,West Point Grey,Vehicle Collision or Pedestrian Struck (with F...,6


In [24]:
# Collapse the per-type counts into a single total per neighbourhood
crime_cnt = (
    crime_cnt
    .groupby('NEIGHBOURHOOD')
    .agg({'COUNT': 'sum'})
    .reset_index()
)
crime_cnt

Unnamed: 0,NEIGHBOURHOOD,COUNT
0,Arbutus Ridge,6808
1,Central Business District,136931
2,Dunbar-Southlands,8781
3,Fairview,36629
4,Grandview-Woodland,31535
5,Hastings-Sunrise,21216
6,Kensington-Cedar Cottage,28350
7,Kerrisdale,8455
8,Killarney,11824
9,Kitsilano,30580


In [None]:
# Attach the per-neighbourhood totals to the geometries; both frames share
# the NEIGHBOURHOOD column, and only names present in both are kept.
crime_geo_cnt = gdf.merge(crime_cnt, how = 'inner', on = 'NEIGHBOURHOOD')
crime_geo_cnt

In [None]:
# Serialise the merged frame to GeoJSON text, parse it back into plain
# Python objects, and hand the list of features to Altair as inline data.
geojson_text = crime_geo_cnt.to_json()
alt_json = json.loads(geojson_text)
alt_base = alt.Data(values = alt_json['features'])

In [None]:
# Outline layer drawn on top of the choropleth. A geoshape mark is filled by
# default, so without filled=False this layer would paint the default mark
# colour over the shaded neighbourhoods beneath it. The chart size is set
# once on the layered chart below rather than on an individual layer.
base_map = alt.Chart(alt_base, title = 'Vancouver Crime Count').mark_geoshape(
        filled=False,
        stroke='white',
        strokeWidth=1
    )


# Choropleth layer: each neighbourhood is shaded by its total crime COUNT,
# read from the feature's properties.
choro = alt.Chart(alt_base).mark_geoshape(
    fill = 'lightgray', 
    stroke = 'white'
).encode(
    color = 'properties.COUNT:Q'
)

(choro + base_map).properties(
    width = 700, height = 400
)

In [31]:
# Scratch check: lower-casing makes the comparison with 'all' case-insensitive
'all' == 'aLl'.lower()

True