In [1]:
import os

import geopandas as gpd
import numpy as np
import pandas as pd
from geopandas.tools import overlay

In [7]:
# Root folder holding the GIS inputs read by the cells below.
# Defaults to the original local volume; set the RST_GIS_DIR environment variable
# to run the notebook on another machine without editing this cell.
# os.path.join(..., '') guarantees a trailing separator, because later cells build
# file paths by plain string concatenation (gis_dir + '<file name>').
gis_dir = os.path.join(
    os.environ.get('RST_GIS_DIR', '/Volumes/Data2/RST/notebook/GIS/'), ''
)

In [8]:
# Load the TTS 2006 zone polygons and reproject them to EPSG:32617 so that the
# later .area calls are computed in a projected CRS.
tts_path = gis_dir + 'tts2016_2006zn_shp/tts2016_2006zn_2018_region.shp'
tts_utm = gpd.read_file(tts_path).to_crs(epsg='32617')

# Keep only region 1 (presumably the City of Toronto - confirm against the TTS
# documentation) and only the zone id plus geometry.
in_region_1 = tts_utm['region'] == 1
tts = tts_utm[in_region_1][['gta06', 'geometry']]

In [10]:
# Load the ward polygons, keep the ward code, name and geometry, and reproject to
# the same CRS (EPSG:32617) as the TTS zones so the two layers can be overlaid.
ward_columns = ['AREA_SHORT_CODE', 'AREA_NAME', 'geometry']
ward = (
    gpd.read_file(gis_dir + 'wards_no-island.geojson')[ward_columns]
    .to_crs(epsg='32617')
)
# Force ward names to plain Python strings.
ward['AREA_NAME'] = ward['AREA_NAME'].astype(str)

# Splitting TTS Zones by Ward Boundaries

In [11]:
# Intersect every TTS zone with every ward: each output row is the part of one
# zone that falls inside one ward, carrying the attributes of both layers.
tts_split = gpd.overlay(tts, ward, how="intersection")

In [12]:
# Area of each zone/ward intersection piece, in CRS units squared.
tts_split['area_split'] = tts_split.geometry.area
# Area of each complete, unsplit zone; used as the denominator for the share.
tts['area'] = tts.geometry.area

In [13]:
# Attach each zone's full area to its pieces (joined on the shared columns,
# i.e. the zone id), then express every piece as a share of its parent zone.
zone_area = tts[['gta06', 'area']]
tts_split = tts_split.merge(zone_area)
tts_split['prop'] = tts_split['area_split'].div(tts_split['area'])

# Eliminating Zone Fragments With Less Than 7.5% of the Original Zone Area

Based on inspection in QGIS, I decided that most fragments covering less than 7.5% of their original zone's area were not significant (i.e., they straddled a ward boundary only because of geometry mismatches, or because a small sliver of a park lies in another ward).

In [14]:
# Trial run of the sliver filter: keep pieces covering more than 7.5% of their zone.
above_sliver_cutoff = tts_split['prop'].gt(0.075)
test = tts_split[above_sliver_cutoff]

In [15]:
# Of the surviving pieces, show those that are still clearly partial (< 92.5% of
# the zone), i.e. zones that are genuinely shared between wards.
test.loc[test['prop'] < 0.925]

Unnamed: 0,gta06,AREA_SHORT_CODE,AREA_NAME,geometry,area_split,area,prop
11,97,11,University-Rosedale,"POLYGON Z ((627191.014 4834696.751 0.000, 6271...",3.251805e+05,4.899606e+05,0.663687
12,97,9,Davenport,"POLYGON Z ((627036.932 4834143.423 0.000, 6270...",1.638711e+05,4.899606e+05,0.334458
47,26,10,Spadina-Fort York,"POLYGON Z ((632078.689 4834336.970 0.000, 6320...",1.588839e+05,2.726667e+05,0.582704
48,26,13,Toronto Centre,"POLYGON Z ((632033.514 4834476.047 0.000, 6320...",1.137828e+05,2.726667e+05,0.417296
51,15,10,Spadina-Fort York,"POLYGON Z ((632465.437 4834459.262 0.000, 6325...",7.227167e+04,1.577015e+05,0.458281
...,...,...,...,...,...,...,...
881,615,23,Scarborough North,"POLYGON Z ((640965.982 4853603.603 0.000, 6412...",1.357230e+06,1.685716e+06,0.805136
889,617,25,Scarborough-Rouge Park,MULTIPOLYGON Z (((642887.800 4854388.859 0.000...,2.457360e+05,2.074729e+06,0.118442
890,617,23,Scarborough North,"POLYGON Z ((641911.995 4855644.878 0.000, 6419...",1.828993e+06,2.074729e+06,0.881558
893,619,25,Scarborough-Rouge Park,MULTIPOLYGON Z (((642908.123 4852290.716 0.000...,2.759394e+05,1.116046e+06,0.247247


In [16]:
# Inspect the largest pieces that fall below the 7.5% cut-off, to judge how much
# is lost by discarding them.
slivers = tts_split[tts_split['prop'].lt(0.075)]
slivers.sort_values('prop', ascending=False).head()

Unnamed: 0,gta06,AREA_SHORT_CODE,AREA_NAME,geometry,area_split,area,prop
173,540,19,Beaches-East York,"POLYGON Z ((638656.922 4836787.529 0.000, 6386...",71260.925786,1015192.0,0.070195
588,336,1,Etobicoke North,"POLYGON Z ((613856.726 4836171.156 0.000, 6138...",161644.881877,2313692.0,0.069864
828,577,24,Scarborough-Guildwood,"POLYGON Z ((645013.277 4850845.706 0.000, 6450...",63007.501095,924133.6,0.06818
75,88,4,Parkdale-High Park,"POLYGON Z ((626737.291 4832360.777 0.000, 6267...",10701.102331,174906.1,0.061182
628,403,1,Etobicoke North,MULTIPOLYGON Z (((617047.348 4844127.528 0.000...,83183.431279,1551835.0,0.053603


In [17]:
# Drop the sliver pieces (<= 7.5% of the parent zone's area).
# .copy() makes the result an independent frame: a later cell assigns into its
# 'prop' column, and doing that on a bare boolean-mask slice raises pandas'
# SettingWithCopyWarning.
tts_split_cleaned = tts_split[tts_split['prop'] > 0.075].copy()
tts_split_cleaned

Unnamed: 0,gta06,AREA_SHORT_CODE,AREA_NAME,geometry,area_split,area,prop
0,90,10,Spadina-Fort York,"POLYGON Z ((627728.491 4833632.008 0.000, 6278...",9.014072e+05,9.014072e+05,1.000000
1,67,10,Spadina-Fort York,"POLYGON Z ((629304.428 4834186.016 0.000, 6292...",3.904960e+05,3.904960e+05,1.000000
3,68,11,University-Rosedale,"POLYGON Z ((629149.036 4834678.525 0.000, 6291...",4.592420e+05,4.600036e+05,0.998344
5,75,11,University-Rosedale,"POLYGON Z ((628541.137 4834491.592 0.000, 6284...",3.656223e+05,3.661744e+05,0.998492
6,76,10,Spadina-Fort York,"POLYGON Z ((628699.184 4833978.518 0.000, 6286...",3.180885e+05,3.180885e+05,1.000000
...,...,...,...,...,...,...,...
907,605,23,Scarborough North,"POLYGON Z ((639768.817 4850978.597 0.000, 6399...",6.693021e+05,6.693021e+05,1.000000
908,611,23,Scarborough North,"POLYGON Z ((639768.817 4850978.597 0.000, 6398...",1.713185e+06,1.713185e+06,1.000000
909,604,23,Scarborough North,"POLYGON Z ((639249.148 4849900.597 0.000, 6392...",2.628566e+05,2.628566e+05,1.000000
910,616,23,Scarborough North,"POLYGON Z ((640595.570 4855210.096 0.000, 6408...",1.780954e+06,1.780988e+06,0.999981


In [18]:
# Pieces covering at least 92.5% of their zone are treated as the whole zone
# (share snapped to 1); smaller shares are left unchanged.
# assign() builds a new frame instead of writing into what may be a slice of
# tts_split, which is what triggered SettingWithCopyWarning.
tts_split_cleaned = tts_split_cleaned.assign(
    prop=np.where(tts_split_cleaned['prop'] >= 0.925, 1, tts_split_cleaned['prop'])
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


In [21]:
tts_split_cleaned

Unnamed: 0,gta06,AREA_SHORT_CODE,AREA_NAME,geometry,area_split,area,prop
0,90,10,Spadina-Fort York,"POLYGON Z ((627728.491 4833632.008 0.000, 6278...",9.014072e+05,9.014072e+05,1.0
1,67,10,Spadina-Fort York,"POLYGON Z ((629304.428 4834186.016 0.000, 6292...",3.904960e+05,3.904960e+05,1.0
3,68,11,University-Rosedale,"POLYGON Z ((629149.036 4834678.525 0.000, 6291...",4.592420e+05,4.600036e+05,1.0
5,75,11,University-Rosedale,"POLYGON Z ((628541.137 4834491.592 0.000, 6284...",3.656223e+05,3.661744e+05,1.0
6,76,10,Spadina-Fort York,"POLYGON Z ((628699.184 4833978.518 0.000, 6286...",3.180885e+05,3.180885e+05,1.0
...,...,...,...,...,...,...,...
907,605,23,Scarborough North,"POLYGON Z ((639768.817 4850978.597 0.000, 6399...",6.693021e+05,6.693021e+05,1.0
908,611,23,Scarborough North,"POLYGON Z ((639768.817 4850978.597 0.000, 6398...",1.713185e+06,1.713185e+06,1.0
909,604,23,Scarborough North,"POLYGON Z ((639249.148 4849900.597 0.000, 6392...",2.628566e+05,2.628566e+05,1.0
910,616,23,Scarborough North,"POLYGON Z ((640595.570 4855210.096 0.000, 6408...",1.780954e+06,1.780988e+06,1.0


# Splitting/Summing Ward Attributes

## C0

In [19]:
# Per-zone totals for the C0 attribute: one row per gta06 zone with a 'total'
# column, as shown in the recorded output.
c0_path = 'ward_c0.csv'
c0_df = pd.read_csv(c0_path)
c0_df

Unnamed: 0,gta06,total
0,1,103
1,14,464
2,15,656
3,16,1053
4,17,3302
...,...,...
492,618,18
493,619,203
494,620,760
495,621,224


In [23]:
# Attach the ward code and area share of every retained zone piece to the zone
# totals. Zones shared between wards appear once per ward after this join.
zone_shares = tts_split_cleaned[['gta06', 'prop', 'AREA_SHORT_CODE']]
c0_df = c0_df.merge(zone_shares)
c0_df = c0_df[['gta06', 'AREA_SHORT_CODE', 'total', 'prop']]

c0_df

Unnamed: 0,gta06,AREA_SHORT_CODE,total,prop
0,1,14,103,0.225378
1,1,19,103,0.774622
2,14,10,464,0.164113
3,14,13,464,0.835508
4,15,10,656,0.458281
...,...,...,...,...
523,619,25,203,0.247247
524,619,23,203,0.752753
525,620,25,760,1.000000
526,621,25,224,1.000000


In [26]:
# Apportion each zone's total to its wards by area share, then keep only the ward
# code and the apportioned total.
#
# The whole step is one non-mutating chain, so c0_df is rebound exactly once.
# It is never left with 'total' already weighted while 'prop' is still present,
# which would let a repeated run silently apply the share a second time.
# NOTE(review): the output recorded with this notebook matches total * prop**2
# (e.g. 103 * 0.225378**2 ~= 5.23), so it appears to come from such a double
# application - restart the kernel and re-run all cells to refresh it.
c0_df = (
    c0_df
    .assign(total=lambda merged: merged['prop'] * merged['total'])
    .rename(columns={'AREA_SHORT_CODE': 'ward'})
    .loc[:, ['ward', 'total']]
)
c0_df

Unnamed: 0,ward,total
0,14,5.231902
1,19,61.804071
2,10,12.496959
3,13,323.905881
4,10,137.774354
...,...,...
523,25,12.409641
524,23,115.027225
525,25,760.000000
526,25,224.000000


In [27]:
# Sum the apportioned totals per ward; as_index=False keeps 'ward' as a regular
# column, and the result is ordered by ward code.
c0_df = c0_df.groupby('ward', as_index=False).sum().sort_values('ward')
c0_df

Unnamed: 0,ward,total
0,1,11084.470206
1,2,7223.130512
2,3,16895.732592
3,4,35954.0
4,5,27631.829282
5,6,16097.738496
6,7,16907.0
7,8,21917.657565
8,9,33801.137528
9,10,32108.246737


In [18]:
# Write the ward-level table to CSV without the index column.
# NOTE(review): `ward_trips` is not defined in any cell visible here, and this
# cell's execution count is out of sequence - confirm where it comes from
# before relying on a fresh Restart & Run All.
output_csv = 'ward_.csv'
ward_trips.to_csv(output_csv, index=False)