In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from geopandas.tools import overlay

In [2]:
tts = gpd.read_file('tts2016_2006zn_shp/tts2016_2006zn_2018_region.shp')
tts = tts.to_crs(epsg='32617')
tts = tts[tts['region']==1]
tts = tts[['gta06', 'geometry']]

In [3]:
ward = gpd.read_file('wards_no-island.geojson')
ward = ward[['AREA_SHORT_CODE', 'AREA_NAME', 'geometry']]
ward = ward.to_crs(epsg = '32617')
ward['AREA_NAME'] = ward['AREA_NAME'].astype(str)

# Splitting TTS Zones by Ward Boundaries

In [4]:
tts_split = overlay(tts, ward, how="intersection")

In [5]:
tts_split['area_split'] = tts_split.area
tts['area'] = tts.area

In [6]:
tts_split = tts_split.merge(tts[['gta06', 'area']])
tts_split['prop'] = tts_split['area_split']/tts_split['area']

# Eliminating Zones With Less Than 7.5% of Original Area

By QGIS investigation, I decided that most areas less than 7.5% were not significant (ie, the reason why they straddled ward boundaries was because of geomtery mismatches or a small sliver of a park lies in another ward).

In [7]:
test = tts_split[tts_split['prop']>0.075]

In [8]:
test[test['prop'] < 0.925]

Unnamed: 0,gta06,AREA_SHORT_CODE,AREA_NAME,geometry,area_split,area,prop
11,97,11,University-Rosedale,"POLYGON Z ((627191.014 4834696.751 0.000, 6271...",3.251805e+05,4.899606e+05,0.663687
12,97,9,Davenport,"POLYGON Z ((627036.932 4834143.423 0.000, 6270...",1.638711e+05,4.899606e+05,0.334458
47,26,10,Spadina-Fort York,"POLYGON Z ((632078.689 4834336.970 0.000, 6320...",1.588839e+05,2.726667e+05,0.582704
48,26,13,Toronto Centre,"POLYGON Z ((632033.514 4834476.047 0.000, 6320...",1.137828e+05,2.726667e+05,0.417296
51,15,10,Spadina-Fort York,"POLYGON Z ((632465.437 4834459.262 0.000, 6325...",7.227167e+04,1.577015e+05,0.458281
...,...,...,...,...,...,...,...
881,615,23,Scarborough North,"POLYGON Z ((640965.982 4853603.603 0.000, 6412...",1.357230e+06,1.685716e+06,0.805136
889,617,25,Scarborough-Rouge Park,MULTIPOLYGON Z (((642887.800 4854388.859 0.000...,2.457360e+05,2.074729e+06,0.118442
890,617,23,Scarborough North,"POLYGON Z ((641911.995 4855644.878 0.000, 6419...",1.828993e+06,2.074729e+06,0.881558
893,619,25,Scarborough-Rouge Park,MULTIPOLYGON Z (((642908.123 4852290.716 0.000...,2.759394e+05,1.116046e+06,0.247247


In [9]:
tts_split[tts_split['prop']<0.075].sort_values(by = ['prop'], ascending = False).head()

Unnamed: 0,gta06,AREA_SHORT_CODE,AREA_NAME,geometry,area_split,area,prop
173,540,19,Beaches-East York,"POLYGON Z ((638656.922 4836787.529 0.000, 6386...",71260.925786,1015192.0,0.070195
588,336,1,Etobicoke North,"POLYGON Z ((613856.726 4836171.156 0.000, 6138...",161644.881877,2313692.0,0.069864
828,577,24,Scarborough-Guildwood,"POLYGON Z ((645013.277 4850845.706 0.000, 6450...",63007.501095,924133.6,0.06818
75,88,4,Parkdale-High Park,"POLYGON Z ((626737.291 4832360.777 0.000, 6267...",10701.102331,174906.1,0.061182
628,403,1,Etobicoke North,MULTIPOLYGON Z (((617047.348 4844127.528 0.000...,83183.431279,1551835.0,0.053603


In [10]:
tts_split[tts_split['prop']<0.075].to_file('remainder_tts_wards.geojson', driver = 'GeoJSON')

In [11]:
test[test['prop'] < 0.925].to_file('split_tts_zones_wards.geojson', driver = 'GeoJSON')

In [12]:
tts_split_cleaned = tts_split[tts_split['prop']>0.075]
tts_split_cleaned

Unnamed: 0,gta06,AREA_SHORT_CODE,AREA_NAME,geometry,area_split,area,prop
0,90,10,Spadina-Fort York,"POLYGON Z ((627728.491 4833632.008 0.000, 6278...",9.014072e+05,9.014072e+05,1.000000
1,67,10,Spadina-Fort York,"POLYGON Z ((629304.428 4834186.016 0.000, 6292...",3.904960e+05,3.904960e+05,1.000000
3,68,11,University-Rosedale,"POLYGON Z ((629149.036 4834678.525 0.000, 6291...",4.592420e+05,4.600036e+05,0.998344
5,75,11,University-Rosedale,"POLYGON Z ((628541.137 4834491.592 0.000, 6284...",3.656223e+05,3.661744e+05,0.998492
6,76,10,Spadina-Fort York,"POLYGON Z ((628699.184 4833978.518 0.000, 6286...",3.180885e+05,3.180885e+05,1.000000
...,...,...,...,...,...,...,...
907,605,23,Scarborough North,"POLYGON Z ((639768.817 4850978.597 0.000, 6399...",6.693021e+05,6.693021e+05,1.000000
908,611,23,Scarborough North,"POLYGON Z ((639768.817 4850978.597 0.000, 6398...",1.713185e+06,1.713185e+06,1.000000
909,604,23,Scarborough North,"POLYGON Z ((639249.148 4849900.597 0.000, 6392...",2.628566e+05,2.628566e+05,1.000000
910,616,23,Scarborough North,"POLYGON Z ((640595.570 4855210.096 0.000, 6408...",1.780954e+06,1.780988e+06,0.999981


In [13]:
tts_split_cleaned['prop'] = np.where(tts_split_cleaned['prop'] >= 0.925, 1, tts_split_cleaned['prop'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


# Splitting/Summing TTS Trips

In [14]:
tts_trips = pd.read_csv('/Volumes/Data2/RST/notebook/TTS OD/TTS_TYPE.csv')
tts_trips

Unnamed: 0,origin,destination,type,total
0,1,18,raw_am,15.000000
1,1,35,raw_am,14.000000
2,1,57,raw_am,20.000000
3,1,68,raw_am,14.000000
4,1,212,raw_am,8.000000
...,...,...,...,...
125063,624,493,imm_am,2.034835
125064,624,525,imm_am,1.643979
125065,624,542,imm_md,1.202403
125066,624,595,imm_pm,1.990851


In [15]:
tts_trips = tts_trips.merge(tts_split_cleaned[['gta06', 'prop', 'AREA_SHORT_CODE']], left_on = ['origin'], 
                right_on = ['gta06'])[['origin', 'destination', 'type', 'AREA_SHORT_CODE','total', 'prop']]

tts_trips = tts_trips.merge(tts_split_cleaned[['gta06', 'prop', 'AREA_SHORT_CODE']], left_on = ['destination'], 
                right_on = ['gta06'], suffixes = ['_o', '_d'])[['origin', 'destination', 
                                                                'type', 'AREA_SHORT_CODE_o', 
                                                                'total', 'prop_o', 'prop_d',
                                                               'AREA_SHORT_CODE_d']]

tts_trips

Unnamed: 0,origin,destination,type,AREA_SHORT_CODE_o,total,prop_o,prop_d,AREA_SHORT_CODE_d
0,1,18,raw_am,14,15.000000,0.225378,1.0,13
1,1,18,raw_am,19,15.000000,0.774622,1.0,13
2,1,18,lep_am,14,0.065406,0.225378,1.0,13
3,1,18,lep_am,19,0.065406,0.774622,1.0,13
4,1,18,rac_am,14,3.617538,0.225378,1.0,13
...,...,...,...,...,...,...,...,...
143915,621,623,lep_am,25,0.408945,1.000000,1.0,25
143916,621,623,rac_am,25,8.208310,1.000000,1.0,25
143917,621,623,ab_am,25,0.000026,1.000000,1.0,25
143918,621,623,bl_am,25,1.943062,1.000000,1.0,25


In [16]:
tts_trips['prop_all'] = tts_trips['prop_o'] * tts_trips['prop_d']
ward_trips = tts_trips.copy()
ward_trips['total'] = ward_trips['prop_all'] * ward_trips['total']

ward_trips = ward_trips.rename(columns = {'AREA_SHORT_CODE_o':'ward_o', 'AREA_SHORT_CODE_d':'ward_d'})

ward_trips = ward_trips[['ward_o', 'ward_d', 'type', 'total']]

In [17]:
ward_trips = ward_trips.groupby(['ward_o', 'ward_d', 'type']).sum().reset_index().sort_values(by = ['ward_o', 'ward_d', 'type'])
ward_trips

Unnamed: 0,ward_o,ward_d,type,total
0,1,1,ab_am,0.552704
1,1,1,ab_ev,0.118363
2,1,1,ab_md,0.811149
3,1,1,ab_pm,0.973774
4,1,1,bl_am,168.513442
...,...,...,...,...
15777,25,25,rac_pm,399.066513
15778,25,25,raw_am,423.889255
15779,25,25,raw_ev,206.497937
15780,25,25,raw_md,140.284162


In [18]:
ward_trips.to_csv('/Volumes/Data2/RST/notebook/TTS OD/WARD_TYPE.csv', index = False)