In [1]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the sample of trips between TAZ zones from a CSV file in the working directory.
sample_trips = pd.read_csv("sample_trips_between_taz_zones.csv")

In [3]:
# Preview the first rows to check which columns were loaded.
sample_trips.head()

Unnamed: 0,PersonID,OTAZ,DTAZ,Taxi Cost,TaxiTime(s)
0,30004031,377,425,10.10428,525.52
1,30004031,425,377,10.10428,525.52
2,30007491,1607,1607,8.223156,380.72
3,30007491,1607,1607,8.223156,380.72
4,30007492,1607,12,64.414162,3994.3


#### Calculating the average time and cost for each origin and destination pair

In [4]:
# Mean taxi cost and mean taxi time for every (origin, destination) pair.
# Only the DTAZ level is moved back into a column, so OTAZ stays as the index.
trip_metrics = ["Taxi Cost", "TaxiTime(s)"]
agg_trip = (
    sample_trips
    .groupby(["OTAZ", "DTAZ"])
    .agg({metric: 'mean' for metric in trip_metrics})
    .reset_index(level='DTAZ')
)
agg_trip.head()

Unnamed: 0_level_0,DTAZ,Taxi Cost,TaxiTime(s)
OTAZ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2,6.575988,197.76
1,32,11.4972,702.06
1,43,8.234352,380.92
1,91,14.851911,986.35
1,315,16.070569,1148.6


In [5]:
# Lookup of per-pair averages, filled by the loop that follows:
# origin TAZ -> {destination TAZ -> [mean cost, mean time]}
tz_info = {}

In [6]:
# Fill tz_info so that tz_info[origin][destination] == [mean cost, mean time].
# agg_trip is indexed by OTAZ, so iterrows() yields (origin zone, row) pairs.
for origin_zone, pair_row in agg_trip.iterrows():
    pair_averages = [pair_row['Taxi Cost'], pair_row['TaxiTime(s)']]
    tz_info.setdefault(origin_zone, {})[pair_row['DTAZ']] = pair_averages

### Function to calculate the uncertainty of time and cost for each origin and destination pair
**Uncertainty = actual_value - avg_value**, where `avg_value` is the mean over all trips with the same origin and destination.

In [7]:
def calc_uncertainity(row, cost=True):
    """
    Signed deviation of one trip from the average of its origin/destination pair.

    row  -> a single trip; must provide 'OTAZ', 'DTAZ', 'Taxi Cost' and 'TaxiTime(s)'
    cost -> truthy: deviation of 'Taxi Cost' from the pair's average cost
            falsy:  deviation of 'TaxiTime(s)' from the pair's average time

    The averages are read from the module-level tz_info lookup, where
    tz_info[origin][destination] is [average cost, average time].
    Raises KeyError when the pair is missing from tz_info.
    """
    # Choose the observed column and the matching position in the averages list.
    value_column, average_slot = ('Taxi Cost', 0) if cost else ('TaxiTime(s)', 1)
    observed = row[value_column]
    pair_averages = tz_info[row['OTAZ']][row['DTAZ']]
    return observed - pair_averages[average_slot]

**Cost and time uncertainty calculated for each trip and averaged per origin and destination pair**

In [8]:
# Signed deviation of every trip from the average of its origin/destination pair.
for deviation_column, use_cost in (('cost_uncertainity', True), ('time_uncertainity', False)):
    sample_trips[deviation_column] = sample_trips.apply(calc_uncertainity, axis=1, cost=use_cost)

# Re-aggregate per pair, now including the mean of both deviation columns.
# NOTE(review): each deviation is measured against the mean of the same
# (OTAZ, DTAZ) group, so its group mean is zero by construction (up to rounding),
# which is what the 0.0 columns in the output show. If a spread measure is wanted,
# consider aggregating the absolute deviation or 'std' instead; confirm the intent.
averaged_columns = ["Taxi Cost", "TaxiTime(s)", 'cost_uncertainity', 'time_uncertainity']
agg_trip = (
    sample_trips
    .groupby(["OTAZ", "DTAZ"])
    .agg({column: 'mean' for column in averaged_columns})
    .reset_index()
)
agg_trip.head()

Unnamed: 0,OTAZ,DTAZ,Taxi Cost,TaxiTime(s),cost_uncertainity,time_uncertainity
0,1,2,6.575988,197.76,0.0,0.0
1,1,32,11.4972,702.06,0.0,0.0
2,1,43,8.234352,380.92,0.0,0.0
3,1,91,14.851911,986.35,0.0,0.0
4,1,315,16.070569,1148.6,0.0,0.0


#### Shape file for the taxi zones is ingested

In [9]:
# Read the taxi-zone shapefile into a GeoDataFrame.
# NOTE(review): the file name suggests WGS84 coordinates; confirm via taxi_zones.crs.
taxi_zones = gpd.read_file("taz_zones_wgs84.shp")

**Dictionary to store the corresponding geometry for each taxi zone**

In [10]:
# Map each zone id (column 'TAZ_ID1') to that zone's geometry.
# If an id appears more than once, the last row wins.
zone_shp = {
    zone_row['TAZ_ID1']: zone_row['geometry']
    for _, zone_row in taxi_zones.iterrows()
}

**The geometry of each taxi zone is appended to the aggregate data frame, which is then converted into a GeoDataFrame**

In [11]:
# Attach a geometry to every origin/destination row, looked up in zone_shp.
# NOTE(review): the column is called 'O_geom' but the lookup key is DTAZ, so each
# row carries the destination zone's polygon. Confirm this is intended.
agg_trip['O_geom'] = agg_trip.apply(lambda row: zone_shp[row['DTAZ']],axis =1 )
# Index by origin zone. This happens in place, so re-running this cell without
# re-running the aggregation cell raises a KeyError ('OTAZ' is no longer a column).
agg_trip.set_index('OTAZ',inplace=True)
agg_trip.head()

Unnamed: 0_level_0,DTAZ,Taxi Cost,TaxiTime(s),cost_uncertainity,time_uncertainity,O_geom
OTAZ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,2,6.575988,197.76,0.0,0.0,"POLYGON ((-74.01317399999999 40.719443, -74.01..."
1,32,11.4972,702.06,0.0,0.0,"POLYGON ((-73.99077299999999 40.731731, -73.99..."
1,43,8.234352,380.92,0.0,0.0,"POLYGON ((-73.998026 40.714393, -73.997788 40...."
1,91,14.851911,986.35,0.0,0.0,"POLYGON ((-73.99113899999999 40.74046, -73.990..."
1,315,16.070569,1148.6,0.0,0.0,"POLYGON ((-73.97271900000001 40.743418, -73.97..."


**Final aggregated GeoDataFrame with average time, average cost, average cost uncertainty and average time uncertainty**

In [12]:
# Promote the aggregated frame to a GeoDataFrame with 'O_geom' as the active geometry column.
# The polygons were copied out of taxi_zones as bare shapely objects, which carry no CRS,
# so the source layer's CRS is passed on explicitly. Without it agg_trip_geoDf.crs would be
# None and later reprojection or overlay with other layers could not be done reliably.
agg_trip_geoDf = gpd.GeoDataFrame(agg_trip, geometry='O_geom', crs=taxi_zones.crs)
agg_trip_geoDf.head()

Unnamed: 0_level_0,DTAZ,Taxi Cost,TaxiTime(s),cost_uncertainity,time_uncertainity,O_geom
OTAZ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,2,6.575988,197.76,0.0,0.0,"POLYGON ((-74.01317 40.71944, -74.01257 40.719..."
1,32,11.4972,702.06,0.0,0.0,"POLYGON ((-73.99077 40.73173, -73.99095 40.731..."
1,43,8.234352,380.92,0.0,0.0,"POLYGON ((-73.99803 40.71439, -73.99779 40.714..."
1,91,14.851911,986.35,0.0,0.0,"POLYGON ((-73.99114 40.74046, -73.99062 40.741..."
1,315,16.070569,1148.6,0.0,0.0,"POLYGON ((-73.97272 40.74342, -73.97296 40.742..."
