In [2]:
import pandas as pd
# Show up to 200 rows when a frame is displayed (more than the pandas default)
pd.options.display.max_rows = 200

In [3]:
# Load the car travel-time table (source: the dis1.zip file from Maarten)
travel_car_path = "Data/dis1.csv"
travel_car = pd.read_csv(travel_car_path)
# Display the frame to confirm it was read in
travel_car

Unnamed: 0,origin_i,origin_msoacode,destination_j,destination_msoacode,data
0,0,E02000001,0,E02000001,0.871131
1,0,E02000001,1,E02000002,12.409382
2,0,E02000001,2,E02000003,11.578065
3,0,E02000001,3,E02000004,14.845719
4,0,E02000001,4,E02000005,11.518362
...,...,...,...,...,...
71166091,8435,S02001235,8431,S02001231,10.017610
71166092,8435,S02001235,8432,S02001232,0.700298
71166093,8435,S02001235,8433,S02001233,5.011075
71166094,8435,S02001235,8434,S02001234,1.850456


In [4]:
# Remove the numeric origin_i / destination_j columns (the MSOA code columns are kept)
# and give the travel-time column a descriptive name
travel_car = (
    travel_car
    .drop(columns=['origin_i', 'destination_j'])
    .rename(columns={'data': 'time_car_min'})
)

The data column (renamed to time_car_min above) holds the car travel time between each pair of MSOAs, in minutes. For each origin MSOA we want to get:

    1 - UNWEIGHTED Average Travel Time From MSOA to all Other MSOAs (by mode)

    2 - WEIGHTED Average Travel Time From MSOA to all Other MSOAs (by mode), where each destination is weighted by the number of commuters travelling to it by that mode

1 - UNWEIGHTED Average Travel Time From MSOA to all Other MSOAs (by mode)

In [5]:
# Unweighted mean car travel time per origin MSOA: group the rows by origin and
# average time_car_min over every destination row present for that origin.
# The column is selected explicitly because travel_car also holds the string
# column destination_msoacode; since pandas 2.0 a bare .mean() no longer drops
# non-numeric columns silently and raises a TypeError instead.
# NOTE(review): the origin == destination row is included in this mean --
# confirm that is intended given the "all other MSOAs" heading.
avg_car = travel_car.groupby("origin_msoacode")[["time_car_min"]].mean()
# rename the time_car_min column to describe what it now holds
avg_car = avg_car.rename(columns={'time_car_min': 'avg_time_from_origin_car_UNWEIGHTED'})
avg_car

Unnamed: 0_level_0,avg_time_from_origin_car_UNWEIGHTED
origin_msoacode,Unnamed: 1_level_1
E02000001,147.826471
E02000002,154.434657
E02000003,154.333970
E02000004,157.789391
E02000005,154.835718
...,...
W02000419,171.164667
W02000420,148.706129
W02000421,206.010322
W02000422,180.837238


2 - WEIGHTED Average Travel Time From MSOA to all Other MSOAs (by mode)

In [8]:
# Load the origin-destination commuting flows by method of travel to work
# (downloaded from https://www.nomisweb.co.uk/census/2011/bulk/rOD1)
flow_path = "Data/wu03ew_msoa.csv"
flow = pd.read_csv(flow_path)
flow

Unnamed: 0,Area of residence,Area of workplace,All categories: Method of travel to work,Work mainly at or from home,"Underground, metro, light rail, tram",Train,"Bus, minibus or coach",Taxi,"Motorcycle, scooter or moped",Driving a car or van,Passenger in a car or van,Bicycle,On foot,Other method of travel to work
0,E02000001,E02000001,1506,0,73,41,32,9,1,8,1,33,1304,4
1,E02000001,E02000014,2,0,2,0,0,0,0,0,0,0,0,0
2,E02000001,E02000016,3,0,1,0,2,0,0,0,0,0,0,0
3,E02000001,E02000025,1,0,0,1,0,0,0,0,0,0,0,0
4,E02000001,E02000028,1,0,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2402196,W02000423,W02000411,6,0,0,0,3,0,0,1,0,0,2,0
2402197,W02000423,W02000412,58,0,0,0,10,0,1,29,1,4,13,0
2402198,W02000423,W02000415,3,0,0,0,0,0,0,3,0,0,0,0
2402199,W02000423,W02000422,525,0,1,2,17,2,0,125,11,31,333,3


In [9]:
# Car commuters per residence/workplace pair = drivers + passengers
flow['car'] = flow['Driving a car or van'].add(flow['Passenger in a car or van'])

In [10]:
# Reduce the flow table to the two MSOA code columns plus the combined car count
car_flow_columns = ['Area of residence', 'Area of workplace', 'car']
flow = flow.loc[:, car_flow_columns]
flow

Unnamed: 0,Area of residence,Area of workplace,car
0,E02000001,E02000001,9
1,E02000001,E02000014,0
2,E02000001,E02000016,0
3,E02000001,E02000025,0
4,E02000001,E02000028,1
...,...,...,...
2402196,W02000423,W02000411,1
2402197,W02000423,W02000412,30
2402198,W02000423,W02000415,3
2402199,W02000423,W02000422,136


In [11]:
# Attach the car commuter count to every travel-time row. A row matches when its
# origin MSOA equals the flow's area of residence AND its destination MSOA equals
# the flow's area of workplace. The left join keeps every travel-time row and
# leaves NaN in the flow columns where no matching flow row exists.
od_keys = ['origin_msoacode', 'destination_msoacode']
flow_keys = ['Area of residence', 'Area of workplace']
flow_distance = travel_car.merge(flow, how='left', left_on=od_keys, right_on=flow_keys)
flow_distance.head(5)

Unnamed: 0,origin_msoacode,destination_msoacode,time_car_min,Area of residence,Area of workplace,car
0,E02000001,E02000001,0.871131,E02000001,E02000001,9.0
1,E02000001,E02000002,12.409382,,,
2,E02000001,E02000003,11.578065,,,
3,E02000001,E02000004,14.845719,,,
4,E02000001,E02000005,11.518362,,,


In [12]:
# Origin/destination pairs with no matching flow row came out of the left merge
# as NaN; treat them as 0 car commuters.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on flow_distance['car']: that form is chained
# assignment through an in-place method, which pandas 2.x warns about and which
# does not update flow_distance at all under Copy-on-Write.
flow_distance['car'] = flow_distance['car'].fillna(0)
# check
flow_distance.head(5)

Unnamed: 0,origin_msoacode,destination_msoacode,time_car_min,Area of residence,Area of workplace,car
0,E02000001,E02000001,0.871131,E02000001,E02000001,9.0
1,E02000001,E02000002,12.409382,,,0.0
2,E02000001,E02000003,11.578065,,,0.0
3,E02000001,E02000004,14.845719,,,0.0
4,E02000001,E02000005,11.518362,,,0.0


In [13]:
# Total minutes travelled by car on each origin/destination pair:
# travel time multiplied by the number of car commuters on that pair
flow_distance['cumulative_time_car'] = flow_distance['time_car_min'].mul(flow_distance['car'])
# check
flow_distance.head(5)

Unnamed: 0,origin_msoacode,destination_msoacode,time_car_min,Area of residence,Area of workplace,car,cumulative_time_car
0,E02000001,E02000001,0.871131,E02000001,E02000001,9.0,7.840181
1,E02000001,E02000002,12.409382,,,0.0,0.0
2,E02000001,E02000003,11.578065,,,0.0,0.0
3,E02000001,E02000004,14.845719,,,0.0,0.0
4,E02000001,E02000005,11.518362,,,0.0,0.0


In [14]:
# Per origin MSOA, total up the travel times, the car commuter counts and the
# commuter-weighted travel minutes.
# Only the numeric columns are summed: flow_distance also carries the string
# columns destination_msoacode, 'Area of residence' and 'Area of workplace',
# and since pandas 2.0 a bare .sum() no longer drops non-numeric columns silently.
# time_car_min is kept in the result because a later cell drops it by name.
numeric_cols = ['time_car_min', 'car', 'cumulative_time_car']
avg_car_weighted = flow_distance.groupby("origin_msoacode")[numeric_cols].sum()
avg_car_weighted.head(5)

Unnamed: 0_level_0,time_car_min,car,cumulative_time_car
origin_msoacode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E02000001,1247064.0,113.0,1970.515125
E02000002,1302811.0,982.0,9947.762828
E02000003,1301961.0,1616.0,15323.978925
E02000004,1331111.0,1221.0,12102.639555
E02000005,1306194.0,1383.0,12484.379844


In [15]:
# Flow-weighted average car travel time per origin:
# total commuter-minutes divided by total car commuters
avg_car_weighted['avg_time_car'] = avg_car_weighted['cumulative_time_car'].div(avg_car_weighted['car'])
avg_car_weighted.head(5)

Unnamed: 0_level_0,time_car_min,car,cumulative_time_car,avg_time_car
origin_msoacode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
E02000001,1247064.0,113.0,1970.515125,17.438187
E02000002,1302811.0,982.0,9947.762828,10.130105
E02000003,1301961.0,1616.0,15323.978925,9.48266
E02000004,1331111.0,1221.0,12102.639555,9.912072
E02000005,1306194.0,1383.0,12484.379844,9.027028


In [16]:
# Combine the unweighted averages with the flow-weighted results,
# matching rows on origin_msoacode and keeping every row of avg_car
car = avg_car.merge(avg_car_weighted, how='left', on=['origin_msoacode'])
car

Unnamed: 0_level_0,avg_time_from_origin_car_UNWEIGHTED,time_car_min,car,cumulative_time_car,avg_time_car
origin_msoacode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
E02000001,147.826471,1.247064e+06,113.0,1970.515125,17.438187
E02000002,154.434657,1.302811e+06,982.0,9947.762828,10.130105
E02000003,154.333970,1.301961e+06,1616.0,15323.978925,9.482660
E02000004,157.789391,1.331111e+06,1221.0,12102.639555,9.912072
E02000005,154.835718,1.306194e+06,1383.0,12484.379844,9.027028
...,...,...,...,...,...
W02000419,171.164667,1.443945e+06,3125.0,33063.140118,10.580205
W02000420,148.706129,1.254485e+06,4655.0,69324.438859,14.892468
W02000421,206.010322,1.737903e+06,3276.0,68592.942606,20.938017
W02000422,180.837238,1.525543e+06,2505.0,33049.315705,13.193340


In [17]:
# Discard time_car_min: after the groupby-sum it is only the total of the travel
# times from each MSOA to all other MSOAs, which we don't want
car = car.drop(columns='time_car_min')
car

Unnamed: 0_level_0,avg_time_from_origin_car_UNWEIGHTED,car,cumulative_time_car,avg_time_car
origin_msoacode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
E02000001,147.826471,113.0,1970.515125,17.438187
E02000002,154.434657,982.0,9947.762828,10.130105
E02000003,154.333970,1616.0,15323.978925,9.482660
E02000004,157.789391,1221.0,12102.639555,9.912072
E02000005,154.835718,1383.0,12484.379844,9.027028
...,...,...,...,...
W02000419,171.164667,3125.0,33063.140118,10.580205
W02000420,148.706129,4655.0,69324.438859,14.892468
W02000421,206.010322,3276.0,68592.942606,20.938017
W02000422,180.837238,2505.0,33049.315705,13.193340


In [18]:
# Write the per-origin car travel-time table to CSV
# (the origin_msoacode index is written as the first column)
output_csv = 'travel_time_car.csv'
car.to_csv(output_csv)