In [1]:
import pandas as pd
from haversine import haversine, Unit
import time

In [2]:
# Load the data
properties = pd.read_csv('../data/Airport Distance/bq-unique-property-postcodes.csv')
airports = pd.read_csv('../data/Airport Distance/bq-england-airports.csv')
properties_airports_shortest_distance = properties.copy()

In [3]:
properties

Unnamed: 0,Postcode,Lat,Long
0,GL8 8HA,51.640620,-2.152272
1,SS2 4RJ,51.547842,0.739400
2,DA14 6BQ,51.422746,0.099688
3,KT6 6HR,51.393117,-0.299780
4,ME7 4EB,51.380130,0.557023
...,...,...,...
1192273,WS13 6SY,52.688117,-1.806688
1192274,LE17 6LB,52.454033,-1.057775
1192275,CB5 8FS,52.209282,0.136840
1192276,HD9 7BE,53.589470,-1.762828


In [4]:

# Prepare a dataframe for the distances
distances = pd.DataFrame()

In [5]:
# Check the range of Lat and Long in properties data
print(properties['Lat'].min(), properties['Lat'].max())
print(properties['Long'].min(), properties['Long'].max())



49.895171 55.797415
-6.352647 1.762773


In [6]:
# Remove any rows with invalid Lat and Long values
# properties = properties[(properties['Lat'] >= -90) & (properties['Lat'] <= 90)]
# properties = properties[(properties['Long'] >= -180) & (properties['Long'] <= 180)]
properties


Unnamed: 0,Postcode,Lat,Long
0,GL8 8HA,51.640620,-2.152272
1,SS2 4RJ,51.547842,0.739400
2,DA14 6BQ,51.422746,0.099688
3,KT6 6HR,51.393117,-0.299780
4,ME7 4EB,51.380130,0.557023
...,...,...,...
1192273,WS13 6SY,52.688117,-1.806688
1192274,LE17 6LB,52.454033,-1.057775
1192275,CB5 8FS,52.209282,0.136840
1192276,HD9 7BE,53.589470,-1.762828


In [7]:
# Iterate over each airport
for i, airport in airports.iterrows():
    airport_loc = (airport['Y'], airport['X'])
    # Start timing the operation
    start_time = time.time()
    
    distances[airport['Airport']] = properties.apply(lambda x: haversine((x['Lat'], x['Long']), airport_loc), axis=1)
    
    # Calculate and print the elapsed time
    elapsed_time = time.time() - start_time
    print(f"Elapsed time for {airport['Airport']}: {elapsed_time} seconds")

Elapsed time for Biggin Hill: 4.435509920120239 seconds
Elapsed time for Birmingham: 4.563212156295776 seconds
Elapsed time for Blackpool: 4.357810020446777 seconds
Elapsed time for Bournemouth: 4.330810070037842 seconds
Elapsed time for Bristol: 4.282088279724121 seconds
Elapsed time for Cambridge: 4.31036114692688 seconds
Elapsed time for Carlisle: 4.268824100494385 seconds
Elapsed time for Coventry: 4.29690408706665 seconds
Elapsed time for Doncaster Sheffield: 4.280933141708374 seconds
Elapsed time for Durham Tees Valley: 4.28282618522644 seconds
Elapsed time for East Midlands International: 4.329210042953491 seconds
Elapsed time for Exeter: 4.259700059890747 seconds
Elapsed time for Gatwick: 4.287750720977783 seconds
Elapsed time for Gloucestershire: 4.25236701965332 seconds
Elapsed time for Heathrow: 4.2681849002838135 seconds
Elapsed time for Humberside: 4.278806209564209 seconds
Elapsed time for Isles Of Scilly (St.Marys): 4.287585020065308 seconds
Elapsed time for Isles Of Sci

In [8]:

# Join the distances to the properties dataframe
properties = pd.concat([properties, distances], axis=1)


In [9]:
properties

Unnamed: 0,Postcode,Lat,Long,Biggin Hill,Birmingham,Blackpool,Bournemouth,Bristol,Cambridge,Carlisle,...,Newcastle,Newquay,Norwich,Oxford (Kidlington),Penzance Heliport,Plymouth,Shoreham,Southampton,Southend,Stansted
0,GL8 8HA,51.640620,-2.152272,154.874188,94.493102,245.080798,98.507519,48.220524,171.611084,369.621915,...,379.055852,239.402949,260.409953,61.302675,289.634366,192.489192,157.614412,94.468688,197.383818,166.796919
1,SS2 4RJ,51.547842,0.739400,54.787702,197.488756,355.630895,198.722477,239.867905,83.304269,444.979366,...,420.405972,419.917571,130.037222,145.116586,466.525885,361.607378,107.068842,160.309331,3.444521,51.019732
2,DA14 6BQ,51.422746,0.099688,11.233875,170.502517,336.994524,152.789553,195.183500,87.753820,436.531496,...,419.354196,373.538261,160.337214,107.702747,420.097942,315.190168,70.959032,114.291716,44.885251,52.400666
3,KT6 6HR,51.393117,-0.299780,23.814591,153.862022,323.438784,127.054922,167.486919,96.552402,428.464147,...,415.809382,345.949648,178.282787,85.340913,392.764072,287.999372,62.221911,88.608391,72.204810,66.223631
4,ME7 4EB,51.380130,0.557023,37.061823,197.751005,360.793906,179.854479,226.958423,96.036252,455.024402,...,433.457147,403.216168,151.620718,138.684438,449.116653,344.002639,84.843790,141.756840,23.376969,60.386087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1192273,WS13 6SY,52.688117,-1.806688,196.268131,26.558076,146.441736,212.439269,157.756844,144.317150,259.044312,...,261.459749,332.968600,207.897209,101.108461,383.486415,297.928405,231.087356,195.557415,211.730238,165.258436
1192274,LE17 6LB,52.454033,-1.057775,145.357837,46.467651,198.007143,193.921912,164.496346,87.937534,299.617588,...,290.399780,352.814949,159.639909,71.611974,403.502843,309.361974,187.792409,168.352304,155.584011,108.665863
1192275,CB5 8FS,52.209282,0.136840,97.844004,130.596312,275.093938,209.574160,216.433740,2.507779,360.634134,...,336.920093,407.193406,92.713704,108.003968,456.775727,355.616635,155.809002,173.850256,81.085850,36.616235
1192276,HD9 7BE,53.589470,-1.762828,278.820806,126.445194,86.847770,312.693870,253.579441,200.925350,164.954368,...,161.191158,413.760758,227.126839,197.962716,462.953328,386.954450,322.415854,294.625254,279.656124,232.443412


In [10]:
properties_airports_shortest_distance['Nearest_Airport_Distance'] = distances.min(axis=1)
properties_airports_shortest_distance['Nearest_Airport'] = distances.idxmin(axis=1)
properties_airports_shortest_distance

Unnamed: 0,Postcode,Lat,Long,Nearest_Airport_Distance,Nearest_Airport
0,GL8 8HA,51.640620,-2.152272,28.157945,Gloucestershire
1,SS2 4RJ,51.547842,0.739400,3.444521,Southend
2,DA14 6BQ,51.422746,0.099688,9.768120,London City
3,KT6 6HR,51.393117,-0.299780,13.706516,Heathrow
4,ME7 4EB,51.380130,0.557023,23.376969,Southend
...,...,...,...,...,...
1192273,WS13 6SY,52.688117,-1.806688,26.558076,Birmingham
1192274,LE17 6LB,52.454033,-1.057775,30.139959,Coventry
1192275,CB5 8FS,52.209282,0.136840,2.507779,Cambridge
1192276,HD9 7BE,53.589470,-1.762828,31.673731,Leeds Bradford


In [12]:
# Specify the output file path for the merged CSV file
output_file_path = f'properties_airports_shortest_distance.csv'

# Write the merged_data DataFrame to a CSV file
properties_airports_shortest_distance.to_csv(output_file_path)