# Here, we'll calculate the aerial ("as the crow flies") distance from each college to the city of Chicago
(Other networks can use the same code and simply replace our lat/long with theirs by changing the home_lat and home_lon variables below.)

In [1]:
import pandas as pd
import numpy as np
import os

# Edit these to reflect any changes
input_dir = 'inputs'  # folder holding the directory file; the output file is written here too
# Only change directory when we are not already inside it. A bare os.chdir('inputs')
# fails when this cell is re-run in the same kernel, because the working directory is
# already 'inputs' and there is no 'inputs/inputs' to enter.
if os.path.basename(os.getcwd()) != input_dir:
    os.chdir(input_dir)
directory_file = 'hd2021.csv'  # CSV read for the UNITID, LONGITUD and LATITUDE columns
output_file = 'distance_calcs.csv'  # CSV that receives the computed distances
home_lat = 41.88283 # Change this if using a city other than Chicago
home_lon = -87.6276 # Change this if using a city other than Chicago

In [3]:
# Load just the institution ID and its coordinates from the directory file.
# UNITID becomes the index, and '.' entries are parsed as missing values (NaN).
df = pd.read_csv(
    directory_file,
    usecols=['UNITID','LONGITUD','LATITUDE'],
    index_col=['UNITID'],
    encoding='cp1252',
    na_values='.',
)
print('Lat/Long info loaded', flush=True)
df.head()

Lat/Long info loaded


Unnamed: 0_level_0,LONGITUD,LATITUDE
UNITID,Unnamed: 1_level_1,Unnamed: 2_level_1
100654,-86.568502,34.783368
100663,-86.799345,33.505697
100690,-86.17401,32.362609
100706,-86.640449,34.724557
100724,-86.295677,32.364317


In [4]:
# Great-circle ("as the crow flies") distance from the home location to every row of df.
# The earth is approximated as a sphere and the distance is computed with the
# Haversine formula: https://en.wikipedia.org/wiki/Haversine_formula
# The intermediate columns (latrad, lonrad, dlat, dlon, a, c) are kept on df so each
# step of the calculation can be inspected in the preview below.

earth_radius_miles = 3956  # approximate earth radius; converts the central angle to miles

# Convert everything to radians before applying the trigonometric functions
df['latrad'] = np.deg2rad(df.LATITUDE)
df['lonrad'] = np.deg2rad(df.LONGITUD)
home_latrad = np.deg2rad(home_lat)
home_lonrad = np.deg2rad(home_lon)

# Differences in latitude / longitude relative to the home location
df['dlat'] = df['latrad']-home_latrad
df['dlon'] = df['lonrad']-home_lonrad

# 'a' is the haversine of the central angle and must lie in [0, 1]. Floating-point
# rounding can push it marginally above 1 for (nearly) antipodal points, which would
# make arcsin return NaN, so it is clipped to the valid range. Rows with missing
# coordinates stay NaN.
df['a'] = (
    np.sin(df.dlat/2)**2+np.cos(home_latrad)*np.cos(df.latrad)*np.sin(df.dlon/2)**2
).clip(lower=0, upper=1)

# 'c' is the central angle in radians; scaling by the radius gives the distance
df['c'] = 2*np.arcsin(np.sqrt(df['a']))
df['dist'] = np.round(df['c']*earth_radius_miles)
df.head()

Unnamed: 0_level_0,LONGITUD,LATITUDE,latrad,lonrad,dlat,dlon,a,c,dist
UNITID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
100654,-86.568502,34.783368,0.607084,-1.510905,-0.123909,0.018485,0.003886,0.124751,494.0
100663,-86.799345,33.505697,0.584785,-1.514934,-0.146209,0.014456,0.005367,0.146653,580.0
100690,-86.17401,32.362609,0.564834,-1.50402,-0.166159,0.02537,0.006988,0.167378,662.0
100706,-86.640449,34.724557,0.606058,-1.512161,-0.124935,0.017229,0.003943,0.125662,497.0
100724,-86.295677,32.364317,0.564864,-1.506144,-0.166129,0.023246,0.006969,0.167154,661.0


In [5]:
# Finally, we'll save the calculations for inclusion in the final directory

# output_file comes from the configuration cell at the top of the notebook. It is
# intentionally not re-assigned here, so a name changed there is the one actually used.
# Only the rounded distance is written, keyed by the index; missing distances
# (rows without coordinates) are written as 'N/A'.
df.to_csv(output_file, index=True, columns=['dist'], na_rep='N/A')