In [1]:
import pandas as pd
from math import radians, cos, sin, atan2, sqrt

In [2]:
# Distance floor, in miles. City pairs closer together than this are treated as being
# exactly this far apart, so travel demand stops increasing as distance decreases below it.
minDist = 300

In [3]:
# read the source table from combinedstat.csv and preview its first rows
df = pd.read_csv('combinedstat.csv')
df.head()

Unnamed: 0,Combined statistical area,Central City,Geographic Name,Latitude,Longitude,2021 estimate,2020 census
0,"New York-Newark, NY-NJ-CT-PA Combined Statisti...","New York, NY",New York,40.713,-74.0072,23216685,23582649
1,"Los Angeles-Long Beach, CA Combined Statistica...","Los Angeles, CA",Los Angeles,34.0522,-118.2433,18490242,18644680
2,"Washington-Baltimore-Arlington, DC-MD-VA-WV-PA...","Washington, DC","Washington, D.C.",38.9072,-77.0369,9946526,9973383
3,"Chicago-Naperville, IL-IN-WI Combined Statisti...","Chicago, IL",Chicago,41.8832,-87.6324,9876339,9986960
4,"San Jose-San Francisco-Oakland, CA Combined St...","San Jose, CA",San Jose,37.3333,-121.9,9545921,9714023


In [4]:
# function that computes the distance between two latitude longitude pairs in miles
def distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in miles between two points.

    Uses the haversine formula on a sphere of radius 3959 miles.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along the surface of the sphere, in miles. NaN inputs
        propagate to a NaN result.
    """
    # approximate radius of earth in miles
    earth_radius_miles = 3959.0

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    # haversine of the central angle
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2

    # Floating-point rounding can leave `a` a hair outside [0, 1] (e.g. for
    # antipodal points), which would make sqrt() raise a math domain error.
    # Explicit comparisons are used instead of min()/max() so NaN is not
    # silently turned into a valid number.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    central_angle = 2 * atan2(sqrt(a), sqrt(1 - a))
    return earth_radius_miles * central_angle

# make a distance matrix of the distances between all "Geographic Name" pairs
# Coordinates are looked up by column name and converted to float once, then the
# whole matrix is built in one go as a float64 frame (instead of filling an
# object-dtype frame one cell at a time through .iloc).
latitudes = df["Latitude"].astype(float).tolist()
longitudes = df["Longitude"].astype(float).tolist()
pairwise_miles = [
    [distance(lat_i, lon_i, lat_j, lon_j) for lat_j, lon_j in zip(latitudes, longitudes)]
    for lat_i, lon_i in zip(latitudes, longitudes)
]
distances = pd.DataFrame(
    pairwise_miles,
    index=df["Geographic Name"],
    columns=df["Geographic Name"],
    dtype=float,
)

In [5]:
# preview the first rows of the distance matrix
distances.head()

Geographic Name,New York,Los Angeles,"Washington, D.C.",Chicago,San Jose,Boston,Dallas,Houston,Philadelphia,Atlanta,...,Dixon,Pullman,De Ridder,Kerrville,Columbus,Clovis,Martin,Cleveland,Steamboat Springs,Spencer
Geographic Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
New York,0.0,2445.63,203.521,711.155,2549.69,189.985,1370.62,1418.13,80.6817,746.017,...,806.412,2166.78,1273.39,1588.24,936.192,1648.47,856.043,1035.46,1712.14,1096.64
Los Angeles,2445.63,0.0,2294.22,1742.31,305.738,2590.78,1237.85,1370.16,2388.27,1932.58,...,1647.71,878.039,1467.93,1150.58,1707.03,859.231,1660.72,1573.76,767.867,1390.18
"Washington, D.C.",203.521,2294.22,0.0,593.79,2416.4,393.494,1182.09,1219.19,123.102,542.496,...,685.262,2080.02,1073.95,1394.97,736.024,1477.43,669.89,839.909,1580.41,986.128
Chicago,711.155,1742.31,593.79,0.0,1838.55,848.475,804.913,942.365,663.817,589.142,...,95.3648,1488.88,824.211,1038.63,581.171,988.486,388.418,586.776,1001.06,392.408
San Jose,2549.69,305.738,2416.4,1838.55,0.0,2678.92,1449.82,1607.71,2499.78,2109.55,...,1743.28,693.119,1690.02,1397.57,1893.69,1064.72,1819.56,1762.33,837.67,1460.09


In [6]:
# prepend the "2020 census" population values from df as column 0 of distances
distances.insert(loc=0, column="2020 census", value=df["2020 census"].values)

In [7]:
# preview again: "2020 census" should now be the first column
distances.head()

Geographic Name,2020 census,New York,Los Angeles,"Washington, D.C.",Chicago,San Jose,Boston,Dallas,Houston,Philadelphia,...,Dixon,Pullman,De Ridder,Kerrville,Columbus,Clovis,Martin,Cleveland,Steamboat Springs,Spencer
Geographic Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
New York,23582649,0.0,2445.63,203.521,711.155,2549.69,189.985,1370.62,1418.13,80.6817,...,806.412,2166.78,1273.39,1588.24,936.192,1648.47,856.043,1035.46,1712.14,1096.64
Los Angeles,18644680,2445.63,0.0,2294.22,1742.31,305.738,2590.78,1237.85,1370.16,2388.27,...,1647.71,878.039,1467.93,1150.58,1707.03,859.231,1660.72,1573.76,767.867,1390.18
"Washington, D.C.",9973383,203.521,2294.22,0.0,593.79,2416.4,393.494,1182.09,1219.19,123.102,...,685.262,2080.02,1073.95,1394.97,736.024,1477.43,669.89,839.909,1580.41,986.128
Chicago,9986960,711.155,1742.31,593.79,0.0,1838.55,848.475,804.913,942.365,663.817,...,95.3648,1488.88,824.211,1038.63,581.171,988.486,388.418,586.776,1001.06,392.408
San Jose,9714023,2549.69,305.738,2416.4,1838.55,0.0,2678.92,1449.82,1607.71,2499.78,...,1743.28,693.119,1690.02,1397.57,1893.69,1064.72,1819.56,1762.33,837.67,1460.09


In [8]:
# save the matrix to combDistances.csv; the "Geographic Name" index is written as the first CSV column
distances.to_csv('combDistances.csv')

In [9]:
# read combDistances.csv into a dataframe
# The former index comes back as an ordinary "Geographic Name" column, and repeated
# city names in the header come back with numeric suffixes (e.g. "Columbus.2", "Cleveland.1").
distances = pd.read_csv('combDistances.csv')
distances.head()

Unnamed: 0,Geographic Name,2020 census,New York,Los Angeles,"Washington, D.C.",Chicago,San Jose,Boston,Dallas,Houston,...,Dixon,Pullman,De Ridder,Kerrville,Columbus.2,Clovis,Martin,Cleveland.1,Steamboat Springs,Spencer
0,New York,23582649,0.0,2445.625644,203.520986,711.154942,2549.691628,189.9845,1370.619526,1418.125099,...,806.411543,2166.782527,1273.385223,1588.241896,936.191741,1648.471975,856.042564,1035.464104,1712.137348,1096.640671
1,Los Angeles,18644680,2445.625644,0.0,2294.223327,1742.310625,305.738353,2590.78139,1237.8451,1370.161356,...,1647.708197,878.039444,1467.930022,1150.575739,1707.030868,859.231347,1660.720617,1573.759602,767.866609,1390.17561
2,"Washington, D.C.",9973383,203.520986,2294.223327,0.0,593.789805,2416.402247,393.493779,1182.086327,1219.188099,...,685.2621,2080.022254,1073.947082,1394.972124,736.023702,1477.429468,669.889772,839.90936,1580.407407,986.127826
3,Chicago,9986960,711.154942,1742.310625,593.789805,0.0,1838.54613,848.474661,804.91257,942.364779,...,95.364792,1488.877313,824.210832,1038.634151,581.170553,988.486013,388.417856,586.77565,1001.060156,392.407879
4,San Jose,9714023,2549.691628,305.738353,2416.402247,1838.54613,0.0,2678.918664,1449.824208,1607.707556,...,1743.281477,693.119467,1690.017587,1397.572072,1893.686043,1064.715035,1819.556389,1762.329984,837.670247,1460.088061


In [10]:
# NOTE(review): numpy is not referenced in this cell; the import is kept in case later cells rely on it
import numpy
# populations as an array, in the same row order as the distance matrix
pop2020 = distances['2020 census'].to_numpy()
# drop the 2020 census column (by name rather than by position, so the wrong
# column can never be removed if the column layout changes)
distances = distances.drop(columns='2020 census')
distances.head()

Unnamed: 0,Geographic Name,New York,Los Angeles,"Washington, D.C.",Chicago,San Jose,Boston,Dallas,Houston,Philadelphia,...,Dixon,Pullman,De Ridder,Kerrville,Columbus.2,Clovis,Martin,Cleveland.1,Steamboat Springs,Spencer
0,New York,0.0,2445.625644,203.520986,711.154942,2549.691628,189.9845,1370.619526,1418.125099,80.681696,...,806.411543,2166.782527,1273.385223,1588.241896,936.191741,1648.471975,856.042564,1035.464104,1712.137348,1096.640671
1,Los Angeles,2445.625644,0.0,2294.223327,1742.310625,305.738353,2590.78139,1237.8451,1370.161356,2388.270988,...,1647.708197,878.039444,1467.930022,1150.575739,1707.030868,859.231347,1660.720617,1573.759602,767.866609,1390.17561
2,"Washington, D.C.",203.520986,2294.223327,0.0,593.789805,2416.402247,393.493779,1182.086327,1219.188099,123.101872,...,685.2621,2080.022254,1073.947082,1394.972124,736.023702,1477.429468,669.889772,839.90936,1580.407407,986.127826
3,Chicago,711.154942,1742.310625,593.789805,0.0,1838.54613,848.474661,804.91257,942.364779,663.817115,...,95.364792,1488.877313,824.210832,1038.634151,581.170553,988.486013,388.417856,586.77565,1001.060156,392.407879
4,San Jose,2549.691628,305.738353,2416.402247,1838.54613,0.0,2678.918664,1449.824208,1607.707556,2499.779772,...,1743.281477,693.119467,1690.017587,1397.572072,1893.686043,1064.715035,1819.556389,1762.329984,837.670247,1460.088061


In [11]:
# get the i,jth element of the distance matrix
def dist(i,j):
	"""Return the entry at row position i, column position j of the global `distances` frame.

	The lookup is purely positional (`.iloc`). With the frame as laid out when the
	travel demand loop runs, column 0 holds the "Geographic Name" labels, so the
	distance to the k-th area (0-based) is found at j = k + 1.
	"""
	return distances.iloc[i,j]

# make a travel demand df with the same dimensions as the distance matrix called tdemand
# (same index and column labels; every cell starts out empty)
tdemand = pd.DataFrame(index=distances.index, columns=distances.columns)
# copy the first column of the distance matrix (the "Geographic Name" labels)
# into the first column of the travel demand matrix
tdemand.iloc[:,0] = distances.iloc[:,0]
tdemand.head()

Unnamed: 0,Geographic Name,New York,Los Angeles,"Washington, D.C.",Chicago,San Jose,Boston,Dallas,Houston,Philadelphia,...,Dixon,Pullman,De Ridder,Kerrville,Columbus.2,Clovis,Martin,Cleveland.1,Steamboat Springs,Spencer
0,New York,,,,,,,,,,...,,,,,,,,,,
1,Los Angeles,,,,,,,,,,...,,,,,,,,,,
2,"Washington, D.C.",,,,,,,,,,...,,,,,,,,,,
3,Chicago,,,,,,,,,,...,,,,,,,,,,
4,San Jose,,,,,,,,,,...,,,,,,,,,,


In [12]:
# Fill tdemand with a gravity-style travel demand for every ordered pair of areas:
#   demand = 27000 * (pop_i ** 0.8 * pop_j ** 0.8) / max(distance, minDist) ** 2
# with populations expressed in millions, and demand forced to 0 where the distance is 0.
n_areas = len(pop2020)
# (population in millions) ** 0.8 depends on one area only, so compute it once
# per area instead of once per pair
pop_weight = [(p * 1.0e-6) ** 0.8 for p in pop2020]

for i in range(n_areas):
    for j in range(1, n_areas + 1):
        # column j corresponds to area j-1 because column 0 holds the area names
        miles = dist(i, j)
        # set the i,jth element of tdemand to the travel demand between i and j
        if miles == 0:
            tdemand.iloc[i, j] = 0
        else:
            # 27000 = 300mi^2 * 0.3
            # NOTE(review): the 300 baked into 27000 does not follow minDist if that is changed — confirm intent
            tdemand.iloc[i, j] = 27000 * (pop_weight[i] * pop_weight[j - 1]) / (max(miles, minDist) ** 2)

tdemand.to_csv("combTDemand.csv")

In [13]:
# preview the first rows of the computed travel demand matrix
tdemand.head()

Unnamed: 0,Geographic Name,New York,Los Angeles,"Washington, D.C.",Chicago,San Jose,Boston,Dallas,Houston,Philadelphia,...,Dixon,Pullman,De Ridder,Kerrville,Columbus.2,Clovis,Martin,Cleveland.1,Steamboat Springs,Spencer
0,New York,0.0,0.587627,23.6739,4.21751,0.320909,20.7657,0.962268,0.826515,18.6049,...,0.075698,0.0102654,0.0291255,0.0176654,0.0499134,0.0144324,0.0510151,0.0318859,0.00845847,0.018853
1,Los Angeles,0.587627,0.0,0.335437,0.582242,18.4938,0.230726,0.977614,0.733679,0.24326,...,0.0150248,0.051802,0.0181615,0.0278931,0.0124404,0.0440203,0.0112322,0.0114383,0.0348472,0.00972164
2,"Washington, D.C.",23.6739,0.335437,0.0,3.03892,0.179481,6.06334,0.649878,0.561744,9.34601,...,0.0526605,0.00559588,0.0205697,0.0115034,0.0405661,0.00902588,0.0418488,0.0243447,0.0049869,0.0117123
3,Chicago,4.21751,0.582242,3.03892,0.0,0.310371,1.30552,1.40315,0.94127,1.91093,...,0.275061,0.0109335,0.0349614,0.0207733,0.0651346,0.0201853,0.124613,0.0499342,0.0124429,0.0740467
4,San Jose,0.320909,18.4938,0.179481,0.310371,0.0,0.12809,0.423004,0.316308,0.131799,...,0.00796725,0.0493439,0.00813309,0.0112216,0.00600034,0.0170169,0.00555397,0.00541427,0.0173807,0.00523114
