In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns
sns.set_style('whitegrid')

In [2]:
def spherical_distance(lat1,lon1,lat2,lon2):
    return np.arccos(np.sin(lat1)*np.sin(lat2)+np.cos(lat1)*np.cos(lat2)*np.cos(lon2-lon1))*6371

In [3]:
file = "../../FoodEnvironmentAtlas.xls"
data = pd.ExcelFile(file)
sheet_dict = {sheet_name: data.parse(sheet_name) for sheet_name in data.sheet_names}

In [4]:
county_census_data = pd.read_csv('county_census_data.csv')

In [5]:
access = sheet_dict['ACCESS']
access = access.merge(right = county_census_data.copy(), how='inner')

In [6]:
access = access.dropna()

In [37]:
fips = access['FIPS'].to_list()
num_counties = len(fips)

In [18]:
access.loc[access['FIPS'] == fips[0]]['Latitude'].values[0], access.loc[access['FIPS'] == fips[0]]['Longitude'].values[0]

(32.536382, -86.64449)

In [24]:
spherical_distance(access.loc[access['FIPS'] == fips[0]]['Latitude'].values[0],
    access.loc[access['FIPS'] == fips[0]]['Longitude'].values[0],
    access.loc[access['FIPS'] == fips[1]]['Latitude'].values[0],
    access.loc[access['FIPS'] == fips[1]]['Longitude'].values[0]
)

13160.755359446022

In [None]:
distances = np.zeros((num_counties, num_counties))
for i in range(num_counties):
    for j in range(num_counties):
        distances[i][j] = spherical_distance(
            access.loc[access['FIPS'] == fips[i]]['Latitude'].values[0], 
            access.loc[access['FIPS'] == fips[i]]['Longitude'].values[0], 
            access.loc[access['FIPS'] == fips[j]]['Latitude'].values[0], 
            access.loc[access['FIPS'] == fips[j]]['Longitude'].values[0])
    print(i)

In [70]:
distances[type(distances) is float]

array([], shape=(0, 3029, 3029), dtype=float64)

In [86]:
np.isnan(distances[0][0])

False

In [91]:
distances_rounded = np.zeros((num_counties, num_counties))
for i in range(num_counties):
    for j in range(num_counties):
        if np.isnan(distances[i][j]):
            distances_rounded[i][j] = 0
        else:
            distances_rounded[i][j] = int(distances[i][j])

In [92]:
distances_rounded[0]

array([    0., 13160.,  6502., ..., 13509., 12210.,  9127.])

In [122]:
max_neighbors = 20
neighbor_labels = ['NEAREST_' + str(i + 1) for i in range(max_neighbors)]
closest_neighbors = []

In [123]:
for i in range(num_counties):
    j = 0
    dist = distances_rounded[i].copy()
    closest = []
    while j < max_neighbors:
        k = np.argmin(dist)
        closest.append(fips[k])
        dist[k] = np.inf
        j += 1
    closest_neighbors.append(closest)

In [125]:
neighbors_df = pd.DataFrame(access[['FIPS', 'County']])

In [126]:
for j in range(max_neighbors):
    neighbors_df[neighbor_labels[j]] = [closest_neighbors[i][j] for i in range(num_counties)]

In [129]:
neighbors_df.to_csv('closest_neighboring_counties.csv',index=False)

In [None]:
distances_df = pd.DataFrame(access[['FIPS', 'County']])
distance_labels = ['DISTANCE_TO_' + str(fips[i]) for i in range(num_counties)]

for i in range(num_counties):
    distances_df[distance_labels[i]] = distances_rounded[:,i]

In [141]:
distances_df

Unnamed: 0,FIPS,County,DISTANCE_TO_1001,DISTANCE_TO_1003,DISTANCE_TO_1005,DISTANCE_TO_1007,DISTANCE_TO_1009,DISTANCE_TO_1011,DISTANCE_TO_1013,DISTANCE_TO_1015,...,DISTANCE_TO_56027,DISTANCE_TO_56029,DISTANCE_TO_56031,DISTANCE_TO_56033,DISTANCE_TO_56035,DISTANCE_TO_56037,DISTANCE_TO_56039,DISTANCE_TO_56041,DISTANCE_TO_56043,DISTANCE_TO_56045
0,1001,Autauga,0.0,13160.0,6502.0,3034.0,9174.0,4385.0,5002.0,7193.0,...,13946.0,8587.0,18423.0,5061.0,16959.0,12418.0,8580.0,13509.0,12210.0,9127.0
1,1003,Baldwin,13160.0,0.0,15470.0,14979.0,14176.0,14802.0,9324.0,12002.0,...,7974.0,18281.0,5308.0,10042.0,7429.0,3826.0,13523.0,4680.0,7274.0,11442.0
2,1005,Barbour,6502.0,15470.0,0.0,7083.0,10377.0,2220.0,7449.0,11748.0,...,8839.0,5295.0,14201.0,11488.0,11011.0,11715.0,2488.0,19877.0,17230.0,4239.0
3,1007,Bibb,3034.0,14979.0,7083.0,0.0,6154.0,5670.0,8036.0,4954.0,...,15877.0,6592.0,18119.0,5033.0,17611.0,15409.0,9561.0,12859.0,10643.0,10904.0
4,1009,Blount,9174.0,14176.0,10377.0,6154.0,0.0,10518.0,14138.0,3855.0,...,15234.0,5581.0,12070.0,7859.0,12995.0,17852.0,12170.0,9502.0,6908.0,14294.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3059,56037,Sweetwater,12418.0,3826.0,11715.0,15409.0,17852.0,11653.0,7452.0,15429.0,...,5016.0,15973.0,6169.0,12137.0,6185.0,0.0,9706.0,8431.0,11071.0,7615.0
3060,56039,Teton,8580.0,13523.0,2488.0,9561.0,12170.0,4223.0,7797.0,14194.0,...,6350.0,6618.0,11840.0,13638.0,8661.0,9706.0,0.0,17606.0,18583.0,2124.0
3061,56041,Uinta,13509.0,4680.0,19877.0,12859.0,9502.0,17825.0,12669.0,8166.0,...,11258.0,14585.0,5853.0,8506.0,9045.0,8431.0,17606.0,0.0,2657.0,15893.0
3062,56043,Washakie,12210.0,7274.0,17230.0,10643.0,6908.0,16297.0,13628.0,5725.0,...,13485.0,12293.0,7731.0,7279.0,10776.0,11071.0,18583.0,2657.0,0.0,18465.0


In [142]:
distances_df.to_csv('county_centroid_distances.csv', index=False)