## Determining the 20 closest neighboring counties to each county

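This notebook determines the 20 closest counties to each county, using the latitude and longitude of each county's centroid from the U.S. Census Bureau's Gazetteer data. The resulting neighbor lists are saved to 'closest_neighboring_counties.csv' in the 'data' folder, and the full matrix of pairwise centroid distances is saved to 'county_centroid_distances.csv' in the same folder.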

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns
sns.set_style('whitegrid')

In [2]:
def spherical_distance(lat1,lon1,lat2,lon2):
    """Great-circle distance between two points on a sphere of radius 6371.

    Uses the spherical law of cosines. With 6371 being the Earth's mean
    radius in kilometres, the result is a distance in kilometres.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point, in radians (the values
        are passed straight to np.sin / np.cos).
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point, in radians.

    Returns
    -------
    float or ndarray
        Distance(s), following numpy broadcasting of the inputs. NaN inputs
        yield NaN.
    """
    cos_angle = (np.sin(lat1) * np.sin(lat2)
                 + np.cos(lat1) * np.cos(lat2) * np.cos(lon2 - lon1))
    # Floating-point rounding can push the cosine marginally outside [-1, 1]
    # for coincident or antipodal points, which would make arccos return NaN.
    return np.arccos(np.clip(cos_angle, -1.0, 1.0)) * 6371

In [3]:
# Load the county table; the row order of this frame defines the order of `fips`.
county_census_data = pd.read_csv(filepath_or_buffer='../data/county_census_data.csv')
fips = county_census_data['FIPS'].tolist()
num_counties = county_census_data.shape[0]

In [None]:
# Calculate spherical distances between centroids of counties
#
# The Census Gazetteer publishes INTPTLAT / INTPTLONG in decimal degrees, while
# spherical_distance feeds its arguments straight into np.sin / np.cos, so the
# coordinates are converted to radians here.
# NOTE(review): assumes county_census_data.csv still holds the raw Gazetteer
# degrees - confirm against the notebook that builds that file.
# Rows are used positionally, which matches the former FIPS lookup as long as
# every FIPS code appears exactly once in the table.
lat_rad = np.radians(county_census_data['INTPTLAT'].to_numpy(dtype=float))
lon_rad = np.radians(county_census_data['INTPTLONG'].to_numpy(dtype=float))

# Broadcasting a column vector against a row vector evaluates every pair of
# counties in one call instead of a Python loop with four DataFrame scans per pair.
with np.errstate(invalid='ignore'):
    pairwise = spherical_distance(lat_rad[:, None], lon_rad[:, None],
                                  lat_rad[None, :], lon_rad[None, :])

# Keep the strictly-lower triangle and mirror it, so the matrix is exactly
# symmetric and the diagonal (distance to self) is exactly zero.
lower_triangle = np.tril(pairwise, k=-1)
distances = lower_triangle + lower_triangle.T

# arccos yields NaN when rounding pushes its argument past 1, which happens for
# (near-)coincident centroids; those distances are treated as zero.
distances[np.isnan(distances)] = 0

In [5]:
# Number of nearest counties recorded per county, the matching output column
# names (NEAREST_1 ... NEAREST_<max_neighbors>), and the container for the results.
max_neighbors = 20
neighbor_labels = [f'NEAREST_{rank}' for rank in range(1, max_neighbors + 1)]
closest_neighbors = list()

In [6]:
# Find the 20 closest neighbors of each county
#
# A stable sort orders equal distances by column index, which is the same order
# produced by repeatedly taking np.argmin and masking the winner with inf.
# Assumes max_neighbors does not exceed the number of counties.
# NOTE(review): the diagonal of `distances` is 0, so a county normally shows up
# as its own first "neighbor" - confirm that downstream consumers expect this.
nearest_idx = np.argsort(distances, axis=1, kind='stable')[:, :max_neighbors]

# Assign instead of appending so that re-running this cell cannot accumulate
# duplicate rows in the list.
closest_neighbors = [[fips[k] for k in row] for row in nearest_idx]

In [7]:
# Save lists of neighboring counties to new DataFrame
# Take an explicit copy of the two identifier columns: wrapping the selection in
# pd.DataFrame(...) does not copy its data, and columns are added to this frame later.
neighbors_df = county_census_data[['FIPS', 'County']].copy()

In [8]:
# Add one column per neighbor rank: column `label` holds, for every county row,
# the FIPS code found at that rank in the county's neighbor list.
for rank, label in enumerate(neighbor_labels):
    neighbors_df[label] = [closest_neighbors[row][rank] for row in range(num_counties)]

In [12]:
# Write the neighbor table to disk without the DataFrame index column.
neighbors_df.to_csv(
    path_or_buf='../data/closest_neighboring_counties.csv',
    index=False,
)

In [None]:
# Save distances to new DataFrame
# One column per county, named after its FIPS code; column i holds the distance
# from every county (row) to county i.
distance_labels = [f'DISTANCE_TO_{code}' for code in fips]

# Build all distance columns in a single frame and join once. Inserting thousands
# of columns one at a time fragments the DataFrame and triggers pandas'
# PerformanceWarning.
# Assumes FIPS codes are unique, so that the column labels are unique as well.
distance_columns = pd.DataFrame(
    distances,
    columns=distance_labels,
    index=county_census_data.index,
)
distances_df = pd.concat(
    [county_census_data[['FIPS', 'County']], distance_columns],
    axis=1,
)

In [14]:
# Write the full pairwise distance table to disk without the DataFrame index column.
distances_df.to_csv(
    path_or_buf='../data/county_centroid_distances.csv',
    index=False,
)