In [61]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
# Shared Nominatim geocoder client, created once here and reused by
# get_closest_addresses; "nrpgroup" is passed as this application's user agent.
geolocator = Nominatim(user_agent="nrpgroup")

In [77]:
# Reformat the line-delimited GeoJSON export (one feature per line) into a
# single JSON array, save that array next to the source file, and build the
# `addresses` DataFrame from it.
file_name = "bexar-addresses-county.geojson"
city_name = "San Antonio"

# GeoJSON is UTF-8 by specification, so the encoding is pinned instead of
# relying on the platform default. Blank lines are dropped: turning them into
# array elements would leave a bare "," in the output and break JSON parsing.
with open(f"geojson/{file_name}", 'r', encoding="utf-8") as f:
    file_lines = [stripped for stripped in (line.strip() for line in f) if stripped]

# Joining with ",\n" yields the same "[\nfeature,\nfeature\n]" layout as
# appending a comma to every line and trimming the last one, without indexing
# into a possibly empty list.
formatted_text = "[\n" + ",\n".join(file_lines) + "\n]"

with open(f"geojson/{file_name}-formatted.geojson", 'w', encoding="utf-8") as f:
    f.write(formatted_text)

# Parse the text that was just written; no need to read the file back.
data = json.loads(formatted_text)

# Pull the fields of interest out of each feature. GeoJSON coordinates are
# ordered [longitude, latitude].
street_numbers = [int(i['properties']['number']) for i in data]
street_name = [i['properties']['street'] for i in data]
postcode = [i['properties']['postcode'] for i in data]
longitude = [i['geometry']['coordinates'][0] for i in data]
latitude = [i['geometry']['coordinates'][1] for i in data]

addresses = pd.DataFrame({
    'street_numbers': street_numbers,
    'street_name': street_name,
    'postcode': postcode,
    'longitude': longitude,
    'latitude': latitude,
})
# Human-readable form, e.g. "1234 Main St, San Antonio, TX 78201".
addresses["full_address"] = (
    addresses["street_numbers"].map(str)
    + " "
    + addresses["street_name"]
    + f", {city_name}, TX "
    + addresses["postcode"]
)

In [78]:
# Length of one degree of latitude in miles (equals 364,000 ft / 5,280).
miles_per_degree_lat = 68.93939393939394
# Fixed length of one degree of longitude in miles. A single value is only
# accurate at one latitude (this one corresponds to roughly 38 degrees N), so
# get_closest_addresses derives its own scale from the query latitude instead.
# The name is kept so that other cells referring to it keep working.
miles_per_degree_lon = 54.5985401459854


def get_closest_addresses(address: str, df: pd.DataFrame, max_radius = 5, max_results = 100):
    """Write the rows of ``df`` closest to ``address`` to a CSV file.

    The address is geocoded with the module-level ``geolocator``. Distances are
    computed with a flat-earth approximation in miles, sorted ascending, cut
    off at ``max_radius`` and truncated to ``max_results`` rows. The selected
    rows are written to ``results/closest_addresses_to_<address>.csv``.

    Parameters
    ----------
    address : str
        Address to search around, in the format
        "1234 Main St, San Antonio, TX 78201".
    df : pd.DataFrame
        Candidate rows; must have "latitude" and "longitude" columns in
        decimal degrees.
    max_radius : number, default 5
        Maximum distance from ``address`` in miles (inclusive).
    max_results : int, default 100
        Maximum number of rows to write.

    Returns
    -------
    None
        The result is only written to disk. If the address cannot be
        geocoded, an error message is printed and nothing is written.
    """
    location = geolocator.geocode(address)
    if location is None:
        print("Error: Address not found.")
        return

    lat = location.latitude
    lon = location.longitude

    # A degree of longitude shrinks with cos(latitude); scale it at the query
    # point rather than using one constant for every location.
    miles_per_degree_lon_here = miles_per_degree_lat * np.cos(np.radians(lat))
    north_south = (df["latitude"] - lat) * miles_per_degree_lat
    east_west = (df["longitude"] - lon) * miles_per_degree_lon_here
    distance = np.sqrt(north_south**2 + east_west**2).sort_values()

    candidates = distance[distance <= max_radius]
    # Positional slice: a no-op when there are fewer candidates than max_results.
    indices = candidates.iloc[:max_results].index

    # Path separators inside the address would otherwise be read as directories.
    safe_address = address.replace("/", "_").replace("\\", "_")
    output_dir = Path("results")
    output_dir.mkdir(parents=True, exist_ok=True)
    df.loc[indices].to_csv(output_dir / f"closest_addresses_to_{safe_address}.csv", index=False)

In [79]:
# Example query: geocode this address and write the nearby rows of `addresses`
# to a CSV under results/ (defaults: 5-mile radius, at most 100 rows).
get_closest_addresses("1938 South Zarzamora St, San Antonio TX", addresses)