# FIPS to Market IDs

The purpose of this notebook is simple: we map each county FIPS code to its corresponding market ID so that we can determine which airline routes apply to which counties. This lets us cross-reference GDP data with the counties that are affected. We do this by finding, for each county, the market ID whose latitude and longitude are closest to the county's latitude and longitude.

In [9]:
import os
import pandas as pd

# Locations of the two input CSVs (absolute paths on the author's machine).
path_to_economic_growth_fips = "/Users/tristanbrigham/Desktop/Citadel Datathon/Local Important Data/Auxillary Data/FIPS econ data/us_county_latlng.csv"
path_to_mkt_id = "/Users/tristanbrigham/Desktop/Citadel Datathon/Citadel_Correlation_One_Datathon/crucial_data_and_files/code_to_region_state_mapping.csv"

# County table: later cells read its 'lat' and 'lng' columns.
fips_mapping_df = pd.read_csv(path_to_economic_growth_fips)
# Market table: later cells read its 'latitude', 'longitude' and 'market_id' columns.
mkt_df = pd.read_csv(path_to_mkt_id)

In [10]:
# for calculating distance to market id lat and lon
from math import sin, cos, sqrt, atan2, radians

def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points, in kilometers.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance between the two points in kilometers, as a float.
    """
    # Convert latitude and longitude from degrees to radians
    lat1_rad, lon1_rad, lat2_rad, lon2_rad = map(radians, [lat1, lon1, lat2, lon2])

    # Radius of the Earth in kilometers
    earth_radius_km = 6371.0

    # Haversine formula
    d_lat = lat2_rad - lat1_rad
    d_lon = lon2_rad - lon1_rad

    a = sin(d_lat / 2) ** 2 + cos(lat1_rad) * cos(lat2_rad) * sin(d_lon / 2) ** 2
    # Mathematically 0 <= a <= 1, but rounding can push it a hair past 1.0 for
    # (near-)antipodal points, which would make sqrt(1 - a) raise
    # "math domain error". Clamp before taking square roots.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    # Calculate the distance in kilometers
    distance_km = earth_radius_km * c

    return distance_km

In [11]:
# For every county row in fips_mapping_df, find the market in mkt_df whose
# (latitude, longitude) is nearest to the county's (lat, lng), and record that
# market's market_id in a new "mkt_id" column.

# Pull the market columns out once. Iterating with iterrows() inside the
# county loop would rebuild a Series per market row on every pass, and
# iterrows() does not preserve column dtypes.
market_points = list(
    zip(
        mkt_df["market_id"].tolist(),
        mkt_df["latitude"].tolist(),
        mkt_df["longitude"].tolist(),
    )
)
if not market_points:
    raise ValueError("mkt_df has no rows; cannot assign a nearest market_id")

closest_mkt_id = []

for county_lat, county_lng in zip(fips_mapping_df["lat"].tolist(), fips_mapping_df["lng"].tolist()):
    # Stays None only if no market yields a comparable (non-NaN) distance,
    # e.g. when the county's coordinates are missing.
    nearest_id = None
    min_distance = float('inf')

    for market_id, market_lat, market_lon in market_points:
        distance = calculate_distance(county_lat, county_lng, market_lat, market_lon)

        # Strict '<' keeps the first market on ties and skips NaN distances.
        if distance < min_distance:
            min_distance = distance
            nearest_id = market_id

    closest_mkt_id.append(nearest_id)

# One-column frame of the chosen market ids, kept for inspection
result_df = pd.DataFrame(closest_mkt_id)

# Assign from the list (positional) rather than from a DataFrame, so the
# result does not depend on index alignment between the two frames.
fips_mapping_df["mkt_id"] = closest_mkt_id

print(result_df)

          0
0     33277
1     30466
2     33277
3     30599
4     30599
...     ...
3228  33360
3229  33360
3230  33360
3231  33360
3232  33360

[3233 rows x 1 columns]


In [13]:
# Write the augmented table back to the same CSV it was loaded from.
# index=False keeps the row index out of the file; with the default, every
# run would add another unnamed index column that is re-read as data.
fips_mapping_df.to_csv(path_to_economic_growth_fips, index=False)