In [13]:
import os  
a = os.getcwd()
print(a)

c:\Users\ELipt\OneDrive\All Files\Desktop\invest_atlanta_group_1-1


In [14]:
import geopandas as gpd

In [15]:
import math
import pandas as pd


def haversine_miles(lat1, lon1, lat2, lon2):
    R = 3958.8  # miles
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2.0) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2.0) ** 2
    return 2 * R * math.atan2(math.sqrt(a), math.sqrt(1 - a))

def find_col(df, candidates):
    for c in candidates:
        for col in df.columns:
            if col.strip().lower() == c.lower():
                return col
    for c in candidates:
        for col in df.columns:
            if c.lower() in col.strip().lower():
                return col
    return None

# Set up BASE path
BASE = r"C:\Users\ELipt\OneDrive\All Files\Desktop\invest_atlanta_group_1-1"

# Load data
stadium_path = os.path.join(BASE, "data", "stadium", "stadium_location.csv")
business_path = os.path.join(BASE, "data", "business_records", "Atlanta_Business_License_Records_2025(Food Licenses).csv")

stadium_df = pd.read_csv(stadium_path, encoding='latin-1', header=None)
business_df = pd.read_csv(business_path, encoding='latin-1')

# Extract stadium coordinates
stad_lat = float(stadium_df.iloc[0, 0].split(',')[0].strip())
stad_lon = float(stadium_df.iloc[0, 0].split(',')[1].strip())

# Set up column names
biz_lat_col = "latitude"
biz_lon_col = "longitude"

print(f"Stadium location: ({stad_lat}, {stad_lon})")
print(f"Total businesses loaded: {len(business_df)}")


Stadium location: (33.7553276, -84.4031654)
Total businesses loaded: 28135


In [21]:
print("Available columns in business_df:")
print(business_df.columns.tolist())
print("\n")

name_col = find_col(business_df, ["company_name", "business name", "dba", "name", "business", "business_name"])
print(f"Name column: {name_col}")
print(f"Lat column: {biz_lat_col}")
print(f"Lon column: {biz_lon_col}")
print(f"\nTotal businesses before filtering: {len(business_df)}")

# Filter out rows with missing latitude or longitude
business_df[biz_lat_col] = pd.to_numeric(business_df[biz_lat_col], errors="coerce")
business_df[biz_lon_col] = pd.to_numeric(business_df[biz_lon_col], errors="coerce")
valid = business_df.dropna(subset=[biz_lat_col, biz_lon_col])

print(f"Businesses with valid coordinates: {len(valid)}")

distances_list = []
for idx, row in valid.iterrows():
    name = str(row[name_col]).strip() if pd.notna(row[name_col]) else "Unknown"
    
    # Build full address from multiple components
    address_parts = []
    for col in ["address_line1", "street_type", "city", "state", "postal_code"]:
        if col in business_df.columns and pd.notna(row[col]):
            part = str(row[col]).strip()
            if part and part != "nan":
                address_parts.append(part)
    address = " ".join(address_parts) if address_parts else "Unknown"
    
    lat = float(row[biz_lat_col])
    lon = float(row[biz_lon_col])
    miles = haversine_miles(stad_lat, stad_lon, lat, lon)
    business_data = [name, round(miles, 4), address, lat, lon]
    distances_list.append(business_data)
    print(business_data)

out_df = pd.DataFrame(distances_list, columns=["business_name", "miles_to_stadium", "address", "latitude", "longitude"])
# Remove lat/lon columns before saving (keep them temporarily for verification)
out_df.to_csv(os.path.join(BASE, "data", "business_records", "distances_to_stadium.csv"), index=False)
print("Saved distances:", out_df.shape[0])

Available columns in business_df:
['license_number', 'company_name', 'company_dba', 'license_classification', '2 or more companies', '# in each license', 'issued_date', 'naics_code', 'naics_name', 'Unnamed: 9', 'predirection', 'address_line1', 'address_line2', 'street_type', 'postdirection', 'unit_suite', 'city', 'state', 'postal_code', 'address_concat', 'address_api', 'longitude', 'latitude', 'disinvested_neighborhood', 'council_district', 'npu']


Name column: company_name
Lat column: latitude
Lon column: longitude

Total businesses before filtering: 28135
Businesses with valid coordinates: 2037
['PreView Restaurant, LLC.', 4.1524, '2221 Peachtree Rd NE Atlanta GA 30309', 33.81458899, -84.39114199]
['Botanico Hospitality Group LLC', 2.1674, '955 W Marietta ST NW Atlanta GA 30318', 33.7841318, -84.41810792]
['FACILITY CONCESSION SERVICE LLC', 3.0523, '1099 AVE ATLANTA GA 30307', 33.76360623, -84.35097038]
['FACILITY CONCESSION SERVICE LLC', 3.0523, '1099 AVE ATLANTA GA 30307', 33.7636

In [22]:

# Find max and min distances (filter out unrealistic distances > 100 miles)
realistic_df = out_df[out_df['miles_to_stadium'] <= 100]
print(f"Businesses within realistic range (â¤100 miles): {len(realistic_df)}")
print(f"Outliers removed: {len(out_df) - len(realistic_df)}")

max_idx = realistic_df['miles_to_stadium'].idxmax()
min_idx = realistic_df['miles_to_stadium'].idxmin()

max_distance = realistic_df.loc[max_idx]
min_distance = realistic_df.loc[min_idx]

print("\n" + "="*80)
print("CLOSEST BUSINESS TO STADIUM:")
print("="*80)
print(f"Name: {min_distance['business_name']}")
print(f"Distance: {min_distance['miles_to_stadium']} miles")
print(f"Address: {min_distance['address']}")

print("\n" + "="*80)
print("FARTHEST BUSINESS FROM STADIUM (within realistic range):")
print("="*80)
print(f"Name: {max_distance['business_name']}")
print(f"Distance: {max_distance['miles_to_stadium']} miles")
print(f"Address: {max_distance['address']}")


Businesses within realistic range (â¤100 miles): 2030
Outliers removed: 7

CLOSEST BUSINESS TO STADIUM:
Name: PUBLIC HOUSE 28, LLC
Distance: 0.0346 miles
Address: 10 DR ATLANTA GA 30314

FARTHEST BUSINESS FROM STADIUM (within realistic range):
Name: GIO'S CHCKN AMALFITANO, LLC
Distance: 25.5463 miles
Address: 204 Bluff Creek Dr. Woodstock GA 30188


In [30]:

# Find all businesses 25 miles or higher from stadium
far_businesses = realistic_df[realistic_df['miles_to_stadium'] >= 25].sort_values('miles_to_stadium', ascending=False)

print("\n" + "="*80)
print(f"BUSINESSES 25 MILES OR HIGHER FROM STADIUM ({len(far_businesses)} total)")
print("="*80 + "\n")

for idx, (_, row) in enumerate(far_businesses.iterrows(), 1):
    print(f"{idx}. {row['business_name']}")
    print(f"   Distance: {row['miles_to_stadium']} miles")
    print(f"   Address: {row['address']}")
    print()



BUSINESSES 25 MILES OR HIGHER FROM STADIUM (1 total)

1. GIO'S CHCKN AMALFITANO, LLC
   Distance: 25.5463 miles
   Address: 204 Bluff Creek Dr. Woodstock GA 30188

