In [1]:
from requests import Session
from requests_cache import CacheMixin
from requests_ratelimiter import LimiterMixin
from pyrate_limiter import Duration, RequestRate, Limiter

from tbapy import TBA
from tqdm.notebook import tqdm
from tqdm import tqdm_pandas
from requests_ratelimiter import LimiterSession

import geopy.distance
import json

from scipy.optimize import minimize
import numpy as np


In [2]:
# The Blue Alliance API client used below to list the teams attending each event.
# NOTE(review): the API key is hardcoded in the notebook source; anyone with access
# to this file can use it. Consider loading it from an environment variable and
# rotating the key.
tba = TBA("shLVepNTB23kq5KYRfb9OeboVb98Sd13YKd4d5WDYqVU94ZGzmNUQPk15QfAshss")
# Rate limit handed to the shared HTTP session created below: presumably one request
# per 1.25 seconds (RequestRate(limit, interval)) -- TODO confirm against pyrate_limiter.
limit_rate = RequestRate(1, Duration.SECOND * 1.25)
limiter = Limiter(limit_rate)

# Second limiter (4 per second); it is not referenced by any of the cells visible here.
osrm_limiter = Limiter(RequestRate(4, Duration.SECOND))

class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
    """Session class with caching and rate-limiting behavior. Accepts arguments for both
    LimiterSession and CachedSession.

    The class adds no behavior of its own; everything comes from the two mixins layered
    on top of ``requests.Session``.

    NOTE(review): ``CacheMixin`` is listed before ``LimiterMixin``; presumably this lets
    responses served from the cache bypass the rate limiter -- confirm against the
    requests-ratelimiter documentation before relying on it.
    """

# Shared HTTP session for every geocoding request in this notebook: throttled by
# `limiter` and backed by the "http_cache" response cache. Only 200 responses to
# GET/HEAD/POST requests are eligible for caching.
session = CachedLimiterSession(
    limiter=limiter,
    cache_name="http_cache",
    allowable_methods=["GET", "HEAD", "POST"],
    allowable_codes=[200],
)

In [3]:
def geocode_team(postal_code, country):
    """Geocode a team's home location from its postal code and country.

    Sends ``"<postal_code>, <country>"`` to the geocode.maps.co search endpoint through
    the shared cached/rate-limited ``session`` and returns the first hit.

    The query is passed via ``params`` so that requests URL-encodes it; characters such
    as ``&``, ``#`` or ``+`` in either value would otherwise corrupt the query string.

    Args:
        postal_code: Postal/ZIP code of the team.
        country: Country name of the team.

    Returns:
        The first element of the JSON response (the caller reads its ``lat`` and
        ``lon`` entries).

    Raises:
        IndexError: if the service returns an empty result list.
    """
    # NOTE(review): the API key is hardcoded here (and in the other geocode helpers);
    # consider moving it to configuration.
    r = session.get(
        'https://geocode.maps.co/search',
        params={
            'q': f'{postal_code}, {country}',
            'api_key': '658e41788b4e0762532604ews14b633',
        },
    )
    return r.json()[0]

# team key -> first geocoder hit for that team's postal code / country
geo_lookup = {}
# team key -> repr of the error that prevented geocoding (kept for inspection
# instead of being silently discarded)
geocode_failures = {}
# for team in tqdm(tba.teams(year=2024)[:2000]):
for ek in tqdm(['arc', 'cur', 'dal', 'gal', 'hop', 'new', 'joh', 'mil']):
    for team in tba.event_teams(event=f'2023{ek}'):
        # Only teams from these three countries are considered.
        if team['country'] not in ['USA', 'Canada', 'Mexico']:
            continue

        try:
            postal_code = team['postal_code']
            if not postal_code:
                # Without this check a missing value would be sent to the geocoder
                # as the literal text "None".
                raise ValueError('team has no postal code')
            geo_lookup[team['key']] = geocode_team(postal_code, team['country'])
        except Exception as exc:
            # Still best-effort (one bad team must not abort the run), but
            # KeyboardInterrupt is no longer swallowed and failures are recorded.
            geocode_failures[team['key']] = repr(exc)

if geocode_failures:
    print(f'Could not geocode {len(geocode_failures)} team(s): {sorted(geocode_failures)}')

# Persist the lookup so it can be reused without re-querying the geocoder.
with open('conf/teams_geocoded.json', 'w') as f:
    json.dump(geo_lookup, f)

  0%|          | 0/8 [00:00<?, ?it/s]

In [65]:
# Load the candidate venues and keep only the first 45 entries of the list.
with open('conf/venues.json') as venues_file:
    venues = json.load(venues_file)[:45]

def geocode(postal_code):
    """Geocode a free-text query and return the first hit.

    Despite the parameter name, the caller in this cell passes ``"<venue name>, <city>"``.
    The text is sent via ``params`` so that requests URL-encodes it; venue names
    containing ``&``, ``#`` or ``+`` would otherwise corrupt the query string.

    Args:
        postal_code: Query text forwarded to the geocode.maps.co search endpoint.

    Returns:
        The first element of the JSON response (the caller reads its ``lat`` and
        ``lon`` entries).

    Raises:
        IndexError: if the service returns an empty result list.
    """
    # NOTE(review): a later cell redefines `geocode` with a different return shape
    # (a (lat, lon) tuple or None); re-running this cell's loop after that one breaks.
    r = session.get(
        'https://geocode.maps.co/search',
        params={
            'q': postal_code,
            'api_key': '658e41788b4e0762532604ews14b633',
        },
    )
    return r.json()[0]


from collections import defaultdict
# One summary record per qualifying venue: the venue's own fields plus how many
# geocoded teams fall within / beyond 480 miles of it.
data = []
# venue name -> keys of the teams farther than 480 miles away
flying_teams = defaultdict(list)
for venue in venues:
    # Ignore venues whose exhibition space is blank or below 500000.
    space = venue['exhibitionSpace']
    if str(space) == '' or int(space) < 500000:
        continue

    venue_geo = geocode(f"{venue['name']}, {venue['city']}")
    venue_point = (venue_geo['lat'], venue_geo['lon'])

    driving = 0
    flying = 0
    for tk, tgeo in geo_lookup.items():
        team_point = (tgeo['lat'], tgeo['lon'])
        miles = geopy.distance.distance(team_point, venue_point).miles

        if miles <= 480:
            driving += 1
            continue

        flying += 1
        flying_teams[venue['name']].append(tk)

    data.append({**venue, 'driving': driving, 'flying': flying})


In [66]:
import pandas as pd
from tqdm.auto import tqdm

# Register tqdm with pandas so the `progress_apply` call further down is available.
tqdm.pandas()

def geocode(postal_code):
    """Geocode a free-text query and return its coordinates, or None on failure.

    NOTE: this redefinition shadows the earlier ``geocode`` in this notebook, which
    returns the raw first hit instead of a tuple.

    The text is sent via ``params`` so that requests URL-encodes it; city names
    containing ``&``, ``#`` or ``+`` would otherwise corrupt the query string.

    Args:
        postal_code: Query text (the caller below passes origin-city names).

    Returns:
        ``(lat, lon)`` taken from the first hit, or ``None`` when the request fails,
        the response cannot be parsed, or there are no results.
    """
    try:
        r = session.get(
            'https://geocode.maps.co/search',
            params={
                'q': postal_code,
                'api_key': '658e41788b4e0762532604ews14b633',
            },
        )
        first_hit = r.json()[0]
        return (first_hit['lat'], first_hit['lon'])
    except Exception:
        # Best-effort lookup. Catching Exception rather than using a bare `except`
        # keeps KeyboardInterrupt working during the long apply over the flights table.
        return None

from functools import lru_cache

# Flight fare table; the columns used in this notebook are city1, city2 and fare.
df = pd.read_csv('conf/flights.csv')
# Strip the suffix as a literal string. With pandas < 2.0 the pattern is treated as a
# regular expression by default, where the parentheses form a group and the suffix is
# never removed; `regex=False` makes the behavior version-independent.
df['city1'] = df['city1'].str.replace(' (Metropolitan Area)', '', regex=False)

# Many rows share the same origin city, so memoise the lookup and resolve each distinct
# name once instead of once per row.
geocode_once = lru_cache(maxsize=None)(geocode)
df['ll'] = df['city1'].progress_apply(lambda c: geocode_once(c))


  0%|          | 0/110033 [00:00<?, ?it/s]

In [None]:
from collections import defaultdict

# One row per distinct origin city; rows with any missing value (including a failed
# geocode in `ll`) are dropped.
cities = df.drop_duplicates(subset="city1").replace({None: np.nan}).dropna()

tk_airport_map = {}
champs_airport_mapping = {}

team_closest_airports = defaultdict(list)
tk_price_map = {}
# venue city -> sum of the fares found for the teams, for that venue only
venue_total_dollars = {}
# venue name -> number of nearby origin cities that had no flight to the venue city
venue_missed_locals = defaultdict(lambda: 0)

# How many of a team's nearest origin cities are tried before giving up.
MAX_NEARBY_CITIES = 20

# The ranking of origin cities by distance from a team does not depend on the venue,
# so compute it once per team rather than once per (venue, team) pair.
team_nearby_cities = {}
for tk, tgeo in tqdm(geo_lookup.items()):
    team_point = (tgeo["lat"], tgeo["lon"])
    ranked = cities.assign(
        distance=cities["ll"].apply(
            lambda ll: geopy.distance.distance(team_point, (ll[0], ll[1])).miles
        )
    ).sort_values(by="distance")
    # head() copes with fewer than MAX_NEARBY_CITIES rows, where positional
    # indexing up to 20 would raise IndexError.
    team_nearby_cities[tk] = ranked["city1"].head(MAX_NEARBY_CITIES).tolist()

for venue in tqdm(venues):
    # Flights whose destination mentions the venue city. Match literally (city names
    # contain regex metacharacters such as ".") and treat missing destinations as
    # non-matching.
    venue_flights = df[df["city2"].str.contains(venue["city"], regex=False, na=False)]
    # Fare of the first listed flight from each origin city.
    first_fares = venue_flights.drop_duplicates(subset="city1")
    fare_from_city = dict(zip(first_fares["city1"], first_fares["fare"]))

    # Start from an empty map for every venue. Previously the map was shared across
    # venues, so a team with no route to this venue kept the fare found for an
    # earlier venue and inflated this venue's total.
    tk_price_map = {}
    for tk, nearby_cities in team_nearby_cities.items():
        for city in nearby_cities:
            if city in fare_from_city:
                tk_price_map[tk] = fare_from_city[city]
                break
            venue_missed_locals[venue["name"]] += 1

    venue_total_dollars[venue["city"]] = sum(tk_price_map.values())

    # while True:
    #     closest_city = closest_cities.index[i - 1]

    #     if (
    #         len(
    #             df[
    #                 (df["city1"] == cities.loc[closest_city, "city1"])
    #                 & (df["city2"].str.contains(venue["city"]))
    #             ]
    #         )
    #         == 0
    #     ):
    #         i += 1
    #     else:
    #         break

    # tk_airport_map[tk] = cities.loc[closest_city, "city1"]

    # champs_airport_mapping[venue["city"]] = tk_airport_map

In [38]:
# Tabulate the per-venue totals: one (city, dollars) row per entry of venue_total_dollars.
ddf = pd.DataFrame(
    [(city, dollars) for city, dollars in venue_total_dollars.items()],
    columns=['city', 'dollars'],
)

In [82]:
fdf = df.copy()

# venue name -> one average fare per flying team
team_fares = defaultdict(list)
# venue name -> one distance (miles) per flying team, from the team to the origin city used
get_to_airport_miles = defaultdict(list)

# Venue "city" labels that need a different name to match destinations in the flights table.
flight_city_aliases = {
    "Clark County": "Las Vegas",
    "Anaheim, CA": "Los Angeles",
    "Rosemont, IL": "Chicago",
    "Miami Beach, FL": "Miami"
}

# team key -> origin cities with their distance from the team, nearest first. This does
# not depend on the venue, so it is computed once per team and reused across venues.
team_city_distances = {}

for venue in tqdm(venues):
    # Ignore venues whose exhibition space is blank or below 500000.
    if len(str(venue['exhibitionSpace'])) == 0 or int(venue['exhibitionSpace']) < 500000:
        continue

    print(venue)
    if venue["city"] in ["Ocala, FL", "Perry, GA"]:
        continue

    venue_city = flight_city_aliases.get(venue["city"], venue["city"])

    # Filter the flights table once per venue (literal match; missing destinations
    # count as non-matching) instead of merging the whole table for every team.
    venue_flights = fdf.loc[
        fdf["city2"].str.contains(venue_city, regex=False, na=False),
        ["city1", "city2", "fare"],
    ]
    if venue_flights.empty:
        print(f"  no flights found into {venue_city!r}; skipping venue")
        continue

    # Set membership instead of scanning the list for every team.
    venue_flyers = set(flying_teams[venue["name"]])

    for tk, tgeo in geo_lookup.items():
        if tk not in venue_flyers:
            continue

        if tk not in team_city_distances:
            team_point = (tgeo["lat"], tgeo["lon"])
            tdf = cities[["city1"]].copy()
            tdf["distance"] = cities["ll"].apply(
                lambda ll: geopy.distance.distance(team_point, (ll[0], ll[1])).miles
            )
            team_city_distances[tk] = tdf.sort_values(by="distance")
        tdf = team_city_distances[tk]

        vdf = pd.merge(venue_flights, tdf, on="city1", how="inner")[
            ["city1", "distance", "city2", "fare"]
        ].sort_values(by="distance")
        if vdf.empty:
            # None of the matching flights depart from a geocoded origin city;
            # indexing the first row below would raise IndexError.
            continue

        # Keep every flight from the nearest origin city that serves the venue.
        fvdf = vdf[vdf["city1"] == vdf["city1"].iloc[0]]
        avg_fare = fvdf["fare"].mean()
        team_fares[venue["name"]].append(avg_fare)
        get_to_airport_miles[venue['name']].append(fvdf["distance"].mean())

  0%|          | 0/45 [00:00<?, ?it/s]

{'name': 'McCormick Place', 'city': 'Chicago', 'exhibitionSpace': 2670000, 'totalSpace': 9000000}
{'name': 'Orange County Convention Center', 'city': 'Orlando, FL', 'exhibitionSpace': 2100000, 'totalSpace': 7000000}
{'name': 'Las Vegas Convention Center', 'city': 'Las Vegas', 'exhibitionSpace': 2500000, 'totalSpace': 4600000}
{'name': 'Georgia World Congress Center', 'city': 'Atlanta', 'exhibitionSpace': 1500000, 'totalSpace': 3900000}
{'name': 'Jacob K. Javits Convention Center', 'city': 'New York City', 'exhibitionSpace': 850000, 'totalSpace': 3300000}
{'name': 'Ernest N. Morial Convention Center', 'city': 'New Orleans', 'exhibitionSpace': 1100000, 'totalSpace': 3100000}
{'name': "America's Center", 'city': 'St. Louis', 'exhibitionSpace': 523000, 'totalSpace': 2700000}
{'name': 'San Diego Convention Center', 'city': 'San Diego', 'exhibitionSpace': 615700, 'totalSpace': 2600000}
{'name': 'Huntington Place', 'city': 'Detroit', 'exhibitionSpace': 723500, 'totalSpace': 2400000}
{'name': 

In [89]:
from pprint import pprint

# (venue name, mean fare over its flying teams, number of flying teams)
out = [
    (name, sum(fares) / len(fares), len(fares))
    for name, fares in team_fares.items()
]

# Cheapest venues first.
pprint(sorted(out, key=lambda row: row[1]))

# Per-team fares behind the Atlantic City figure.
pprint(team_fares['Atlantic City Convention Center'])


[('Atlantic City Convention Center', 120.07100000000122, 424),
 ('Orange County Convention Center', 166.12308654583097, 525),
 ('Indiana Convention Center', 168.93782017213528, 308),
 ('Las Vegas Convention Center', 171.5358805487201, 482),
 ('Venetian', 171.5358805487201, 482),
 ('Mandalay Bay Convention Center', 171.5358805487201, 482),
 ('Huntington Place', 176.09447436995436, 356),
 ('Ernest N. Morial Convention Center', 179.80732625938307, 503),
 ('Huntington Convention Center', 182.38161839660967, 319),
 ('International Exposition Center', 182.4792126168223, 321),
 ('Miami Beach Convention Center', 182.49062190708167, 541),
 ('Colorado Convention Center', 195.3184877188526, 541),
 ('Henry B. Gonzalez Convention Center', 198.77653405582774, 512),
 ('Boston Convention and Exhibition Center', 198.84677845855967, 447),
 ('George R. Brown Convention Center', 204.77044967231882, 508),
 ('NRG Center', 204.77044967231882, 508),
 ("America's Center", 213.69042796695206, 326),
 ('Salt Pala