In [25]:
# Cell 1: Import libraries
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from tqdm import tqdm  # needed by tqdm.pandas() below; without it a fresh kernel raises NameError

# Use a higher timeout and longer delay to avoid being throttled:
# 10 s per request, at least 2 s between requests, and up to 3 retries
# with a 10 s wait after a failed attempt.
geolocator = Nominatim(user_agent="wta-env-geocoder", timeout=10)
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=2, max_retries=3, error_wait_seconds=10.0)

# Enable tqdm for pandas apply
tqdm.pandas()

In [19]:
# Cell 2: Read the source CSV into merged_env_data
merged_env_data = pd.read_csv(filepath_or_buffer="Merged_WTA_Env.csv")

In [20]:
# Cell 3: Initialize geolocator
# This cell re-creates the geolocator and rate-limited geocode function that
# the import cell already defines. Use the same timeout / throttling settings
# here so that running the notebook top to bottom does not silently swap them
# for library defaults and a 1-second request spacing.
geolocator = Nominatim(user_agent="wta-env-mapper", timeout=10)
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=2,
    max_retries=3,
    error_wait_seconds=10.0,
)

In [21]:
# Cell 4: Distinct, non-missing values of the "Tournament City" column
unique_cities = pd.unique(merged_env_data["Tournament City"].dropna())

In [26]:
from tqdm import tqdm
import time

# Maps each city to {"Latitude": ..., "Longitude": ...}; both values are None
# when the lookup returned nothing or raised.
city_coords = {}

# Geocode every unique city once, with a progress bar.
for city in tqdm(unique_cities, desc="Geocoding cities"):
    try:
        hit = geocode(city)
        lat, lon = (hit.latitude, hit.longitude) if hit else (None, None)
        city_coords[city] = {"Latitude": lat, "Longitude": lon}
    except Exception as e:
        # Best effort: report the failure, record an empty result, then pause
        # for 5 seconds before continuing with the next city.
        print(f"Geocoding failed for {city}: {e}")
        city_coords[city] = dict.fromkeys(("Latitude", "Longitude"))
        time.sleep(5)

Geocoding cities:   0%|          | 0/106 [00:00<?, ?it/s]

Geocoding cities: 100%|██████████| 106/106 [03:30<00:00,  1.99s/it]


In [23]:
# Cell 6: Convert coordinates to DataFrame
# The dict keys (cities) become the index, which is then moved into an
# ordinary column; the three resulting columns are named positionally.
coords_df = (
    pd.DataFrame.from_dict(city_coords, orient="index")
    .reset_index()
    .set_axis(["Tournament City", "Latitude", "Longitude"], axis="columns")
)

In [24]:
# Cell 7: Merge coordinates into your dataset
# This cell overwrites merged_env_data with its own merge result, so first drop
# any coordinate columns left by a previous run; otherwise re-running the cell
# produces Latitude_x/Latitude_y and Longitude_x/Longitude_y instead of
# refreshing the values. errors="ignore" keeps the first run working when the
# columns do not exist yet.
merged_env_data = (
    merged_env_data
    .drop(columns=["Latitude", "Longitude"], errors="ignore")
    # validate="many_to_one" raises if coords_df lists a city more than once,
    # which would otherwise duplicate dataset rows.
    .merge(coords_df, on="Tournament City", how="left", validate="many_to_one")
)

In [27]:
# Save the dataset with coordinates. index=False leaves out the positional
# row index, which would otherwise come back as a spurious "Unnamed: 0"
# column when the file is read again with pd.read_csv.
merged_env_data.to_csv("merged_env_data_with_coords.csv", index=False)

In [1]:
import pandas as pd
import altair as alt

# Activate Altair's "default" renderer for chart display
# (original note: meant for the classic Notebook, not JupyterLab)
alt.renderers.enable("default")

RendererRegistry.enable('default')

In [2]:
df = pd.read_csv("merged_env_data_with_coords.csv")

# Ensure correct types first: values that cannot be parsed as numbers become NaN
df["AQI Reading"] = pd.to_numeric(df["AQI Reading"], errors="coerce")
df["Year"] = pd.to_numeric(df["Year"], errors="coerce")

# Drop rows missing key values. This must run after the coercion above,
# otherwise rows whose AQI/Year only turn into NaN during coercion would
# slip through the filter.
df = df.dropna(subset=["AQI Reading", "Latitude", "Longitude", "Year"])

# Average AQI per city/year (coordinates are kept as group keys so they
# remain available as columns for plotting)
aqi_summary = (
    df.groupby(["Tournament City", "Year", "Latitude", "Longitude"])
      .agg(avg_aqi=("AQI Reading", "mean"))
      .reset_index()
)

In [3]:
# Define color scale for AQI categories.
# A threshold scale gives each band one flat color instead of blending between
# neighbouring colors (which is what the default continuous scale does for a
# quantitative field). The 5 breakpoints split the values into 6 bands:
#   < 51 green, 51-100 yellow, 101-150 orange, 151-200 red,
#   201-300 purple, >= 301 maroon
aqi_color_scale = alt.Scale(
    type="threshold",
    domain=[51, 101, 151, 201, 301],
    range=["green", "yellow", "orange", "red", "purple", "maroon"]
)

# Create slider parameter (Altair v5).
# The slider spans the first to the last year present in aqi_summary in
# steps of one year and starts at the earliest year.
year_slider = alt.param(
    name="year",
    bind=alt.binding_range(min=int(aqi_summary["Year"].min()),
                           max=int(aqi_summary["Year"].max()),
                           step=1,
                           name="Select Year:"),
    value=int(aqi_summary["Year"].min())
)

In [4]:
# Point layer: one circle per city, colored by average AQI, limited to the
# rows whose Year equals the current slider value.
choropleth = (
    alt.Chart(aqi_summary)
    .mark_circle(size=100)
    .encode(
        longitude=alt.Longitude("Longitude:Q"),
        latitude=alt.Latitude("Latitude:Q"),
        color=alt.Color("avg_aqi:Q", scale=aqi_color_scale, title="Average AQI"),
        tooltip=[
            alt.Tooltip("Tournament City:N"),
            alt.Tooltip("Year:O"),
            alt.Tooltip("avg_aqi:Q"),
        ],
    )
    .properties(
        title="Average AQI by Tournament City (Use Slider to Change Year)",
        width=800,
        height=400,
    )
    .project(type="equalEarth")
    .add_params(year_slider)
    .transform_filter(alt.datum.Year == year_slider)
)

In [5]:
# Country outlines: the "countries" object of the world-atlas 110m TopoJSON
# file, referenced by its jsDelivr CDN URL
countries = alt.topo_feature(
    url="https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json",
    feature="countries",
)

# Background layer: light gray countries with white borders, using the same
# projection and size as the point layer
base_map = (
    alt.Chart(countries)
    .mark_geoshape(stroke="white", fill="lightgray")
    .properties(width=800, height=400)
    .project(type="equalEarth")
)

In [6]:
# Draw the city markers on top of the country outlines and display the result
full_map = alt.layer(base_map, choropleth)
full_map

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [7]:
# Export the layered chart to an HTML file
full_map.save("index.html", format="html")

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
