In [44]:
import os
import time
from io import StringIO

import pandas as pd
import requests
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder



In [45]:
# The Census API key is read from the environment so the credential is never
# stored in (or shared with) the notebook. A key that was previously committed
# here should be treated as exposed and rotated.
# When CENSUS_API_KEY is unset this is None; `requests` leaves None-valued
# query parameters out of the URL, so calls are then sent without a key.
# NOTE(review): confirm the API's keyless quota is enough for a full run.
API_KEY = os.environ.get("CENSUS_API_KEY")
BASE_URL = "https://api.census.gov/data"

# Survey years 2013 through 2023, deliberately skipping 2020.
# NOTE(review): presumably skipped because there is no standard ACS 1-year
# release for 2020 -- confirm and record the reason here.
YEARS = [year for year in range(2013, 2024) if year != 2020]

# Postal abbreviation -> two-digit state code used in the `for=state:<code>` query.
SOUTHEASTERN_STATE_CODES = {
    "AL": "01", "AR": "05", "FL": "12", "GA": "13", "KY": "21",
    "LA": "22", "MS": "28", "NC": "37", "SC": "45", "TN": "47", "VA": "51", "WV": "54"
}

# Variables requested from subject table S0701.
# NOTE(review): the labels below have not been verified in this notebook.
# Check each code against the API's variables.json for every year in YEARS --
# subject-table codes can point at different rows in different years.
S0701_VARS = [
    "S0701_C01_001E",  # Total population 1 year and over
    "S0701_C01_002E",  # Same house 1 year ago
    "S0701_C01_003E",  # Moved within same county
    "S0701_C01_004E",  # Moved from different county, same state
    "S0701_C01_005E",  # Moved from different state
    "S0701_C01_006E",  # Moved from abroad
    "S0701_C01_009E",  # Foreign-born
    "S0701_C01_012E",  # Median age of movers
    "S0701_C01_022E",  # Movers with bachelor's degree or higher
    "S0701_C01_026E"   # Movers under age 35
]

# Variables requested from detailed table B06009.
# NOTE(review): confirm whether B06009_005E is "bachelor's degree" only or
# "bachelor's degree or higher"; the label below is unverified.
B06009_VARS = [
    "B06009_001E",  # Total population
    "B06009_005E"   # Bachelor's degree or higher
]


In [None]:
def fetch_table_by_state(year, table, variables, state_code, is_subject=True, max_retries=3):
    """Fetch ACS 1-year data for a single state and year from the Census API.

    Parameters
    ----------
    year : int
        Survey year; used in the request URL and stored in the ``year`` column.
    table : str
        Table label such as ``"S0701"``. It is used only in log messages and
        does not influence the request itself.
    variables : list of str
        Variable codes, joined with commas into the ``get`` query parameter.
    state_code : str
        State code sent as ``for=state:<code>`` and stored in ``state_code``.
    is_subject : bool, default True
        If True, query the ``acs/acs1/subject`` endpoint; otherwise query
        ``acs/acs1``.
    max_retries : int, default 3
        Maximum number of request attempts before giving up.

    Returns
    -------
    pandas.DataFrame
        The rows returned by the API (first payload row is used as the header)
        plus ``year`` and ``state_code`` columns. Values are kept exactly as
        delivered; no dtype conversion is performed here. An empty DataFrame
        is returned on a 404, on an empty or unparsable body, when the payload
        has no data rows, or when every attempt fails.
    """
    base_type = "subject" if is_subject else ""
    # rstrip removes the trailing slash left over when base_type is empty.
    url = f"{BASE_URL}/{year}/acs/acs1/{base_type}".rstrip("/")

    params = {
        "get": ",".join(variables),
        "for": f"state:{state_code}",
        "key": API_KEY
    }

    for attempt in range(max_retries):
        try:
            response = requests.get(url, params=params, timeout=10)
            if response.status_code == 404:
                print(f"❌ 404 Not Found for {table} in {year}, state {state_code}")
                return pd.DataFrame()
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Retrying after error: {e}")
            # Pause only when another attempt will actually follow.
            if attempt < max_retries - 1:
                time.sleep(2)
            continue

        # The request succeeded at the HTTP level. An empty or non-JSON body is
        # unlikely to change on retry, so report it once and return.
        try:
            data = response.json() if response.content else None
        except ValueError as e:
            print(f"❌ Unreadable response for {table} in {year}, state {state_code}: {e}")
            return pd.DataFrame()

        # Expect a list whose first entry is the header row followed by data rows.
        if not isinstance(data, list) or len(data) < 2:
            print(f"⚠️ No data rows for {table} in {year}, state {state_code}")
            return pd.DataFrame()

        df = pd.DataFrame(data[1:], columns=data[0])
        df["year"] = year
        df["state_code"] = state_code
        return df

    print(f"❌ Failed to fetch {table} for {state_code} in {year}")
    return pd.DataFrame()

In [47]:
# Accumulators: one DataFrame per successful (year, state) request, per table.
all_s0701, all_b06009 = [], []

print("📦 Fetching ACS data for Southeastern states (2013–2023)...\n")

# Flattened year x state iteration; years vary slowest, states fastest.
state_year_pairs = (
    (year, abbrev, code)
    for year in YEARS
    for abbrev, code in SOUTHEASTERN_STATE_CODES.items()
)

for year, abbrev, code in state_year_pairs:
    # Subject table S0701 for this state and year.
    print(f"🌐 S0701 for {abbrev}, {year}")
    s0701 = fetch_table_by_state(
        year, "S0701", S0701_VARS, code, is_subject=True
    )
    if not s0701.empty:
        # Tag rows with the postal abbreviation before collecting them.
        s0701["state_abbr"] = abbrev
        all_s0701.append(s0701)

    # Detailed table B06009 for the same state and year.
    print(f"📘 B06009 for {abbrev}, {year}")
    b06009 = fetch_table_by_state(
        year, "B06009", B06009_VARS, code, is_subject=False
    )
    if not b06009.empty:
        b06009["state_abbr"] = abbrev
        all_b06009.append(b06009)

📦 Fetching ACS data for Southeastern states (2013–2023)...

🌐 S0701 for AL, 2013
📘 B06009 for AL, 2013
🌐 S0701 for AR, 2013
📘 B06009 for AR, 2013
🌐 S0701 for FL, 2013
📘 B06009 for FL, 2013
🌐 S0701 for GA, 2013
📘 B06009 for GA, 2013
🌐 S0701 for KY, 2013
📘 B06009 for KY, 2013
🌐 S0701 for LA, 2013
📘 B06009 for LA, 2013
🌐 S0701 for MS, 2013
📘 B06009 for MS, 2013
🌐 S0701 for NC, 2013
📘 B06009 for NC, 2013
🌐 S0701 for SC, 2013
📘 B06009 for SC, 2013
🌐 S0701 for TN, 2013
📘 B06009 for TN, 2013
🌐 S0701 for VA, 2013
📘 B06009 for VA, 2013
🌐 S0701 for WV, 2013
📘 B06009 for WV, 2013
🌐 S0701 for AL, 2014
📘 B06009 for AL, 2014
🌐 S0701 for AR, 2014
📘 B06009 for AR, 2014
🌐 S0701 for FL, 2014
📘 B06009 for FL, 2014
🌐 S0701 for GA, 2014
📘 B06009 for GA, 2014
🌐 S0701 for KY, 2014
📘 B06009 for KY, 2014
🌐 S0701 for LA, 2014
📘 B06009 for LA, 2014
🌐 S0701 for MS, 2014
📘 B06009 for MS, 2014
🌐 S0701 for NC, 2014
📘 B06009 for NC, 2014
🌐 S0701 for SC, 2014
📘 B06009 for SC, 2014
🌐 S0701 for TN, 2014
📘 B06009 for TN,

In [48]:
# Stack the per-request frames into one table per ACS table. pd.concat cannot
# take an empty list, so fall back to an empty frame when nothing was fetched.
if all_s0701:
    s0701_df = pd.concat(all_s0701, ignore_index=True)
else:
    s0701_df = pd.DataFrame()

if all_b06009:
    b06009_df = pd.concat(all_b06009, ignore_index=True)
else:
    b06009_df = pd.DataFrame()

# Report the size of each combined table.
print("\n✅ Data collection complete.")
print(f"S0701 shape: {s0701_df.shape}")
print(f"B06009 shape: {b06009_df.shape}")

# --- Optional: Save to CSV ---
# Written without the row index, relative to the current working directory.
s0701_df.to_csv("southeast_s0701.csv", index=False)
b06009_df.to_csv("southeast_b06009.csv", index=False)


✅ Data collection complete.
S0701 shape: (120, 14)
B06009 shape: (120, 6)


In [50]:
# Merge the S0701 and B06009 tables on year and state.
# validate="one_to_one" makes pandas raise if either side has duplicate
# (year, state_code, state_abbr) keys instead of silently multiplying rows.
# NOTE(review): any non-key column present in both frames (presumably the
# API's own geography column) comes out suffixed _x/_y; left as-is so the
# saved CSV schema does not change.
df = pd.merge(
    s0701_df,
    b06009_df,
    on=["year", "state_code", "state_abbr"],
    how="inner",
    validate="one_to_one",
)
df.to_csv("southeast_s0701_b06009.csv", index=False)

# Keep the preview as the cell's last expression so the notebook displays it.
df.head()