In [289]:
import pandas as pd

In [290]:
import requests
import pandas as pd

CENSUS_API_KEY = "762ebaf73f9eaa5e8d3c5c8ffc7024f25f60d3c8"
BASE_URL = "https://api.census.gov/data/2023/acs/acs5"
STATE_FIPS = "19"

def fetch_census_data_safe(variables, description=""):
    """Fetch data from Census API, safely handle errors, and skip missing variables."""
    params = {
        "get": ",".join(variables),
        "for": "county:*",
        "in": f"state:{STATE_FIPS}",
        "key": CENSUS_API_KEY
    }
    response = requests.get(BASE_URL, params=params)
    
    if response.status_code != 200:
        print(f"[{description}] Request failed with status {response.status_code}")
        print(response.text[:500])
        return None
    
    try:
        data = response.json()
        df = pd.DataFrame(data[1:], columns=data[0])
        return df
    except Exception as e:
        print(f"[{description}] JSON decode error: {e}")
        print(response.text[:500])
        return None

# ——————————————————————————————



In [291]:
# 1️⃣ Race variables (incrementally safe)
race_vars = [
    "NAME","B02001_001E","B02001_002E","B02001_003E","B02001_004E",
    "B02001_005E"
]  # Start with safe ones
race_df = fetch_census_data_safe(race_vars, "Race")
if race_df is not None:
    race_df.rename(columns={
        "NAME":"County",
        "B02001_001E":"Total_Population",
        "B02001_002E":"White_Alone",
        "B02001_003E":"Black_Alone",
        "B02001_004E":"AIAN_Alone",
        "B02001_005E":"Asian_Alone",
        "state":"State_FIPS",
        "county":"County_FIPS"
    }, inplace=True)

In [292]:
edu_vars = ["B15003_001E","B15003_017E","B15003_022E","B15003_023E"]
edu_df = fetch_census_data_safe(edu_vars, "Education")
if edu_df is not None:
    edu_df.rename(columns={
        "B15003_001E":"Population_25_and_Over",
        "B15003_017E":"High_School_Graduate",
        "B15003_022E":"Bachelors_Degree",
        "B15003_023E":"Masters_Degree",
        "state":"State_FIPS",
        "county":"County_FIPS"
    }, inplace=True)


In [293]:
# 3️⃣ Income & Age safe
income_age_vars = ["B19013_001E","B01002_001E"]
income_age_df = fetch_census_data_safe(income_age_vars, "Income/Age")
if income_age_df is not None:
    income_age_df.rename(columns={
        "B19013_001E":"Median_Household_Income",
        "B01002_001E":"Median_Age",
        "state":"State_FIPS",
        "county":"County_FIPS"
    }, inplace=True)

In [294]:
import requests
import pandas as pd

CENSUS_API_KEY = "762ebaf73f9eaa5e8d3c5c8ffc7024f25f60d3c8"
STATE_FIPS = "19"
BASE_URL_2010 = "https://api.census.gov/data/2010/dec/aian"

# Fetch HCT1 group
params = {
    "get": "HCT001001,HCT001004,HCT001006,NAME",
    "for": "county:*",
    "in": f"state:{STATE_FIPS}",
    "key": CENSUS_API_KEY
}

response = requests.get(BASE_URL_2010, params=params)
if response.status_code != 200:
    print(f"Request failed: {response.status_code}")
    print(response.text[:500])
else:
    data = response.json()
    df = pd.DataFrame(data[1:], columns=data[0])

    # Rename columns
    df.rename(columns={
        "HCT001001": "Total_Occupied_Pop_2010",
        "HCT001004": "Urban_Pop_2010",
        "HCT001006": "Rural_Pop_2010",
        "NAME": "County",
        "state": "State_FIPS",
        "county": "County_FIPS"
    }, inplace=True)

    # Convert to numeric
    for col in ["Total_Occupied_Pop_2010","Urban_Pop_2010","Rural_Pop_2010"]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df['Rural_Pop_2010'] = df['Total_Occupied_Pop_2010'] - df['Urban_Pop_2010']
    # Percentages
    df["Pct_Urban_2010"] = df["Urban_Pop_2010"] / df["Total_Occupied_Pop_2010"] * 100
    df["Pct_Rural_2010"] = df["Rural_Pop_2010"] / df["Total_Occupied_Pop_2010"] * 100
    rural_df = df
    display(rural_df)


Unnamed: 0,Total_Occupied_Pop_2010,Urban_Pop_2010,Rural_Pop_2010,County,State_FIPS,County_FIPS,Pct_Urban_2010,Pct_Rural_2010
0,3292,0,3292,"Adair County, Iowa",19,001,0.000000,100.000000
1,1715,0,1715,"Adams County, Iowa",19,003,0.000000,100.000000
2,5845,1702,4143,"Allamakee County, Iowa",19,005,29.118905,70.881095
3,5627,2453,3174,"Appanoose County, Iowa",19,007,43.593389,56.406611
4,2617,0,2617,"Audubon County, Iowa",19,009,0.000000,100.000000
...,...,...,...,...,...,...,...,...
94,4597,1473,3124,"Winnebago County, Iowa",19,189,32.042637,67.957363
95,7997,3043,4954,"Winneshiek County, Iowa",19,191,38.051769,61.948231
96,39052,0,39052,"Woodbury County, Iowa",19,193,0.000000,100.000000
97,3172,0,3172,"Worth County, Iowa",19,195,0.000000,100.000000


In [295]:
# ——————————————————————————————
# Merge all available DataFrames including 2010 Urban/Rural
dfs = [d for d in [race_df, edu_df, income_age_df, urban_rural_df] if d is not None]
if not dfs:
    raise ValueError("No data fetched. Check your API key.")

df = dfs[0]
for d in dfs[1:]:
    df = df.merge(d, on=["State_FIPS", "County_FIPS"], how="left")

# ——————————————————————————————
# Convert numeric columns safely
numeric_cols = [
    "Total_Population","White_Alone","Black_Alone","AIAN_Alone","Asian_Alone",
    "Population_25_and_Over","High_School_Graduate","Bachelors_Degree","Masters_Degree",
    "Median_Household_Income","Median_Age",
    "Total_Occupied_Pop_2010","Urban_Pop_2010","Rural_Pop_2010",
    "Pct_Urban_2010","Pct_Rural_2010"
]
for col in numeric_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

# ——————————————————————————————
# Calculate race percentages
if "Total_Population" in df.columns:
    for col, pct_col in [("White_Alone","Pct_White"), ("Black_Alone","Pct_Black")]:
        if col in df.columns:
            df[pct_col] = df[col].div(df["Total_Population"]) * 100

# ——————————————————————————————
# Calculate education percentages
if "Population_25_and_Over" in df.columns:
    degree_cols = [c for c in ["Bachelors_Degree","Masters_Degree"] if c in df.columns]
    if degree_cols:
        df["Pct_Bachelors_or_Higher"] = df[degree_cols].sum(axis=1).div(df["Population_25_and_Over"]) * 100
    if "High_School_Graduate" in df.columns:
        df["Pct_High_School"] = df["High_School_Graduate"].div(df["Population_25_and_Over"]) * 100

# ——————————————————————————————
# Use 2010 Urban/Rural percentages directly
if "Pct_Urban_2010" in df.columns and "Pct_Rural_2010" in df.columns:
    df["Pct_Urban"] = df["Pct_Urban_2010"]
    df["Pct_Rural"] = df["Pct_Rural_2010"]

# ——————————————————————————————
# Reorder columns for clarity
cols_order = [
    "County","State_FIPS","County_FIPS",
    "Total_Population","White_Alone","Black_Alone","AIAN_Alone","Asian_Alone",
    "Population_25_and_Over","High_School_Graduate","Bachelors_Degree","Masters_Degree",
    "Median_Household_Income","Median_Age",
    "Total_Occupied_Pop_2010","Urban_Pop_2010","Rural_Pop_2010",
    "Pct_White","Pct_Black","Pct_Bachelors_or_Higher","Pct_High_School",
    "Pct_Urban","Pct_Rural"
]
df = df[[c for c in cols_order if c in df.columns]]
df['Pct_Rural'] = rural_df['Pct_Rural_2010']
# ——————————————————————————————
display(df.head())


Unnamed: 0,State_FIPS,County_FIPS,Total_Population,White_Alone,Black_Alone,AIAN_Alone,Asian_Alone,Population_25_and_Over,High_School_Graduate,Bachelors_Degree,Masters_Degree,Median_Household_Income,Median_Age,Pct_White,Pct_Black,Pct_Bachelors_or_Higher,Pct_High_School,Pct_Rural
0,19,1,7471,7091,101,27,27,5257,1948,784,154,66176,42.6,94.913666,1.351894,17.842876,37.055355,100.0
1,19,3,3641,3475,13,20,0,2653,829,524,74,68828,46.6,95.440813,0.357045,22.54052,31.247644,100.0
2,19,5,14038,12690,246,115,76,9670,3545,1345,433,66000,43.1,90.397493,1.752386,18.386763,36.659772,70.881095
3,19,7,12242,11641,121,7,53,8795,2701,1338,283,51146,45.5,95.090671,0.988401,18.430927,30.710631,56.406611
4,19,9,5622,5351,15,12,2,4039,1324,653,143,54152,47.4,95.179651,0.266809,19.707848,32.780391,100.0


In [296]:
TI = pd.read_csv('Wind Data Combo.csv')
TI = TI['TI']
display(TI.head())

0    3.0
1    4.0
2    1.0
3    4.0
4    3.0
Name: TI, dtype: float64

In [297]:
df['TI'] = TI

 
display(df)

Unnamed: 0,State_FIPS,County_FIPS,Total_Population,White_Alone,Black_Alone,AIAN_Alone,Asian_Alone,Population_25_and_Over,High_School_Graduate,Bachelors_Degree,Masters_Degree,Median_Household_Income,Median_Age,Pct_White,Pct_Black,Pct_Bachelors_or_Higher,Pct_High_School,Pct_Rural,TI
0,19,001,7471,7091,101,27,27,5257,1948,784,154,66176,42.6,94.913666,1.351894,17.842876,37.055355,100.000000,3.0
1,19,003,3641,3475,13,20,0,2653,829,524,74,68828,46.6,95.440813,0.357045,22.540520,31.247644,100.000000,4.0
2,19,005,14038,12690,246,115,76,9670,3545,1345,433,66000,43.1,90.397493,1.752386,18.386763,36.659772,70.881095,1.0
3,19,007,12242,11641,121,7,53,8795,2701,1338,283,51146,45.5,95.090671,0.988401,18.430927,30.710631,56.406611,4.0
4,19,009,5622,5351,15,12,2,4039,1324,653,143,54152,47.4,95.179651,0.266809,19.707848,32.780391,100.000000,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,19,189,10648,9667,98,13,340,7223,1940,1361,337,63719,41.7,90.787002,0.920361,23.508238,26.858646,67.957363,1.0
95,19,191,19972,18844,136,50,116,13409,3915,2852,837,75652,42.2,94.352093,0.680953,27.511373,29.196808,61.948231,4.0
96,19,193,105760,78375,4813,1802,2654,67514,18774,10508,4001,70147,36.2,74.106467,4.550870,21.490358,27.807566,100.000000,3.0
97,19,195,7380,6986,64,7,43,5362,1675,791,206,76875,44.3,94.661247,0.867209,18.593808,31.238344,100.000000,4.0


In [298]:
# Run Regression with statsmodels
import statsmodels.api as sm
X = df[["Pct_White", "Pct_Bachelors_or_Higher", "Median_Household_Income", "Median_Age", "Pct_Rural" ]]
y = df["TI"]
X = sm.add_constant(X)  # Adds a constant term to the predictors
model = sm.OLS(y, X, missing='drop')  # Handle missing data
results = model.fit()
display(results.summary())


0,1,2,3
Dep. Variable:,TI,R-squared:,0.052
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,1.029
Date:,"Sat, 03 Jan 2026",Prob (F-statistic):,0.405
Time:,11:39:05,Log-Likelihood:,-124.84
No. Observations:,99,AIC:,261.7
Df Residuals:,93,BIC:,277.3
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.9792,1.806,2.203,0.030,0.393,7.566
Pct_White,-0.0338,0.019,-1.818,0.072,-0.071,0.003
Pct_Bachelors_or_Higher,-0.0203,0.018,-1.106,0.272,-0.057,0.016
Median_Household_Income,1.001e-05,1.21e-05,0.829,0.409,-1.4e-05,3.4e-05
Median_Age,0.0417,0.037,1.116,0.267,-0.033,0.116
Pct_Rural,0.0027,0.004,0.700,0.486,-0.005,0.010

0,1,2,3
Omnibus:,13.781,Durbin-Watson:,2.489
Prob(Omnibus):,0.001,Jarque-Bera (JB):,15.001
Skew:,-0.919,Prob(JB):,0.000553
Kurtosis:,3.508,Cond. No.,1440000.0


In [301]:
save_df = X 
save_df['TI'] = y
save_df['County'] = rural_df['County']
save_df = save_df.drop(columns=['const'])
save_df.to_csv('WindDataWithTI.csv', index=False)
display(save_df)

Unnamed: 0,Pct_White,Pct_Bachelors_or_Higher,Median_Household_Income,Median_Age,Pct_Rural,TI,County
0,94.913666,17.842876,66176,42.6,100.000000,3.0,"Adair County, Iowa"
1,95.440813,22.540520,68828,46.6,100.000000,4.0,"Adams County, Iowa"
2,90.397493,18.386763,66000,43.1,70.881095,1.0,"Allamakee County, Iowa"
3,95.090671,18.430927,51146,45.5,56.406611,4.0,"Appanoose County, Iowa"
4,95.179651,19.707848,54152,47.4,100.000000,3.0,"Audubon County, Iowa"
...,...,...,...,...,...,...,...
94,90.787002,23.508238,63719,41.7,67.957363,1.0,"Winnebago County, Iowa"
95,94.352093,27.511373,75652,42.2,61.948231,4.0,"Winneshiek County, Iowa"
96,74.106467,21.490358,70147,36.2,100.000000,3.0,"Woodbury County, Iowa"
97,94.661247,18.593808,76875,44.3,100.000000,4.0,"Worth County, Iowa"
