In [1]:
import os

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, inspect, text

# Path of the .env file that provides the MYSQL_* settings read below.
# Set STORM_RISK_ENV_FILE to run the notebook from another machine or
# checkout; the original location remains the default.
ENV_FILE = os.getenv("STORM_RISK_ENV_FILE", r"C:\Projects\storm_risk\.env")
load_dotenv(ENV_FILE)

# Credentials are passed through connect_args rather than embedded in the URL,
# so special characters in the password need no URL escaping.
engine = create_engine(
    "mysql+mysqlconnector://",
    connect_args={
        "host"    : os.getenv('MYSQL_HOST'),
        "user"    : os.getenv('MYSQL_USER'),
        "password": os.getenv('MYSQL_PASSWORD'),
        "database": os.getenv('MYSQL_DATABASE')
    }
)

# create_engine() is lazy and never touches the server. Open (and immediately
# release) one connection so that bad credentials or an unreachable host fail
# here, and the message below is only printed after a real connection.
with engine.connect():
    pass

print("✅ Connected!")

✅ Connected!


In [2]:
# SQL for the FEMA disaster declarations; rows without a declaration date
# are filtered out by the query itself.
fema_query = """
    SELECT 
        fips_code,
        declaration_date,
        incident_type,
        disaster_type
    FROM fema_disasters
    WHERE declaration_date IS NOT NULL
"""

# SQL for the per-county hurricane susceptibility table.
hurricane_query = """
    SELECT 
        fips_code,
        susceptibility_score,
        storm_count,
        avg_wind_speed
    FROM hurricane_susceptibility
"""

fema = pd.read_sql(fema_query, engine)
hurricane = pd.read_sql(hurricane_query, engine)

# Quick sanity summary of what was loaded: row counts, the span of
# declaration dates, and the ten most frequent incident types.
declaration_dates = fema['declaration_date']
top_incident_types = fema['incident_type'].value_counts().head(10)

print(
    f"FEMA records loaded:              {len(fema)}",
    f"Hurricane counties loaded:        {len(hurricane)}",
    f"\nFEMA date range: {declaration_dates.min()} to {declaration_dates.max()}",
    f"\nFEMA incident types:\n{top_incident_types}",
    sep="\n",
)

FEMA records loaded:              69634
Hurricane counties loaded:        2557

FEMA date range: 1953-05-02 to 2026-02-20

FEMA incident types:
incident_type
Severe Storm        19299
Hurricane           13721
Flood               11234
Biological           7857
Fire                 3853
Snowstorm            3707
Severe Ice Storm     2956
Tornado              1623
Drought              1292
Winter Storm         1224
Name: count, dtype: int64


In [3]:
# Make sure declaration_date is a real datetime column before grouping on it
fema['declaration_date'] = pd.to_datetime(fema['declaration_date'])

# One row per (county, declaration date); disaster_count is the number of
# non-null incident_type values that fall in that group.
group_keys = ['fips_code', 'declaration_date']
county_day = (
    fema.groupby(group_keys)['incident_type']
    .count()
    .rename('disaster_count')
    .reset_index()
)

# Base risk score: log(1 + count), which compresses large disaster counts
# so they don't dominate.
county_day = county_day.assign(
    risk_score=np.log1p(county_day['disaster_count'])
)

print(f"County-day records: {len(county_day)}")
print(f"\nSample:")
county_day.head(10)

County-day records: 67826

Sample:


Unnamed: 0,fips_code,declaration_date,disaster_count,risk_score
0,1000,1961-02-27,1,0.693147
1,1000,2017-09-11,1,0.693147
2,1000,2017-10-08,1,0.693147
3,1000,2018-10-12,1,0.693147
4,1000,2020-03-13,2,1.098612
5,1000,2020-03-29,2,1.098612
6,1000,2020-09-14,1,0.693147
7,1000,2020-09-20,1,0.693147
8,1000,2021-03-28,1,0.693147
9,1001,1977-07-20,1,0.693147


In [4]:
# Merge hurricane susceptibility onto every county-day record.
# validate='many_to_one' makes pandas raise a MergeError if fips_code is not
# unique in `hurricane`; without it a duplicated county would silently
# multiply county-day rows and inflate every downstream aggregate.
county_day_enhanced = county_day.merge(
    hurricane,
    on='fips_code',
    how='left',
    validate='many_to_one'
)

# Counties with no hurricane history get a score of 0, so the multiplier
# below becomes 1 and enhanced_risk equals the base risk_score.
county_day_enhanced['susceptibility_score'] = \
    county_day_enhanced['susceptibility_score'].fillna(0)

# Compute enhanced risk score: base risk scaled by (1 + susceptibility)
county_day_enhanced['enhanced_risk'] = (
    county_day_enhanced['risk_score'] * 
    (1 + county_day_enhanced['susceptibility_score'])
)

print(f"Enhanced records: {len(county_day_enhanced)}")
print(f"\nRisk score comparison:")
print(f"Base risk mean:     {county_day_enhanced['risk_score'].mean():.4f}")
print(f"Enhanced risk mean: {county_day_enhanced['enhanced_risk'].mean():.4f}")
print(f"\nSample:")
county_day_enhanced[['fips_code','declaration_date','risk_score',
                      'susceptibility_score','enhanced_risk']].head(10)

Enhanced records: 67826

Risk score comparison:
Base risk mean:     0.6981
Enhanced risk mean: 0.8210

Sample:


Unnamed: 0,fips_code,declaration_date,risk_score,susceptibility_score,enhanced_risk
0,1000,1961-02-27,0.693147,0.0,0.693147
1,1000,2017-09-11,0.693147,0.0,0.693147
2,1000,2017-10-08,0.693147,0.0,0.693147
3,1000,2018-10-12,0.693147,0.0,0.693147
4,1000,2020-03-13,1.098612,0.0,1.098612
5,1000,2020-03-29,1.098612,0.0,1.098612
6,1000,2020-09-14,0.693147,0.0,0.693147
7,1000,2020-09-20,0.693147,0.0,0.693147
8,1000,2021-03-28,0.693147,0.0,0.693147
9,1001,1977-07-20,0.693147,0.290802,0.894716


In [5]:
# Label every county-day row with its calendar month as a 'YYYY-MM' string
month_periods = county_day_enhanced['declaration_date'].dt.to_period('M')
county_day_enhanced['month'] = month_periods.astype(str)

# Collapse to one row per (county, month), averaging the enhanced risk of
# the county-day rows that fall in that month.
monthly_risk = county_day_enhanced.groupby(
    ['fips_code', 'month'], as_index=False
)['enhanced_risk'].mean()

first_month = monthly_risk['month'].min()
last_month = monthly_risk['month'].max()

print(f"Monthly risk records: {len(monthly_risk)}")
print(f"Date range: {first_month} to {last_month}")
print(f"\nSample:")
monthly_risk.head(10)

Monthly risk records: 60849
Date range: 1953-05 to 2026-02

Sample:


Unnamed: 0,fips_code,month,enhanced_risk
0,1000,1961-02,0.693147
1,1000,2017-09,0.693147
2,1000,2017-10,0.693147
3,1000,2018-10,0.693147
4,1000,2020-03,1.098612
5,1000,2020-09,0.693147
6,1000,2021-03,0.693147
7,1001,1977-07,0.894716
8,1001,1979-03,0.894716
9,1001,1979-04,0.894716


In [6]:
# Prepare the county_day table for MySQL: keep the base feature columns and
# rename declaration_date to feature_date before loading.
county_day_load = county_day_enhanced[[
    'fips_code',
    'declaration_date',
    'disaster_count',
    'risk_score'
]].rename(columns={'declaration_date': 'feature_date'})

# Full refresh in a single transaction. A plain append duplicated every row
# each time the notebook was re-run; clearing the table first makes this cell
# idempotent. DELETE (rather than if_exists='replace') keeps the existing
# table definition intact, and engine.begin() rolls everything back if the
# insert fails part-way.
# NOTE(review): assumes this notebook is the only writer to
# county_day_features -- confirm before running against a shared database.
with engine.begin() as conn:
    if inspect(conn).has_table('county_day_features'):
        conn.execute(text("DELETE FROM county_day_features"))
    county_day_load.to_sql(
        name='county_day_features',
        con=conn,
        if_exists='append',
        index=False,
        chunksize=500
    )

print(f"✅ {len(county_day_load)} county-day records loaded into MySQL!")

✅ 67826 county-day records loaded into MySQL!


In [7]:
# Full refresh of the monthly_risk table in a single transaction. A plain
# append duplicated every row each time the notebook was re-run; clearing the
# table first makes this cell idempotent. DELETE (rather than
# if_exists='replace') keeps the existing table definition intact, and
# engine.begin() rolls everything back if the insert fails part-way.
# NOTE(review): assumes this notebook is the only writer to monthly_risk --
# confirm before running against a shared database.
with engine.begin() as conn:
    if inspect(conn).has_table('monthly_risk'):
        conn.execute(text("DELETE FROM monthly_risk"))
    monthly_risk.to_sql(
        name='monthly_risk',
        con=conn,
        if_exists='append',
        index=False,
        chunksize=500
    )

print(f"✅ {len(monthly_risk)} monthly risk records loaded into MySQL!")

✅ 60849 monthly risk records loaded into MySQL!


In [8]:
# Tables to report on. The names are fixed here in the notebook, so
# interpolating them into the COUNT query below is safe.
tables = [
    'counties',
    'fema_disasters',
    'hurricane_susceptibility',
    'county_day_features',
    'monthly_risk',
]

print("=== MySQL Database Summary ===\n")
for table in tables:
    # One COUNT(*) round-trip per table; the result is a 1x1 frame.
    count_query = f"SELECT COUNT(*) as total FROM {table}"
    count = pd.read_sql(count_query, engine)
    total_rows = count['total'].iloc[0]
    print(f"{table:<30} {total_rows:>8,} records")

=== MySQL Database Summary ===

counties                          3,324 records
fema_disasters                   69,634 records
hurricane_susceptibility          2,557 records
county_day_features              67,826 records
monthly_risk                     60,849 records
