# Public Assistance Program
Author: Mark Bauer

In [1]:
import duckdb

# Read In Data

In [2]:
con = duckdb.connect()

con.execute("""
    CREATE TABLE public_assistance AS
    FROM read_parquet('https://www.fema.gov/api/open/v1/PublicAssistanceFundedProjectsSummaries.parquet')
"""
)

sql = """
    SELECT *
    FROM public_assistance
    LIMIT 10
"""

con.sql(sql)

┌──────────────────────┬────────────────┬─────────────────┬───┬──────────────────────┬──────────────────────┐
│          id          │ disasterNumber │ declarationDate │ … │     lastRefresh      │         hash         │
│       varchar        │     int16      │      date       │   │      timestamp       │       varchar        │
├──────────────────────┼────────────────┼─────────────────┼───┼──────────────────────┼──────────────────────┤
│ 0646e381-ea0a-46e5…  │           1239 │ 1998-08-26      │ … │ 2023-03-18 17:02:4…  │ a18872962143d707fb…  │
│ bc05fe25-0f1e-4344…  │           1239 │ 1998-08-26      │ … │ 2023-03-18 17:02:4…  │ d1c77a224203725a79…  │
│ d9c845c8-5db4-404b…  │           1239 │ 1998-08-26      │ … │ 2023-03-18 17:02:4…  │ 7fb2d6dd706512236b…  │
│ 9350e760-64da-4931…  │           1239 │ 1998-08-26      │ … │ 2023-03-18 17:02:4…  │ 571fab79609fe373f6…  │
│ a196ef51-9883-47c8…  │           1239 │ 1998-08-26      │ … │ 2023-03-18 17:02:4…  │ 39207f1c57e1f088be…  │
│ 39de6538

# Describe and Summarize Data

In [3]:
con.sql("SELECT COUNT(id) AS count_rows FROM public_assistance")

┌────────────┐
│ count_rows │
│   int64    │
├────────────┤
│     185424 │
└────────────┘

In [4]:
sql = "DESCRIBE SELECT * FROM public_assistance"
describe_relation = con.sql(sql)

(con
 .sql("SELECT column_name, column_type FROM describe_relation")
 .show(max_rows=50)
)

┌────────────────────────┬─────────────┐
│      column_name       │ column_type │
│        varchar         │   varchar   │
├────────────────────────┼─────────────┤
│ id                     │ VARCHAR     │
│ disasterNumber         │ SMALLINT    │
│ declarationDate        │ DATE        │
│ incidentType           │ VARCHAR     │
│ state                  │ VARCHAR     │
│ county                 │ VARCHAR     │
│ applicantName          │ VARCHAR     │
│ educationApplicant     │ BOOLEAN     │
│ numberOfProjects       │ SMALLINT    │
│ federalObligatedAmount │ DOUBLE      │
│ lastRefresh            │ TIMESTAMP   │
│ hash                   │ VARCHAR     │
├────────────────────────┴─────────────┤
│ 12 rows                    2 columns │
└──────────────────────────────────────┘



In [5]:
sql = "SUMMARIZE SELECT * FROM public_assistance"
summarize_relation = con.sql(sql)

summarize_relation.df()

Unnamed: 0,column_name,column_type,min,max,approx_unique,avg,std,q25,q50,q75,count,null_percentage
0,id,VARCHAR,00002220-90a3-474a-a2fc-5cd033195b36,ffff6434-3cc9-4130-8363-32de43bddcb8,186976,,,,,,185424,0.0
1,disasterNumber,SMALLINT,1239,4806,1641,2907.1333915782207,1258.1182787529929,1690.0,3146.0,4240.0,185424,0.0
2,declarationDate,DATE,1998-08-26,2024-08-10,1258,,,,,,185424,0.0
3,incidentType,VARCHAR,Biological,Winter Storm,28,,,,,,185424,0.0
4,state,VARCHAR,Alabama,Wyoming,63,,,,,,185424,0.0
5,county,VARCHAR,,Ziebach,3554,,,,,,185424,0.0
6,applicantName,VARCHAR,#9 AREA FIRE DEPARTMENT,plainville,87273,,,,,,185424,0.0
7,educationApplicant,BOOLEAN,false,true,2,,,,,,185424,0.0
8,numberOfProjects,SMALLINT,0,2685,328,4.484133661230477,17.370589684393792,1.0,2.0,4.0,185424,0.0
9,federalObligatedAmount,DOUBLE,-1850352.29,12954204261.16,174011,1278095.7787142815,53157059.6320346,6655.223635316521,23338.460900643044,96417.17999434443,185424,0.0


In [6]:
sql = """
    SELECT column_name, column_type, null_percentage
    FROM summarize_relation
    ORDER BY null_percentage DESC
"""

con.sql(sql).show(max_rows=50)

┌────────────────────────┬─────────────┬─────────────────┐
│      column_name       │ column_type │ null_percentage │
│        varchar         │   varchar   │  decimal(9,2)   │
├────────────────────────┼─────────────┼─────────────────┤
│ id                     │ VARCHAR     │            0.00 │
│ disasterNumber         │ SMALLINT    │            0.00 │
│ declarationDate        │ DATE        │            0.00 │
│ incidentType           │ VARCHAR     │            0.00 │
│ state                  │ VARCHAR     │            0.00 │
│ county                 │ VARCHAR     │            0.00 │
│ applicantName          │ VARCHAR     │            0.00 │
│ educationApplicant     │ BOOLEAN     │            0.00 │
│ numberOfProjects       │ SMALLINT    │            0.00 │
│ federalObligatedAmount │ DOUBLE      │            0.00 │
│ lastRefresh            │ TIMESTAMP   │            0.00 │
│ hash                   │ VARCHAR     │            0.00 │
├────────────────────────┴─────────────┴────────────────

# Preview Values

In [7]:
sql = """
    SELECT
        id,                  
        disasterNumber,         
        declarationDate,        
        incidentType,           
        state,                
        county,            
        applicantName,     
        educationApplicant,   
        numberOfProjects,  
        federalObligatedAmount,
        lastRefresh,       
        hash
    FROM
        public_assistance
    LIMIT 5   
"""

con.sql(sql).df()

Unnamed: 0,id,disasterNumber,declarationDate,incidentType,state,county,applicantName,educationApplicant,numberOfProjects,federalObligatedAmount,lastRefresh,hash
0,0646e381-ea0a-46e5-993d-1bdb1cfe2a27,1239,1998-08-26,Severe Storm,Texas,Edwards,EDWARDS (COUNTY),False,20,352427.09,2023-03-18 17:02:48.443,a18872962143d707fb83e3f0b054abda40413d04
1,bc05fe25-0f1e-4344-a120-55f23f45cd4e,1239,1998-08-26,Severe Storm,Texas,Kimble,"JUNCTION, CITY OF",False,1,6234.9,2023-03-18 17:02:48.443,d1c77a224203725a79d26a1d84f8ad2b06b36c7f
2,d9c845c8-5db4-404b-866d-e06b3c73397a,1239,1998-08-26,Severe Storm,Texas,Kimble,KIMBLE (COUNTY),False,5,69933.35,2023-03-18 17:02:48.443,7fb2d6dd706512236b3ed6c0e6ddd6a633b50d38
3,9350e760-64da-4931-ace0-0e37d7e82eb3,1239,1998-08-26,Severe Storm,Texas,Kimble,TEXAS TECH UNIVERSITY CTR,True,1,2850.0,2023-03-18 17:02:48.443,571fab79609fe373f64529b540289ee5dd400ed5
4,a196ef51-9883-47c8-809d-1d797e8e1f48,1239,1998-08-26,Severe Storm,Texas,Kinney,FORT CLARK MUNICIPAL UTILITY,False,1,5016.0,2023-03-18 17:02:48.443,39207f1c57e1f088bef0ce07238c1aa9a18022d8


# Analysis

In [8]:
sql = """
    SELECT
        ROUND(SUM(federalObligatedAmount), 0)::BIGINT AS total_federal_obligated_amount
    FROM
        public_assistance  
"""

con.sql(sql)

┌────────────────────────────────┐
│ total_federal_obligated_amount │
│             int64              │
├────────────────────────────────┤
│                   236989631672 │
└────────────────────────────────┘

In [9]:
federalObligatedAmount = con.sql(sql).df().values[0][0]

print(f"Total federal obligated amount: ${federalObligatedAmount:,}")

Total federal obligated amount: $236,989,631,672


In [10]:
sql = """
    SELECT
        COUNT(DISTINCT disasterNumber)
    FROM
        public_assistance  
"""

disasterNumber = con.sql(sql).df().values[0][0]

print(f"Total disaster declarations: {disasterNumber:,}")

Total disaster declarations: 1,646


In [11]:
sql = """
    SELECT
        COUNT(DISTINCT applicantName)
    FROM
        public_assistance  
    WHERE 
        numberOfProjects > 0
        
"""

applicantName = con.sql(sql).df().values[0][0]

print(f"Total applicants with funded projects: {applicantName:,}")

Total applicants with funded projects: 86,100


In [12]:
sql = """
    SELECT
        SUM(numberOfProjects)::BIGINT
    FROM
        public_assistance  
        
"""

numberOfProjects = con.sql(sql).df().values[0][0]

print(f"Total funded Project Worksheets: {numberOfProjects:,}")

Total funded Project Worksheets: 831,466


In [13]:
sql = """
    SELECT
       disasterNumber,
       state,
       ROUND(SUM(federalObligatedAmount), 0)::BIGINT AS federalObligatedAmount
    FROM
        public_assistance
    GROUP BY
        disasterNumber, state
    ORDER BY
        federalObligatedAmount DESC   
"""

relation = con.sql(sql)

con.sql("""
    SELECT *
    FROM relation
    ORDER BY federalObligatedAmount DESC
    LIMIT 20
""")

┌────────────────┬────────────────┬────────────────────────┐
│ disasterNumber │     state      │ federalObligatedAmount │
│     int16      │    varchar     │         int64          │
├────────────────┼────────────────┼────────────────────────┤
│           4339 │ Puerto Rico    │            33974936224 │
│           4480 │ New York       │            15118137550 │
│           4485 │ Texas          │            14905058960 │
│           4085 │ New York       │            14735635709 │
│           1603 │ Louisiana      │            13524620378 │
│           4340 │ Virgin Islands │            12239518967 │
│           4482 │ California     │            10609269973 │
│           1391 │ New York       │             4708614487 │
│           1604 │ Mississippi    │             3184451567 │
│           4486 │ Florida        │             3051885056 │
│           4496 │ Massachusetts  │             2966517981 │
│           4488 │ New Jersey     │             2960234354 │
│           4332 │ Texas

In [14]:
sql = """
    SELECT
       disasterNumber,
       applicantName,
       ROUND(SUM(federalObligatedAmount), 0)::BIGINT AS federalObligatedAmount
    FROM
        public_assistance
    GROUP BY
        disasterNumber, applicantName
    ORDER BY
        federalObligatedAmount DESC   
"""

relation = con.sql(sql)

con.sql("""
    SELECT *
    FROM relation
    ORDER BY federalObligatedAmount DESC
    LIMIT 20
""")

┌────────────────┬─────────────────────────────────────────────────────────────┬────────────────────────┐
│ disasterNumber │                        applicantName                        │ federalObligatedAmount │
│     int16      │                           varchar                           │         int64          │
├────────────────┼─────────────────────────────────────────────────────────────┼────────────────────────┤
│           4339 │ PR Electric Power Authority                                 │            12954204261 │
│           4485 │ Texas Department of State Health Services                   │             9697013440 │
│           4480 │ City of New York - Management and Budget                    │             8613685564 │
│           4085 │ NEW YORK                                                    │             8513455550 │
│           4339 │ PR Aqueduct and Sewer Authority                             │             4627455402 │
│           4485 │ Texas Division of Emergency

In [15]:
sql = """
    SELECT
       substring(declarationDate::VARCHAR, 1, 4) as year,
       ROUND(SUM(federalObligatedAmount), 0)::BIGINT AS federalObligatedAmount
    FROM
        public_assistance
    GROUP BY
        year
    ORDER BY
        federalObligatedAmount DESC   
"""

state_dollars = con.sql(sql)

con.sql("""
    SELECT *
    FROM relation
    ORDER BY federalObligatedAmount DESC
    LIMIT 20
""")

┌────────────────┬─────────────────────────────────────────────────────────────┬────────────────────────┐
│ disasterNumber │                        applicantName                        │ federalObligatedAmount │
│     int16      │                           varchar                           │         int64          │
├────────────────┼─────────────────────────────────────────────────────────────┼────────────────────────┤
│           4339 │ PR Electric Power Authority                                 │            12954204261 │
│           4485 │ Texas Department of State Health Services                   │             9697013440 │
│           4480 │ City of New York - Management and Budget                    │             8613685564 │
│           4085 │ NEW YORK                                                    │             8513455550 │
│           4339 │ PR Aqueduct and Sewer Authority                             │             4627455402 │
│           4485 │ Texas Division of Emergency

In [16]:
sql = """
    SELECT
       incidentType,
       ROUND(SUM(federalObligatedAmount), 0)::BIGINT AS federalObligatedAmount
    FROM
        public_assistance
    GROUP BY
        incidentType
    ORDER BY
        federalObligatedAmount DESC   
"""

relation = con.sql(sql)

con.sql("""
    SELECT *
    FROM relation
    ORDER BY federalObligatedAmount DESC
    LIMIT 20
""")

┌──────────────────┬────────────────────────┐
│   incidentType   │ federalObligatedAmount │
│     varchar      │         int64          │
├──────────────────┼────────────────────────┤
│ Hurricane        │           113361816918 │
│ Biological       │            81838612896 │
│ Severe Storm     │            13092270695 │
│ Fire             │             6398730254 │
│ Flood            │             5358688616 │
│ Wildfire         │             3889655190 │
│ Severe Storm(s)  │             3618422684 │
│ Severe Ice Storm │             1906764889 │
│ Earthquake       │             1502583773 │
│ Tropical Storm   │             1463069897 │
│ Snowstorm        │             1190088584 │
│ Winter Storm     │              822070100 │
│ Tornado          │              632699623 │
│ Other            │              613914868 │
│ Coastal Storm    │              492302441 │
│ Typhoon          │              475359850 │
│ Volcano          │              124893424 │
│ Mud/Landslide    │              

In [17]:
sql = """
    SELECT
       substring(declarationDate::VARCHAR, 1, 4) as year,
       incidentType,
       ROUND(SUM(federalObligatedAmount), 0)::BIGINT AS federalObligatedAmount
    FROM
        public_assistance
    GROUP BY
        year, incidentType
    ORDER BY
        federalObligatedAmount DESC   
"""

relation = con.sql(sql)

con.sql("""
    SELECT *
    FROM relation
    ORDER BY federalObligatedAmount DESC
    LIMIT 20
""")

┌─────────┬─────────────────┬────────────────────────┐
│  year   │  incidentType   │ federalObligatedAmount │
│ varchar │     varchar     │         int64          │
├─────────┼─────────────────┼────────────────────────┤
│ 2020    │ Biological      │            81835719455 │
│ 2017    │ Hurricane       │            51714839162 │
│ 2005    │ Hurricane       │            20437583611 │
│ 2012    │ Hurricane       │            17384849682 │
│ 2001    │ Fire            │             4795549820 │
│ 2022    │ Hurricane       │             3503900486 │
│ 2020    │ Hurricane       │             3428794938 │
│ 2008    │ Hurricane       │             3293955267 │
│ 2018    │ Hurricane       │             3276880982 │
│ 2004    │ Hurricane       │             2883976508 │
│ 2021    │ Hurricane       │             2840452438 │
│ 2008    │ Severe Storm    │             2248703906 │
│ 2018    │ Wildfire        │             1949261263 │
│ 2019    │ Severe Storm(s) │             1415138914 │
│ 2011    

In [18]:
sql = """
    SELECT
       state,
       ROUND(SUM(federalObligatedAmount), 0)::BIGINT AS federalObligatedAmount
    FROM
        public_assistance
    GROUP BY
        state
    ORDER BY
        federalObligatedAmount DESC   
"""

relation = con.sql(sql)

con.sql("""
    SELECT *
    FROM relation
    ORDER BY federalObligatedAmount DESC
    LIMIT 20
""")

┌────────────────┬────────────────────────┐
│     state      │ federalObligatedAmount │
│    varchar     │         int64          │
├────────────────┼────────────────────────┤
│ New York       │            37532403401 │
│ Puerto Rico    │            36816439199 │
│ Louisiana      │            24702773099 │
│ Texas          │            22663400436 │
│ California     │            17002685086 │
│ Florida        │            16501684217 │
│ Virgin Islands │            12465714202 │
│ New Jersey     │             6064328491 │
│ Mississippi    │             4233118470 │
│ North Carolina │             4013296086 │
│ Massachusetts  │             3692716154 │
│ Washington     │             3215551006 │
│ Colorado       │             2757000734 │
│ Maryland       │             2502085282 │
│ Illinois       │             2440722364 │
│ Iowa           │             2380877862 │
│ Pennsylvania   │             2238697449 │
│ Virginia       │             1829513732 │
│ Oregon         │             1

In [19]:
sql = """
    SELECT
       state,
       county,
       ROUND(SUM(federalObligatedAmount), 0)::BIGINT AS federalObligatedAmount
    FROM
        public_assistance
    GROUP BY
        state, county
    ORDER BY
        federalObligatedAmount DESC   
"""

relation = con.sql(sql)

con.sql("""
    SELECT *
    FROM relation
    ORDER BY federalObligatedAmount DESC
    LIMIT 20
""")

┌────────────────┬────────────────────┬────────────────────────┐
│     state      │       county       │ federalObligatedAmount │
│    varchar     │      varchar       │         int64          │
├────────────────┼────────────────────┼────────────────────────┤
│ Puerto Rico    │ Statewide          │            29124591246 │
│ Texas          │ Statewide          │            16410531561 │
│ New York       │ New York           │            12531417444 │
│ Virgin Islands │ Statewide          │            12430079292 │
│ California     │ Statewide          │            10171603998 │
│ New York       │ New York County    │             9735811199 │
│ Louisiana      │ Statewide          │             8730357357 │
│ New York       │ Statewide          │             7412039715 │
│ Florida        │ Statewide          │             6286967812 │
│ Louisiana      │ Orleans            │             5064821871 │
│ New Jersey     │ Statewide          │             2822501914 │
│ Colorado       │ Statew

In [20]:
sql = """
    SELECT
       state,
       incidentType,
       ROUND(SUM(federalObligatedAmount), 0)::BIGINT AS federalObligatedAmount
    FROM
        public_assistance
    GROUP BY
        state, incidentType
    ORDER BY
        federalObligatedAmount DESC   
"""

relation = con.sql(sql)

con.sql("""
    SELECT *
    FROM relation
    ORDER BY federalObligatedAmount DESC
    LIMIT 20
""")

┌────────────────┬──────────────┬────────────────────────┐
│     state      │ incidentType │ federalObligatedAmount │
│    varchar     │   varchar    │         int64          │
├────────────────┼──────────────┼────────────────────────┤
│ Puerto Rico    │ Hurricane    │            35560477291 │
│ Louisiana      │ Hurricane    │            21112502682 │
│ New York       │ Hurricane    │            15342097227 │
│ New York       │ Biological   │            15121699737 │
│ Texas          │ Biological   │            14905058960 │
│ Florida        │ Hurricane    │            12630883911 │
│ Virgin Islands │ Hurricane    │            12306903721 │
│ California     │ Biological   │            10618638278 │
│ Texas          │ Hurricane    │             6175865104 │
│ New York       │ Fire         │             4708614487 │
│ Mississippi    │ Hurricane    │             3297426051 │
│ California     │ Wildfire     │             3057389795 │
│ Florida        │ Biological   │             3053694701

In [21]:
sql = """
    SELECT
       disasterNumber,
       state,
       substring(declarationDate::VARCHAR, 1, 4) as year,
       incidentType,
       ROUND(SUM(federalObligatedAmount), 0)::BIGINT AS federalObligatedAmount
    FROM
        public_assistance
    GROUP BY
        disasterNumber, state, year, incidentType
    ORDER BY
        federalObligatedAmount DESC   
"""

relation = con.sql(sql)

con.sql("""
    SELECT *
    FROM relation
    ORDER BY federalObligatedAmount DESC
    LIMIT 20
""")

┌────────────────┬────────────────┬─────────┬──────────────┬────────────────────────┐
│ disasterNumber │     state      │  year   │ incidentType │ federalObligatedAmount │
│     int16      │    varchar     │ varchar │   varchar    │         int64          │
├────────────────┼────────────────┼─────────┼──────────────┼────────────────────────┤
│           4339 │ Puerto Rico    │ 2017    │ Hurricane    │            33974936224 │
│           4480 │ New York       │ 2020    │ Biological   │            15118137550 │
│           4485 │ Texas          │ 2020    │ Biological   │            14905058960 │
│           4085 │ New York       │ 2012    │ Hurricane    │            14735635709 │
│           1603 │ Louisiana      │ 2005    │ Hurricane    │            13524620378 │
│           4340 │ Virgin Islands │ 2017    │ Hurricane    │            12239518967 │
│           4482 │ California     │ 2020    │ Biological   │            10609269973 │
│           1391 │ New York       │ 2001    │ Fire    