# Data download

https://www.neighborhoodatlas.medicine.wisc.edu/

In [1]:
# Load duckdb, which lets us efficiently load large files
import duckdb

# Load pandas, which lets us manipulate dataframes
import pandas as pd

# Load polars, which enables loading of parquet files
import polars as pl

# Import jupysql Jupyter extension to create SQL cells
%load_ext sql

# Set configrations on jupysql to directly output data to Pandas and to simplify the output that is printed to the notebook.
%config SqlMagic.autopandas = True

%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Connect jupysql to DuckDB using a SQLAlchemy-style connection string. Either connect to an in memory DuckDB, or a file backed db.
%sql duckdb:///:memory:

## Minimal duckdb query to standardize the ADI file 

Prompt for Claude:

```
%%sql
SELECT *
FROM read_csv('https://data.cityofnewyork.us/api/views/erm2-nwe9/rows.csv?accessType=DOWNLOAD',
    header=True,
    delim=',',
    quote='"',
    columns={'Unique Key': 'BIGINT',
    'Created Date': 'VARCHAR',
    'Closed Date': 'VARCHAR',
    'Agency': 'VARCHAR',
    'Agency Name': 'VARCHAR',
    'Complaint Type': 'VARCHAR',
    'Descriptor': 'VARCHAR',
    'Location Type': 'VARCHAR',
    'Incident Zip': 'VARCHAR',
    'Incident Address': 'VARCHAR',
    'Street Name': 'VARCHAR',
    'Cross Street 1': 'VARCHAR',
    'Cross Street 2': 'VARCHAR',
    'Intersection Street 1': 'VARCHAR',
    'Intersection Street 2': 'VARCHAR',
    'Address Type': 'VARCHAR',
    'City': 'VARCHAR',
    'Landmark': 'VARCHAR',
    'Facility Type': 'VARCHAR',
    'Status': 'VARCHAR',
    'Due Date': 'VARCHAR',
    'Resolution Description': 'VARCHAR',
    'Resolution Action Updated Date': 'VARCHAR',
    'Community Board': 'VARCHAR',
    'BBL': 'VARCHAR',
    'Borough': 'VARCHAR',
    'X Coordinate (State Plane)': 'VARCHAR',
    'Y Coordinate (State Plane)': 'VARCHAR',
    'Open Data Channel Type': 'VARCHAR',
    'Park Facility Name': 'VARCHAR',
    'Park Borough': 'VARCHAR',
    'Vehicle Type': 'VARCHAR',
    'Taxi Company Borough': 'VARCHAR',
    'Taxi Pick Up Location': 'VARCHAR',
    'Bridge Highway Name': 'VARCHAR',
    'Bridge Highway Direction': 'VARCHAR',
    'Road Ramp': 'VARCHAR',
    'Bridge Highway Segment': 'VARCHAR',
    'Latitude': 'DOUBLE',
    'Longitude': 'DOUBLE',
    'Location': 'VARCHAR'}) 
LIMIT 10;

Please use the above query example and rewrite it for the file at `/Users/me/Downloads/adi-download/US_2021_ADI_Census\ Block\ Group_v4.csv` that has the following header:

```"","GISJOIN","ADI_NATRANK","ADI_STATERNK","FIPS"
"1","G01000100201001","74","5","010010201001"
"2","G01000100201002","74","5","010010201002"
"3","G01000100202001","84","7","010010202001"
"4","G01000100202002","84","7","010010202002"
"5","G01000100203001","79","6","010010203001"
"6","G01000100203002","79","6","010010203002"
"7","G01000100204001","64","3","010010204001"
"8","G01000100204002","64","3","010010204002"
"9","G01000100204003","64","3","010010204003"
```
```

In [3]:
!head /Users/me/Downloads/adi-download/2021\ ADI_Census\ Block\ Group_v4.0_ReadMe.txt

2021 Block Group ADI Files v4.0

These files contain a linkage between the Census block group and the ADI score.
The file contains four relevant fields:
- GISJOIN: Key linkage field to the block group shapefile served by NHGIS
- FIPS: The block group Census ID
- ADI_NATRANK: National percentile of block group ADI score
- ADI_STATERNK: State-specific decile of block group ADI score




In [4]:
!head /Users/me/Downloads/adi-download/US_2021_ADI_Census\ Block\ Group_v4.csv

"","GISJOIN","ADI_NATRANK","ADI_STATERNK","FIPS"
"1","G01000100201001","74","5","010010201001"
"2","G01000100201002","74","5","010010201002"
"3","G01000100202001","84","7","010010202001"
"4","G01000100202002","84","7","010010202002"
"5","G01000100203001","79","6","010010203001"
"6","G01000100203002","79","6","010010203002"
"7","G01000100204001","64","3","010010204001"
"8","G01000100204002","64","3","010010204002"
"9","G01000100204003","64","3","010010204003"


In [14]:
%%sql
SELECT *
FROM read_csv('/Users/me/Downloads/adi-download/US_2021_ADI_Census Block Group_v4.csv', 
  header=True, 
  delim=',',
  quote='"',
  columns={'GISJOIN': 'VARCHAR',
           'ADI_NATRANK': 'INT',  
           'ADI_STATERNK': 'INT',
           'FIPS': 'VARCHAR'})
LIMIT 10;

InvalidInputException: Invalid Input Error: Error in file "/Users/me/Downloads/adi-download/US_2021_ADI_Census Block Group_v4.csv", on line 2: expected 4 values per row, but got more. (  file=/Users/me/Downloads/adi-download/US_2021_ADI_Census Block Group_v4.csv
  delimiter=','
  quote='"'
  escape='"' (default)
  header=1
  sample_size=20480
  ignore_errors=0
  all_varchar=0)

## ChatGPT response

In [11]:
!head /Users/me/Downloads/adi-download/US_2021_ADI_Census\ Block\ Group_v4.csv

"INDEX","GISJOIN","ADI_NATRANK","ADI_STATERNK","FIPS"
"1","G01000100201001","74","5","010010201001"
"2","G01000100201002","74","5","010010201002"
"3","G01000100202001","84","7","010010202001"
"4","G01000100202002","84","7","010010202002"
"5","G01000100203001","79","6","010010203001"
"6","G01000100203002","79","6","010010203002"
"7","G01000100204001","64","3","010010204001"
"8","G01000100204002","64","3","010010204002"
"9","G01000100204003","64","3","010010204003"


In [3]:
%%sql 
SELECT *
FROM read_csv('/Users/me/Downloads/adi-download/US_2021_ADI_Census Block Group_v4.csv',
    header=True,
    delim=',',
    quote='"',
    columns={'INDEX': 'BIGINT',
    'GISJOIN': 'VARCHAR',
    'ADI_NATRANK': 'BIGINT',
    'ADI_STATERNK': 'BIGINT',
    'FIPS': 'VARCHAR'}) 
LIMIT 10;


InvalidInputException: Invalid Input Error: Could not convert string 'GQ' to INT64 at line 60 in column "ADI_NATRANK". Parser options:
  file=/Users/me/Downloads/adi-download/US_2021_ADI_Census Block Group_v4.csv
  delimiter=','
  quote='"'
  escape='"' (default)
  header=1
  sample_size=20480
  ignore_errors=0
  all_varchar=0 