# Data download

https://www.neighborhoodatlas.medicine.wisc.edu/

In [42]:
# Load duckdb, which lets us efficiently load large files
import duckdb

# Load pandas, which lets us manipulate dataframes
import pandas as pd

# Load polars, which enables loading of parquet files
import polars as pl

# Import jupysql Jupyter extension to create SQL cells
%load_ext sql

# Set configrations on jupysql to directly output data to Pandas and to simplify the output that is printed to the notebook.
%config SqlMagic.autopandas = True

%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Connect jupysql to DuckDB using a SQLAlchemy-style connection string. Either connect to an in memory DuckDB, or a file backed db.
%sql duckdb:///:memory:

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


## Minimal duckdb query to standardize the ADI file 

Prompt for Claude:

```
%%sql
SELECT *
FROM read_csv('https://data.cityofnewyork.us/api/views/erm2-nwe9/rows.csv?accessType=DOWNLOAD',
    header=True,
    delim=',',
    quote='"',
    columns={'Unique Key': 'BIGINT',
    'Created Date': 'VARCHAR',
    'Closed Date': 'VARCHAR',
    'Agency': 'VARCHAR',
    'Agency Name': 'VARCHAR',
    'Complaint Type': 'VARCHAR',
    'Descriptor': 'VARCHAR',
    'Location Type': 'VARCHAR',
    'Incident Zip': 'VARCHAR',
    'Incident Address': 'VARCHAR',
    'Street Name': 'VARCHAR',
    'Cross Street 1': 'VARCHAR',
    'Cross Street 2': 'VARCHAR',
    'Intersection Street 1': 'VARCHAR',
    'Intersection Street 2': 'VARCHAR',
    'Address Type': 'VARCHAR',
    'City': 'VARCHAR',
    'Landmark': 'VARCHAR',
    'Facility Type': 'VARCHAR',
    'Status': 'VARCHAR',
    'Due Date': 'VARCHAR',
    'Resolution Description': 'VARCHAR',
    'Resolution Action Updated Date': 'VARCHAR',
    'Community Board': 'VARCHAR',
    'BBL': 'VARCHAR',
    'Borough': 'VARCHAR',
    'X Coordinate (State Plane)': 'VARCHAR',
    'Y Coordinate (State Plane)': 'VARCHAR',
    'Open Data Channel Type': 'VARCHAR',
    'Park Facility Name': 'VARCHAR',
    'Park Borough': 'VARCHAR',
    'Vehicle Type': 'VARCHAR',
    'Taxi Company Borough': 'VARCHAR',
    'Taxi Pick Up Location': 'VARCHAR',
    'Bridge Highway Name': 'VARCHAR',
    'Bridge Highway Direction': 'VARCHAR',
    'Road Ramp': 'VARCHAR',
    'Bridge Highway Segment': 'VARCHAR',
    'Latitude': 'DOUBLE',
    'Longitude': 'DOUBLE',
    'Location': 'VARCHAR'}) 
LIMIT 10;

Please use the above query example and rewrite it for the file at `/Users/me/Downloads/adi-download/US_2021_ADI_Census\ Block\ Group_v4.csv` that has the following header:

```"","GISJOIN","ADI_NATRANK","ADI_STATERNK","FIPS"
"1","G01000100201001","74","5","010010201001"
"2","G01000100201002","74","5","010010201002"
"3","G01000100202001","84","7","010010202001"
"4","G01000100202002","84","7","010010202002"
"5","G01000100203001","79","6","010010203001"
"6","G01000100203002","79","6","010010203002"
"7","G01000100204001","64","3","010010204001"
"8","G01000100204002","64","3","010010204002"
"9","G01000100204003","64","3","010010204003"
```
```

In [3]:
!head /Users/me/Downloads/adi-download/2021\ ADI_Census\ Block\ Group_v4.0_ReadMe.txt

2021 Block Group ADI Files v4.0

These files contain a linkage between the Census block group and the ADI score.
The file contains four relevant fields:
- GISJOIN: Key linkage field to the block group shapefile served by NHGIS
- FIPS: The block group Census ID
- ADI_NATRANK: National percentile of block group ADI score
- ADI_STATERNK: State-specific decile of block group ADI score




In [4]:
!head /Users/me/Downloads/adi-download/US_2021_ADI_Census\ Block\ Group_v4.csv

"INDEX","GISJOIN","ADI_NATRANK","ADI_STATERNK","FIPS"
"1","G01000100201001","74","5","010010201001"
"2","G01000100201002","74","5","010010201002"
"3","G01000100202001","84","7","010010202001"
"4","G01000100202002","84","7","010010202002"
"5","G01000100203001","79","6","010010203001"
"6","G01000100203002","79","6","010010203002"
"7","G01000100204001","64","3","010010204001"
"8","G01000100204002","64","3","010010204002"
"9","G01000100204003","64","3","010010204003"


In [11]:
%%sql
SELECT *
FROM read_csv('/Users/me/Downloads/adi-download/US_2021_ADI_Census Block Group_v4.csv', 
  header=True, 
  delim=',',
  quote='"',
  skip=2,
  columns={'INDEX': 'INT',
           'GISJOIN': 'VARCHAR',
           'ADI_NATRANK': 'VARCHAR',  
           'ADI_STATERNK': 'VARCHAR',
           'FIPS': 'VARCHAR'})
LIMIT 10;

Unnamed: 0,INDEX,GISJOIN,ADI_NATRANK,ADI_STATERNK,FIPS
0,3,G01000100202001,84,7,10010202001
1,4,G01000100202002,84,7,10010202002
2,5,G01000100203001,79,6,10010203001
3,6,G01000100203002,79,6,10010203002
4,7,G01000100204001,64,3,10010204001
5,8,G01000100204002,64,3,10010204002
6,9,G01000100204003,64,3,10010204003
7,10,G01000100204004,64,3,10010204004
8,11,G01000100205011,65,4,10010205011
9,12,G01000100205012,65,4,10010205012


In [47]:
%%sql
CREATE TABLE test (
    name            VARCHAR,
    ind             INT,
);

Unnamed: 0,Count


In [48]:
%%sql
INSERT INTO test VALUES ('first', 1);

Unnamed: 0,Count
0,1


In [49]:
%%sql
INSERT INTO test VALUES ('second', NULL);

Unnamed: 0,Count
0,1


In [50]:
%%sql
DESCRIBE test;

Unnamed: 0,column_name,column_type,null,key,default,extra
0,name,VARCHAR,YES,,,
1,ind,INTEGER,YES,,,


In [51]:
%%sql 
SELECT * FROM test;

Unnamed: 0,name,ind
0,first,1.0
1,second,


# Testing visualizations using a parquet file

In [14]:
import vegafusion as vf
import polars as pl
import altair as alt
from vega_datasets import data
alt.data_transformers.disable_max_rows()
alt.renderers.enable('html')

# Configure DuckDB connection
vf.runtime.set_connection("duckdb")

# Enable Mime Renderer
vf.enable(row_limit=100000000)

vegafusion.enable(mimetype='html', row_limit=100000000, embed_options=None)

In [58]:
area_deprivation_index = pl.read_parquet('/tmp/area_deprivation_index.parquet')

In [59]:
area_deprivation_index[50:70]

INDEX,GISJOIN,ADI_NATRANK,ADI_STATERNK,FIPS
i32,str,i32,i32,i64
51,"""G0100030010300…",49.0,2.0,10030103001
52,"""G0100030010300…",49.0,2.0,10030103002
53,"""G0100030010300…",49.0,2.0,10030103003
54,"""G0100030010300…",49.0,2.0,10030103004
55,"""G0100030010300…",49.0,2.0,10030103005
56,"""G0100030010400…",73.0,5.0,10030104001
57,"""G0100030010400…",73.0,5.0,10030104002
58,"""G0100030010400…",73.0,5.0,10030104003
59,"""G0100030010500…",,,10030105001
60,"""G0100030010500…",70.0,4.0,10030105002
