# Download NYC Building Elevation and Subgrade (BES) Data

Exploring the NYC Building Elevation and Subgrade (BES) Data Set in Python  

Author: Mark Bauer

In [1]:
import duckdb
import requests

In [2]:
# Print the Python/IPython versions and the versions of the listed packages
# with **watermark**, the IPython magic extension.
%reload_ext watermark
%watermark -v -p pandas,duckdb

Python implementation: CPython
Python version       : 3.8.13
IPython version      : 8.4.0

pandas: 1.4.3
duckdb: 0.10.0



In [3]:
# url to data
url = 'https://data.cityofnewyork.us/api/views/bsin-59hv/rows.csv?accessType=DOWNLOAD'

# set file name
file_name = '../data/bes-data.csv'

# Stream the response so the large CSV is written to disk chunk by chunk
# instead of being held entirely in memory. The timeout keeps a stalled
# connection from hanging the kernel, and the `with` block guarantees the
# HTTP connection is released even if the download fails part-way.
with requests.get(url, stream=True, timeout=60) as response:
    # Fail loudly on anything other than 200: the cells below read this file,
    # so continuing after a failed download would only surface a more
    # confusing error later (or silently reuse a stale file).
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to download CSV file (HTTP status {response.status_code})."
        )

    # Save the content of the response to a local file, 1 MiB at a time
    with open(file_name, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)

print(f"CSV file {file_name} downloaded successfully.")

CSV file ../data/bes-data.csv downloaded successfully.


In [6]:
# sanity check: list the contents of ../data/ to confirm the CSV file is there
%ls ../data/

bes-data.csv       nfip-data.parquet


In [7]:
# open a DuckDB connection
con = duckdb.connect()

# SQL that loads the downloaded CSV into a table named bes_data
create_table_query = """
    CREATE TABLE bes_data AS
    FROM read_csv('../data/bes-data.csv')
    """
con.sql(create_table_query)

# count the rows in the new table and print the result
row_count_query = "SELECT COUNT(*) AS count_rows FROM bes_data"
row_count = con.sql(row_count_query)
row_count.show()

┌────────────┐
│ count_rows │
│   int64    │
├────────────┤
│     861876 │
└────────────┘



In [8]:
# export the bes_data table to a Parquet file in ../data/
parquet_copy_query = "COPY bes_data TO '../data/bes-data.parquet' (FORMAT PARQUET)"
con.sql(parquet_copy_query)

In [9]:
# sanity check: list the contents of ../data/ to confirm the Parquet file was written
%ls ../data/

bes-data.csv       bes-data.parquet   nfip-data.parquet


In [10]:
# preview the on-disk size of each file in ../data/ (-h prints human-readable units)
!du -h ../data/*

301M	../data/bes-data.csv
 96M	../data/bes-data.parquet
 36M	../data/nfip-data.parquet


In [11]:
# remove the CSV file; the same data has already been written to ../data/bes-data.parquet
!rm ../data/bes-data.csv

In [12]:
# list the files left in ../data/ to confirm the CSV file was removed
%ls ../data/

bes-data.parquet   nfip-data.parquet


In [13]:
# sanity check: count the rows by reading the Parquet file directly
parquet_count_query = "SELECT count(*) FROM read_parquet('../data/bes-data.parquet')"
parquet_row_count = con.sql(parquet_count_query)
parquet_row_count

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│       861876 │
└──────────────┘

In [14]:
# close the DuckDB connection opened earlier in the notebook
con.close()