In [None]:
import os
import pandas as pd
import numpy as np

from sodapy import Socrata
from socrata.authorization import Authorization

from dotenv import load_dotenv

Dataset: https://{domain}/resource/{identifier}.json

Count: https://{domain}/views.json?count=True

Schema: https://{domain}/views/{identifier}.json

Metadata: https://{domain}/views.json?limit=200&page=1

In [3]:
# Tokens are optional (`None` can be used instead), though requests will be rate limited.
#
# If you choose to use a token, run the following command on the terminal (or add it to your .bashrc)
# $ export SODAPY_APPTOKEN=<token>
load_dotenv()
socrata_token = os.getenv("NYC_SOCRATA_TOKEN")

In [None]:
# Dataset: https://{domain}/resource/{identifier}.json
# Metadata: https://{domain}/views/{identifier}.json
nyc_domain = "data.cityofnewyork.us"
nyc_dataset_identifier = "fhrw-4uyv" # 311 data
# nyc_client = Socrata(nyc_domain, username=os.getenv("NYC_SOCRATA_USERNAME"), password=os.getenv("NYC_SOCRATA_PASSWORD"), app_token=os.getenv("NYC_SOCRATA_TOKEN"), timeout=60)
nyc_client = Socrata(nyc_domain, socrata_token, timeout=600) # Often takes longer than default 10 sec timeout to load
nyc_results = nyc_client.get(nyc_dataset_identifier)
nyc_df = pd.DataFrame.from_dict(nyc_results)
print("Domain: {domain:}\nSession: {session:}\nURI Prefix: {uri_prefix:}".format(**nyc_client.__dict__))
print(nyc_df.shape)

Domain: data.cityofnewyork.us
Session: <requests.sessions.Session object at 0x00000205E3DEF460>
URI Prefix: https://
(1000, 50)


In [None]:
# Experimenting with smaller NOLA data
nola_domain = "data.nola.gov"
nola_dataset_identifier = "2mq3-p3xc" # ID from previous study that no longer works ("You don't have the security clearance to view this page.")
# nola_client = Socrata(nola_domain, username=os.getenv("nola_SOCRATA_USERNAME"), password=os.getenv("nola_SOCRATA_PASSWORD"), app_token=os.getenv("nola_SOCRATA_TOKEN"), timeout=60)
nola_client = Socrata(nola_domain, socrata_token)
nola_results = nola_client.get(nola_dataset_identifier)
nola_df = pd.DataFrame.from_dict(nola_results)
print("Domain: {domain:}\nSession: {session:}\nURI Prefix: {uri_prefix:}".format(**nola_client.__dict__))
print(nola_df.shape)

HTTPError: 403 Client Error: Forbidden.
	You must be logged in to access this resource

In [None]:
# extract tree-related complaints
# Example on sodapy github that did not work because Chattanooga data changed
tree_related = pd.concat(
    [
        nyc_df.complaint_type.str.contains(r"[T|t]ree").value_counts(),
    ],
    axis=1,
    keys=["nyc"],
)
tree_related.div(tree_related.sum()).round(2)