Data source: the 2018 Central Park Squirrel Census (hectare data), published on [data.cityofnewyork.us](https://data.cityofnewyork.us/Environment/2018-Central-Park-Squirrel-Census-Hectare-Data/ej9h-v6g2/about_data).

In [1]:
import requests 
import uuid 

In [2]:
# Download the hectare dataset as CSV.
# A timeout is set so the cell cannot hang forever on a stalled connection
# (requests waits indefinitely by default).
response = requests.get(
    "https://data.cityofnewyork.us/api/views/ej9h-v6g2/rows.csv?fourfour=ej9h-v6g2&cacheBust=1681846116&date=20240905&accessType=DOWNLOAD",
    timeout=60,
)
# Fail loudly on a 4xx/5xx answer instead of letting later cells parse an
# error page as if it were CSV data.
response.raise_for_status()
print(response.status_code)

200


In [3]:
from yt import wrapper as yt
from yt import type_info

In [4]:
# Use a directory with a random UUID suffix so every run of the notebook
# works in its own location under //tmp/examples.
working_dir = f"//tmp/examples/upload-squirrel-hectare-data_{uuid.uuid4()}"
# Create the directory node; recursive=True is passed so that (presumably)
# missing parent nodes are created as well -- confirm against the YT docs.
yt.create("map_node", working_dir, recursive=True)
print(working_dir)

//tmp/examples/upload-squirrel-hectare-data_c19fbe86-ed0f-4146-9c72-9ff7a004a310


In [5]:
import io
import csv 

In [6]:
def string_to_int(value):
    """Convert a value to ``int`` while letting missing values through.

    Args:
        value: ``None`` or anything accepted by the built-in ``int()``
            (for example a numeric string).

    Returns:
        ``None`` when ``value`` is ``None``; otherwise ``int(value)``.

    Raises:
        ValueError: if ``value`` is a string that ``int()`` cannot parse
            (this includes the empty string).
    """
    return None if value is None else int(value)

In [7]:
# Wrap the downloaded CSV text in an in-memory text buffer so the csv module
# can read it like a file.
content = io.StringIO(response.text)
# DictReader yields one dict per data row, keyed by the names in the header row.
# NOTE: the reader is a one-shot iterator -- once it has been consumed it
# yields nothing more unless this cell is run again.
csv_reader = csv.DictReader(content)

In [8]:
# Parse the downloaded CSV into a list of dicts (one per row) and normalise
# the values:
#   * empty cells ("") become None;
#   * the three numeric columns are converted from str to int (None stays None).
#
# A fresh reader is built from response.text here rather than consuming the
# one-shot `csv_reader` iterator: with `list(csv_reader)` a second run of this
# cell would silently produce an empty `data` list.
int_columns = ["Number of sighters", "Total Time of Sighting", "Number of Squirrels"]

data = []
for raw_record in csv.DictReader(io.StringIO(response.text)):
    # Build a new dict instead of mutating the parsed row in place.
    record = {key: (None if value == "" else value) for key, value in raw_record.items()}
    for key in int_columns:
        record[key] = string_to_int(record[key])
    data.append(record)

# Preview the first rows to eyeball the result of the conversion.
for line in data[:10]:
    print(line)

{'Hectare': '01A', 'Shift': 'AM', 'Date': '10072018', 'Anonymized Sighter': '110', 'Sighter Observed Weather Data': '70º F, Foggy', 'Litter': 'Some', 'Litter Notes': None, 'Other Animal Sightings': 'Humans, Pigeons', 'Hectare Conditions': 'Busy', 'Hectare Conditions Notes': None, 'Number of sighters': 1, 'Number of Squirrels': 4, 'Total Time of Sighting': 22}
{'Hectare': '01A', 'Shift': 'PM', 'Date': '10142018', 'Anonymized Sighter': '177', 'Sighter Observed Weather Data': '54º F, overcast', 'Litter': 'Abundant', 'Litter Notes': None, 'Other Animal Sightings': 'Humans, Pigeons', 'Hectare Conditions': 'Busy', 'Hectare Conditions Notes': None, 'Number of sighters': 1, 'Number of Squirrels': 7, 'Total Time of Sighting': 26}
{'Hectare': '01B', 'Shift': 'AM', 'Date': '10122018', 'Anonymized Sighter': '11', 'Sighter Observed Weather Data': '60º F, sunny', 'Litter': 'Some', 'Litter Notes': None, 'Other Animal Sightings': 'Humans, Dogs, Pigeons, Horses', 'Hectare Conditions': 'Busy', 'Hectare 

In [9]:
# (column name, column type) pairs, in the order the columns are added to the
# table schema. Columns wrapped in type_info.Optional accept missing values;
# the others are declared as required.
column_specs = [
    ("hectare", type_info.String),
    ("shift", type_info.String),
    ("date", type_info.String),
    ("sighter", type_info.String),
    ("sighter_observed_weather_data", type_info.Optional[type_info.String]),
    ("litter", type_info.Optional[type_info.String]),
    ("litter_notes", type_info.Optional[type_info.String]),
    ("other_animals_sightings", type_info.Optional[type_info.String]),
    ("hectare_conditions", type_info.Optional[type_info.String]),
    ("hectare_conditions_notes", type_info.Optional[type_info.String]),
    ("number_of_sighters", type_info.Uint16),
    ("number_of_squirrels", type_info.Uint16),
    ("total_time_of_sighting", type_info.Optional[type_info.Uint16]),
]

schema = yt.schema.TableSchema()
for column_name, column_type in column_specs:
    schema.add_column(column_name, column_type)

# Bare expression so the notebook still displays the resulting schema.
schema

TableSchema({'value': [{'name': 'hectare', 'type_v3': 'string'}, {'name': 'shift', 'type_v3': 'string'}, {'name': 'date', 'type_v3': 'string'}, {'name': 'sighter', 'type_v3': 'string'}, {'name': 'sighter_observed_weather_data', 'type_v3': {'type_name': 'optional', 'item': 'string'}}, {'name': 'litter', 'type_v3': {'type_name': 'optional', 'item': 'string'}}, {'name': 'litter_notes', 'type_v3': {'type_name': 'optional', 'item': 'string'}}, {'name': 'other_animals_sightings', 'type_v3': {'type_name': 'optional', 'item': 'string'}}, {'name': 'hectare_conditions', 'type_v3': {'type_name': 'optional', 'item': 'string'}}, {'name': 'hectare_conditions_notes', 'type_v3': {'type_name': 'optional', 'item': 'string'}}, {'name': 'number_of_sighters', 'type_v3': 'uint16'}, {'name': 'number_of_squirrels', 'type_v3': 'uint16'}, {'name': 'total_time_of_sighting', 'type_v3': {'type_name': 'optional', 'item': 'uint16'}}], 'attributes': {'strict': True, 'unique_keys': False}})

In [10]:
# CSV header names, listed in the same order as the columns of `schema`:
# the i-th CSV column is stored under the i-th schema column name.
csv_columns = [
    'Hectare',
    'Shift',
    'Date',
    'Anonymized Sighter',
    'Sighter Observed Weather Data',
    'Litter',
    'Litter Notes',
    'Other Animal Sightings',
    'Hectare Conditions',
    'Hectare Conditions Notes',
    'Number of sighters',
    'Number of Squirrels',
    'Total Time of Sighting',
]
yt_columns = [column.name for column in schema.columns]

# The mapping is purely positional, so both lists must have the same length.
assert len(csv_columns) == len(yt_columns)

# Re-key every parsed record from CSV header names to schema column names.
column_pairs = list(zip(csv_columns, yt_columns))
yt_data = [
    {yt_key: record[csv_key] for csv_key, yt_key in column_pairs}
    for record in data
]

# Preview the first converted rows.
for record in yt_data[:10]:
    print(record)

{'hectare': '01A', 'shift': 'AM', 'date': '10072018', 'sighter': '110', 'sighter_observed_weather_data': '70º F, Foggy', 'litter': 'Some', 'litter_notes': None, 'other_animals_sightings': 'Humans, Pigeons', 'hectare_conditions': 'Busy', 'hectare_conditions_notes': None, 'number_of_sighters': 1, 'number_of_squirrels': 4, 'total_time_of_sighting': 22}
{'hectare': '01A', 'shift': 'PM', 'date': '10142018', 'sighter': '177', 'sighter_observed_weather_data': '54º F, overcast', 'litter': 'Abundant', 'litter_notes': None, 'other_animals_sightings': 'Humans, Pigeons', 'hectare_conditions': 'Busy', 'hectare_conditions_notes': None, 'number_of_sighters': 1, 'number_of_squirrels': 7, 'total_time_of_sighting': 26}
{'hectare': '01B', 'shift': 'AM', 'date': '10122018', 'sighter': '11', 'sighter_observed_weather_data': '60º F, sunny', 'litter': 'Some', 'litter_notes': None, 'other_animals_sightings': 'Humans, Dogs, Pigeons, Horses', 'hectare_conditions': 'Busy', 'hectare_conditions_notes': None, 'numb

In [11]:
# The table is placed directly under this run's working directory.
table_path = f"{working_dir}/squirrels-hectare-data"
print(table_path)

# Attach the schema built above as the table's "schema" attribute at creation time.
table_attributes = {"schema": schema.to_yson_type()}
# Kept as the final expression so the notebook displays the value returned by yt.create.
yt.create("table", table_path, force=True, attributes=table_attributes)

//tmp/examples/upload-squirrel-hectare-data_c19fbe86-ed0f-4146-9c72-9ff7a004a310/squirrels-hectare-data


'307e-f925-13440191-706aeb65'

In [12]:
# Upload the converted records (dicts keyed by schema column names) into the
# table created above, then print its path for reference.
yt.write_table(table_path, yt_data)
print(table_path)

//tmp/examples/upload-squirrel-hectare-data_c19fbe86-ed0f-4146-9c72-9ff7a004a310/squirrels-hectare-data
