# Data Retrieval

This notebook imports datasets related to food waste in NYC for analysis according to the project proposal.

In [28]:
# Import required libraries
import pandas as pd
import numpy as np
import requests
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sodapy import Socrata
from datetime import datetime
# Ensure the local 'data' folder is present before any CSVs are written;
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(name='data', exist_ok=True)

## 1. NYC Department of Sanitation Annual Reports

Data on organic waste collections across NYC neighborhoods.

In [29]:
# DSNY Monthly Tonnage Data, pulled from NYC Open Data.
# The Socrata client is created without an app token (second argument is None)
# and is reused by the later cells in this notebook.
client = Socrata("data.cityofnewyork.us", None)

# Request at most 10000 rows of dataset "ebb7-mvp5".
results = client.get("ebb7-mvp5", limit=10000)

# One DataFrame row per returned record.
dsny_tonnage_df = pd.DataFrame.from_records(data=results)

# Persist a local copy; a failed write is reported but does not stop the cell.
try:
    dsny_tonnage_df.to_csv(path_or_buf='data/dsny_tonnage.csv', index=False)
except Exception as save_error:
    print(f"Error saving DSNY tonnage data: {save_error}")
else:
    print("DSNY tonnage data saved successfully")

print("DSNY Tonnage Data Shape:", dsny_tonnage_df.shape)

# Last expression of the cell: preview the first rows.
dsny_tonnage_df.head()



DSNY tonnage data saved successfully
DSNY Tonnage Data Shape: (10000, 11)


Unnamed: 0,month,borough,communitydistrict,refusetonscollected,papertonscollected,mgptonscollected,resorganicstons,schoolorganictons,borough_id,leavesorganictons,xmastreetons
0,2025 / 03,Bronx,1,559.76,31.32,31.37,2.1,22.71,2,,
1,2025 / 03,Bronx,2,389.46,24.74,29.4,0.46,,2,,
2,2025 / 03,Bronx,3,514.12,32.59,36.62,1.72,24.36,2,,
3,2025 / 03,Bronx,4,967.54,54.2,67.01,1.93,15.09,2,,
4,2025 / 03,Bronx,5,888.67,51.25,77.15999999999998,2.07,18.71,2,,


## 2. NYC Open Data - Community Gardens

In [30]:
# GreenThumb Garden Info: request at most 1000 rows of dataset "p78i-pat6"
# through the Socrata client created in an earlier cell.
results = client.get("p78i-pat6", limit=1000)
community_gardens_df = pd.DataFrame.from_records(data=results)

# Persist a local copy; a failed write is reported but does not stop the cell.
try:
    community_gardens_df.to_csv(path_or_buf='data/community_gardens.csv', index=False)
except Exception as save_error:
    print(f"Error saving community gardens data: {save_error}")
else:
    print("Community gardens data saved successfully")

print("\nCommunity Gardens Data Shape:", community_gardens_df.shape)

# Last expression of the cell: preview the first rows.
community_gardens_df.head()

Community gardens data saved successfully

Community Gardens Data Shape: (630, 27)


Unnamed: 0,assemblydist,address,borough,communityboard,congressionaldist,coundist,gardenname,juris,multipolygon,openhrsf,...,policeprecinct,statesenatedist,status,zipcode,bbl,nta,censustract,lat,lon,crossstreets
0,79,345 East 153rd Street,X,201,15,17,Melrose Houses Community Garden - Melrose Hous...,NYCHA,"{'type': 'MultiPolygon', 'coordinates': [[[[-7...",10:00 A.M. - 11:310:00 A.M.- 11:30 A.M & 3:00 ...,...,40,29,Active,10451,2024130001,BX34 /,67.0,40.820762,-73.919546,Morris Ave and courtland ave
1,83,3601 Marolla Place,X,212,16,12,Ujamaa Northeast Community Garden,DOT,"{'type': 'MultiPolygon', 'coordinates': [[[[-7...",,...,47,36,Active,10466,2049220087,BX03 /,484.01,40.884047,-73.834892,
2,87,2225 Lacombe Avenue,X,209,14,18,Karol's Urban Farm - Castle Hill Houses (NYCHA),NYCHA,"{'type': 'MultiPolygon', 'coordinates': [[[[-7...",12:00 P.M - 4:00 P.M,...,43,34,Active,10473,2035370001,BX09 /,90.0,40.818149,-73.84623,Castle Hill Avenue & Lacombe Avenue
3,79,"950 Union Ave, Bronx, NY 10459",X,203,15,17,Union Garden -Union Houses -(NYCHA),NYCHA,"{'type': 'MultiPolygon', 'coordinates': [[[[-7...",9:00 - 11:00 am,...,42,32,Active,10459,2026780001,/,,,,East 163rd street & Union Avenue
4,79,1460 Washington Avenue,X,203,15,16,Flower Garden- Morris II Houses (NYCHA),NYCHA,"{'type': 'MultiPolygon', 'coordinates': [[[[-7...",12:00 P.M - 4:00 P.M,...,42,32,Active,10456,2029110001,BX01 /,147.02,40.83634,-73.903333,3rd Avenue & East 170th street


## 3. NYC Restaurant Inspection Results

In [31]:
# NYC Restaurant Inspection Results: request at most 10000 rows of dataset
# "43nn-pn8j" through the Socrata client created in an earlier cell.
results = client.get("43nn-pn8j", limit=10000)
restaurant_inspections_df = pd.DataFrame.from_records(data=results)

# Persist a local copy; a failed write is reported but does not stop the cell.
try:
    restaurant_inspections_df.to_csv(path_or_buf='data/restaurant_inspections.csv', index=False)
except Exception as save_error:
    print(f"Error saving restaurant inspections data: {save_error}")
else:
    print("Restaurant inspections data saved successfully")

print("\nRestaurant Inspections Data Shape:", restaurant_inspections_df.shape)

# Last expression of the cell: preview the first rows.
restaurant_inspections_df.head()
    

Restaurant inspections data saved successfully

Restaurant Inspections Data Shape: (10000, 26)


Unnamed: 0,camis,dba,boro,building,street,zipcode,phone,inspection_date,critical_flag,record_date,...,bbl,nta,cuisine_description,action,violation_code,violation_description,score,inspection_type,grade,grade_date
0,50104541,PURGATORY,Brooklyn,675,CENTRAL AVENUE,11207,4233667574,1900-01-01T00:00:00.000,Not Applicable,2025-03-30T06:00:14.000,...,3034470001,BK78,,,,,,,,
1,50161118,RU YI CHINESE RESTAURANT INC.,Brooklyn,7706,NEW UTRECHT AVENUE,11214,9173880335,1900-01-01T00:00:00.000,Not Applicable,2025-03-30T06:00:14.000,...,3062470028,BK28,,,,,,,,
2,50163452,COUSIN 2 INC.,Queens,13335,ROOSEVELT AVE,11354,9299908888,1900-01-01T00:00:00.000,Not Applicable,2025-03-30T06:00:14.000,...,4049730016,QN22,,,,,,,,
3,50149982,RUDIN - 3TS CAFE,Manhattan,3,TIMES SQ,10036,7043286229,1900-01-01T00:00:00.000,Not Applicable,2025-03-30T06:00:14.000,...,1010140033,MN17,,,,,,,,
4,50116205,THE SOUP BOWL,Brooklyn,302,7 AVENUE,11215,9178806110,2024-10-28T00:00:00.000,Not Critical,2025-03-30T06:00:11.000,...,3010000044,BK37,Soups/Salads/Sandwiches,Violations were cited in the following area(s).,09B,Thawing procedure improper.,22.0,Cycle Inspection / Initial Inspection,,


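## 4. Historical Weather Data for NYC

**Note:** The dataset queried in this section (`if26-z6xq`) returns food scrap drop-off site records (see the columns in the output below), not weather observations. A historical weather source still needs to be added.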

In [32]:
# NOTE(review): dataset "if26-z6xq" does not return weather observations. The
# rows it returns carry columns such as `food_scrap_drop_off_site`, `hosted_by`
# and `operation_day_hours`, i.e. food scrap drop-off locations. The variable
# name and CSV path are kept unchanged so cells that already reference them keep
# working. TODO: point this cell at an actual weather source, or rename it.
try:
    results = client.get("if26-z6xq", limit=2000)
    nyc_weather_df = pd.DataFrame.from_records(results)
except Exception as e:
    print(f"Error fetching weather data: {e}")
    # Fall back to an empty frame so the rest of the cell cannot raise NameError
    # on a fresh kernel or silently reuse a stale frame left over from an
    # earlier run of this cell.
    nyc_weather_df = pd.DataFrame()
else:
    # Only write the CSV when the fetch succeeded, so a failed download never
    # overwrites a previously saved file or reports a misleading save error.
    try:
        nyc_weather_df.to_csv('data/nyc_weather.csv', index=False)
        print("NYC weather data saved successfully")
    except Exception as e:
        print(f"Error saving NYC weather data: {e}")

print("\nNYC Weather Data Shape:", nyc_weather_df.shape)

# Last expression of the cell: preview the first rows.
nyc_weather_df.head()

NYC weather data saved successfully

NYC Weather Data Shape: (591, 32)


Unnamed: 0,borough,ntaname,food_scrap_drop_off_site,location,hosted_by,open_months,operation_day_hours,notes,borocd,councildist,...,:@computed_region_efsh_h5xi,:@computed_region_f5dn_yrer,:@computed_region_yeji_bk3q,:@computed_region_92fq_4b7q,:@computed_region_sbqj_enih,website,app_android,app_ios,bbl,bin
0,Brooklyn,Bay Ridge,4th Avenue Presbyterian Church,"6753 4th Avenue, Brooklyn, NY 11220",4th Avenue Presbyterian Church,Year Round,Every day (Start Time: Dawn - End Time: Dusk),"No meat, bones, or dairy.",310,47,...,18180.0,10,2,44,41,,,,,
1,Manhattan,East Midtown-Turtle Bay,Dag Hammarskjold Plaza Greenmarket,E 47th St & 2nd Ave,GrowNYC,Year Round,Wednesday (Start Time: 8:00 AM - End Time: 12...,,106,4,...,12079.0,71,4,51,9,grownyc.org/compost,,,,
2,Manhattan,Hell's Kitchen,Hudson River Park's Pier 84 at W. 44th St.,Pier 84 at W. 44th St. near dog park,Staff at Hudson River Park,Year Round,Every day (Start Time: 7:00 AM - End Time: 7:...,,104,3,...,,12,4,10,10,https://hudsonriverpark.org/the-park/sustainab...,,,,
3,Manhattan,East Midtown-Turtle Bay,58th Street Library FSDO,127 East 58th Street,GrowNYC,Year Round,Wednesdays (Start Time: 7:30 AM - End Time: 1...,,105,4,...,12419.0,11,4,51,10,grownyc.org/compost,,,,
4,Manhattan,Tribeca-Civic Center,Tribeca Greenmarket,Greenwich St. & Duane St,GrowNYC,Year Round,Saturday (Start Time: 8:00 AM - End Time: 1:0...,,101,1,...,12076.0,56,4,32,1,grownyc.org/compost,,,,
