### Dependecies and necessary tools

In [1]:
!pip install sodapy

Collecting sodapy
  Downloading sodapy-2.1.0-py2.py3-none-any.whl (14 kB)
Installing collected packages: sodapy
Successfully installed sodapy-2.1.0


In [2]:
#Importing dependencies
import pandas as pd
from sodapy import Socrata
import scipy.stats as st
import numpy as np
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import pymongo

# Importing data for API call
from config import app_token
from config import username
from config import password

### EXTRACT 

In [3]:
# Getting the data from API NY Open Data for the first database: 
client = Socrata("data.cityofnewyork.us",
                  app_token, username,
                  password)
# Database returned as JSON from API / converted to Python list of dictionaries by sodapy.
results = client.get("gaq9-z3hz", limit=2832)

# Convert to pandas DataFrame
Recycling_Div_Captures = pd.DataFrame.from_records(results)
Recycling_Div_Captures

Unnamed: 0,_zone,district,fiscal_month_number,fiscal_year,month_name,diversion_rate_total_total_recycling_total_waste_,capture_rate_paper_total_paper_max_paper_,capture_rate_mgp_total_mgp_max_mgp_,capture_rate_total_total_recycling_leaves_recycling_max_paper_max_mgp_x100
0,Brooklyn North,BKN01,10,2019,April,14.6870926033314,44.9091597758182,43.0340618265488,44.1467643388238
1,Brooklyn North,BKN02,10,2019,April,19.9501814568266,34.1940201111571,57.9470313498761,41.2136999269730
2,Brooklyn North,BKN03,10,2019,April,12.1641613296565,33.5215567195417,44.9197311047560,38.1559365633997
3,Brooklyn North,BKN04,10,2019,April,15.5418031271589,35.2113607057133,68.5112600518364,48.7507554470644
4,Brooklyn North,BKN05,10,2019,April,10.0518454148520,22.2654301541493,45.0517913754661,31.5301290076712
...,...,...,...,...,...,...,...,...,...
2827,Queens West,QW06,3,2016,September,20.1214735183925,30.4196893217642,67.9561359185218,39.0333079418224
2828,Queens West,QW09,3,2016,September,17.4348618222880,41.1065470054454,79.6950721141275,54.2727362237205
2829,Staten Island,SI01,3,2016,September,18.6838350777916,39.5199429139324,71.7164916919349,49.6526689393673
2830,Staten Island,SI02,3,2016,September,19.0168187778538,44.4579262409406,74.9601995452711,54.0574389302287


In [4]:
# creating path for the citywide subsort 
data_path = "Resources/Citywide_Subsort.csv"
# Read the data into df
cws_df = pd.read_csv(data_path)
#Preview data
cws_df.head()

Unnamed: 0,Material,Aggregate Percent,Refuse Percent,MGP Percent,Paper Percent,Organic Percent,Material Group,DSNY Diversion Summary Category,Location
0,Newspaper,0.019,0.011,0.005,0.098,0.0,Paper,Designated Paper,Citywide
1,Plain OCC/Kraft Paper,0.056,0.012,0.01,0.464,0.002,Paper,Designated Paper,Citywide
2,Paper Bags: Kraft Grocery,0.002,0.001,0.0,0.008,0.0,Paper,Designated Paper,Citywide
3,Paper Bags: Non-food Retail,0.002,0.002,0.0,0.008,0.0,Paper,Designated Paper,Citywide
4,Paper Bags: Fast Food Bags,0.001,0.002,0.0,0.001,0.0,Paper,Designated Paper,Citywide


In [5]:
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 86.0.4240
[WDM] - Get LATEST driver version for 86.0.4240
[WDM] - Get LATEST driver version for 86.0.4240
[WDM] - Trying to download new driver from http://chromedriver.storage.googleapis.com/86.0.4240.22/chromedriver_win32.zip


 


[WDM] - Driver has been saved in cache [C:\Users\wuyah\.wdm\drivers\chromedriver\win32\86.0.4240.22]


In [8]:
url = 'https://data.cityofnewyork.us/City-Government/DSNY-Waste-Characterization-Mainsort/k3ks-jzek'
browser.visit(url)

In [9]:
df_list = []
for x in range(1,41):
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    table = soup.find_all('div', class_='socrata-visualization')
    df_list.append(pd.read_html(str(table))[0]) 
    button = browser.find_by_xpath('//*[@id="app"]/div/div[2]/section[4]/div[2]/div/div/div[4]/div[2]/span[1]/button[2]').click()

In [11]:
df_combined = pd.concat(df_list)

In [12]:
df_combined.reset_index(drop=True, inplace=True)

In [13]:
cwm_df = df_combined
cwm_df.head()

Unnamed: 0,Material,Aggregate Percent,Refuse Percent,MGP Percent,Paper Percent,Organic Percent,Material Group,DSNY Diversion Summary Category,Location
0,Newspaper,1.9%,1.1%,0.5%,9.8%,0%,Paper,Designated Paper,Citywide
1,Plain OCC/Kraft Paper,5.7%,1.4%,1%,47.2%,0.2%,Paper,Designated Paper,Citywide
2,High Grade Paper,0.7%,0.5%,0.1%,2.6%,0%,Paper,Designated Paper,Citywide
3,Mixed Low Grade Paper,8.7%,6.8%,2.4%,31.4%,0.9%,Paper,Designated Paper,Citywide
4,Paper: Compostable/Soiled/Waxed OCC/Kraft,8.1%,9.8%,0.7%,1.4%,2%,Paper,Organics Suitable for Composting,Citywide


### TRANSFORM

In [14]:
# Analizing material groups
unique_mat_subsort = cws_df["Material Group"].unique()
print(f'Material groups (subsort): {unique_mat_subsort}')
unique_mat_mainsort = cwm_df["Material Group"].unique()
print(f'Material groups (mainsort): {unique_mat_mainsort}')

Material groups (subsort): ['Paper' 'Plastic' 'Glass' 'Metal' 'Organic' 'E-Waste' 'C&D'
 'Special Waste' 'Miscellaneous Inorganics']
Material groups (mainsort): ['Paper' 'Plastic' 'Glass' 'Metal' 'Organic' 'E-Waste' 'C&D'
 'Special Waste' 'Miscellaneous Inorganics']


In the general database (Recycling_Div_Captures) is possible to see that there is just data for paper and MGP, so from this dataset, information just for those materials will be taken. 
The materials will be group in the next collections for a non-relational dabatase:
- Paper
- MGP (Metal, Glass, and Plastic)
- Organic
- E- Waste
- Special Waste
- Others

In [15]:
unique_loc_subsort = cws_df["Location"].unique()
print(f"Locations register in the Subsort dataset: {unique_loc_subsort}")
print("----------------------------------------------------------------")
unique_loc_mainsort = cwm_df["Location"].unique()
print(f"Locations register in the Mainsort dataset: {unique_loc_mainsort}")
print("----------------------------------------------------------------")
uniqie_loc_general = Recycling_Div_Captures["_zone"].unique()
print(f"Locations register in the general dataset: {uniqie_loc_general}")

Locations register in the Subsort dataset: ['Citywide' 'Brooklyn' 'Queens' 'Manhattan' 'Staten Island' 'Bronx'
 'Schools' 'NYCHA']
----------------------------------------------------------------
Locations register in the Mainsort dataset: ['Citywide' 'Brooklyn' 'Queens' 'Manhattan' 'Staten Island' 'Bronx'
 'Schools' 'NYCHA']
----------------------------------------------------------------
Locations register in the general dataset: ['Brooklyn North' 'Brooklyn South' 'Bronx' 'Manhattan' 'Queens East'
 'Queens West' 'Staten Island']


Analyzing the locations in all the databases, we decide to summarize and filtering them and their data just for the 5 boroughs.
- Manhattan
- Queens (which includes 'Queens East' and 'Queens West')
- Brooklyn (which includes 'Brooklyn North' and 'Brooklyn South')
- Bronx
- Staten Island


Each material collection will have some next fields per location (materials like paper and MGP will have average capture rates taken from the general dataset, but the others will not because there is not information):
- Average aggregate percent subsort
- Average refuse percent subsort
- Average aggregate percent mainsort
- Average refuse percent mainsort
- Average capture rate

In [16]:
# Format change in the general dataset
Recycling_Div_Captures["_zone"]= Recycling_Div_Captures["_zone"].str.replace(r'Brooklyn North', r'Brooklyn')
Recycling_Div_Captures["_zone"]= Recycling_Div_Captures["_zone"].str.replace(r'Brooklyn South', r'Brooklyn')
Recycling_Div_Captures["_zone"]= Recycling_Div_Captures["_zone"].str.replace(r'Queens East', r'Queens')
Recycling_Div_Captures["_zone"]= Recycling_Div_Captures["_zone"].str.replace(r'Queens West', r'Queens')

In [17]:
# First filter: Recycling_Div_Captures data should be filter just for 2017 year
Recycling_Div_Captures["fiscal_year"] = Recycling_Div_Captures["fiscal_year"].apply(pd.to_numeric, errors='coerce')
Recycling_Div_Captures = Recycling_Div_Captures.loc[Recycling_Div_Captures["fiscal_year"]==2017,:]

In [18]:
# Rename columns and convert to_numeric 
Recycling_Div_Captures = Recycling_Div_Captures.rename(columns={
    "diversion_rate_total_total_recycling_total_waste_": "Total recycling rate",
    "capture_rate_paper_total_paper_max_paper_": "Paper rate",
    "capture_rate_mgp_total_mgp_max_mgp_": "MGP rate"
})
cols = ["Total recycling rate", "Paper rate", "MGP rate"]
Recycling_Div_Captures[cols] = Recycling_Div_Captures[cols].apply(pd.to_numeric, errors='coerce')

In [19]:
# Subsort percentages format 
cols = ["Aggregate Percent", "Refuse Percent", "MGP Percent", "Paper Percent", "Organic Percent"]
cws_df[cols] = cws_df[cols] *100
cws_df.head()

Unnamed: 0,Material,Aggregate Percent,Refuse Percent,MGP Percent,Paper Percent,Organic Percent,Material Group,DSNY Diversion Summary Category,Location
0,Newspaper,1.9,1.1,0.5,9.8,0.0,Paper,Designated Paper,Citywide
1,Plain OCC/Kraft Paper,5.6,1.2,1.0,46.4,0.2,Paper,Designated Paper,Citywide
2,Paper Bags: Kraft Grocery,0.2,0.1,0.0,0.8,0.0,Paper,Designated Paper,Citywide
3,Paper Bags: Non-food Retail,0.2,0.2,0.0,0.8,0.0,Paper,Designated Paper,Citywide
4,Paper Bags: Fast Food Bags,0.1,0.2,0.0,0.1,0.0,Paper,Designated Paper,Citywide


In [20]:
# Mainsort percentages format 
cols = ["Aggregate Percent", "Refuse Percent", "MGP Percent", "Paper Percent", "Organic Percent"]
cwm_df["Aggregate Percent"]= cwm_df["Aggregate Percent"].str.replace(r'%', r'')
cwm_df["Refuse Percent"]= cwm_df["Refuse Percent"].str.replace(r'%', r'')
cwm_df["MGP Percent"]= cwm_df["MGP Percent"].str.replace(r'%', r'')
cwm_df["Paper Percent"]= cwm_df["Paper Percent"].str.replace(r'%', r'')
cwm_df["Organic Percent"]= cwm_df["Organic Percent"].str.replace(r'%', r'')

cwm_df[cols]= cwm_df[cols].apply(pd.to_numeric, errors='coerce')
print(cwm_df.dtypes)
cwm_df.head()

Material                            object
Aggregate Percent                  float64
Refuse Percent                     float64
MGP Percent                        float64
Paper Percent                      float64
Organic Percent                    float64
Material Group                      object
DSNY Diversion Summary Category     object
Location                            object
dtype: object


Unnamed: 0,Material,Aggregate Percent,Refuse Percent,MGP Percent,Paper Percent,Organic Percent,Material Group,DSNY Diversion Summary Category,Location
0,Newspaper,1.9,1.1,0.5,9.8,0.0,Paper,Designated Paper,Citywide
1,Plain OCC/Kraft Paper,5.7,1.4,1.0,47.2,0.2,Paper,Designated Paper,Citywide
2,High Grade Paper,0.7,0.5,0.1,2.6,0.0,Paper,Designated Paper,Citywide
3,Mixed Low Grade Paper,8.7,6.8,2.4,31.4,0.9,Paper,Designated Paper,Citywide
4,Paper: Compostable/Soiled/Waxed OCC/Kraft,8.1,9.8,0.7,1.4,2.0,Paper,Organics Suitable for Composting,Citywide


In [21]:
# Paper collection
# Materials paper types register in subsort
paper_material_sub = cws_df.loc[cws_df["Material Group"]=="Paper",["Material"]]
paper_material_sub = paper_material_sub["Material"].unique()
set_sub = set(paper_material_sub)

In [22]:
# Materials paper types register in mainsort
paper_material_main = cwm_df.loc[cws_df["Material Group"]=="Paper",["Material"]]
paper_material_main = paper_material_main["Material"].unique()
set_main = set(paper_material_main)

In [23]:
# Mergin material without duplication 
no_duplicates = list(set_main-set_sub)
paper_material_sub = list(set_sub)
paper_material_final = paper_material_sub + no_duplicates
paper_material_final

['Compostable/School Paper Boats',
 'Paper Bags: Compostable/Soiled',
 'Paper Bags: Fast Food Bags',
 'Beverage Cartons & Aseptic Boxes',
 'High Grade Paper',
 'Mixed Low Grade Paper',
 'Plain OCC/Kraft Paper',
 'Paper Bags: Non-food Retail',
 'Compostable/School Plates/Trays',
 'Drinking Cups - Paper and Plastic: Paper - Non-Compostable',
 'Paper Bags: Kraft Grocery',
 'Shredded Paper',
 'Compostable/Soiled Paper/Waxed OCC/Kraft',
 'Newspaper',
 'Drinking Cups - Paper and Plastic: Paper - Compostable',
 'Other Non-recyclable Paper',
 '#2 HDPE Natural Bottles',
 'Aluminum Cans',
 'Empty Aerosol Cans',
 'Non-C&D Wood',
 'Film Plastic: Retail Bags/Sleeves',
 'Other Ferrous',
 'Other Plastic Bottles',
 '#1 PET Bottles',
 'Yard Waste',
 'Paper: Compostable/Soiled/Waxed OCC/Kraft',
 'Aluminum Foil/Containers',
 'Other Non-Ferrous',
 'Film Plastic: Garbage Bags',
 'Other Color Container Glass',
 '#6 EPS Containers/Packaging (including Single Use)',
 'Other Aluminum',
 'Film Plastic: All Othe

In [26]:
# Filter just papper material group subsort
paper_sub_df = cws_df.loc[cws_df["Material Group"]=="Paper",:]
# Filter just papper material group mainsort
paper_main_df = cwm_df.loc[cws_df["Material Group"]=="Paper",:]

# Group and calculate the mean of aggregate and refuse percentage per location
agg_subsort = paper_sub_df.groupby("Location")["Aggregate Percent"].mean()
refuse_subsort = paper_sub_df.groupby("Location")["Refuse Percent"].mean()
agg_mainsort = paper_main_df.groupby("Location")["Aggregate Percent"].mean()
refuse_mainsort = paper_main_df.groupby("Location")["Refuse Percent"].mean()

# Collection paper will have Average capture rate taken from Recycling_Div_Captures DataFrame
avg_cap_rate = Recycling_Div_Captures.groupby("_zone")["Paper rate"].mean()

In [27]:
# MGP collection
# 'Plastic' 'Glass' 'Metal'
# Types of Metal register in subsort
Metals_sub = cws_df.loc[(cws_df["Material Group"]=="Metal"),["Material"]]
Metals_sub = Metals_sub["Material"].unique()
set_sub = set(Metals_sub)
# Types of MGP materials register in mainsort
Metals_main = cwm_df.loc[(cwm_df["Material Group"]=="Metal"),["Material"]]
Metals_main = Metals_main["Material"].unique()
set_main = set(Metals_main)
# Mergin types of materials without duplication 
no_duplicates = list(set_main-set_sub)
Metals_sub = list(set_sub)
Metals_final= Metals_sub + no_duplicates
Metals_final

['Mixed Metals',
 'Other Aluminium',
 'Aluminum Cans',
 'Other Ferrous Metal',
 'Aluminum Foil/Containers',
 'Empty Aerosol Cans',
 'Appliances: Non-ferrous',
 'Other Non-ferrous',
 'Appliances: Ferrous',
 'Steel/Tin Food Cans',
 'Other Non-Ferrous',
 'Other Aluminum',
 'Appliances: Non-Ferrous',
 'Other Ferrous']

In [28]:
# Types of glass register in subsort
Glass_sub = cws_df.loc[(cws_df["Material Group"]=="Glass"),["Material"]]
Glass_sub = Glass_sub["Material"].unique()
set_sub = set(Glass_sub)
# Types of MGP materials register in mainsort
Glass_main = cwm_df.loc[(cwm_df["Material Group"]=="Glass"),["Material"]]
Glass_main = Glass_main["Material"].unique()
set_main = set(Glass_main)
# Mergin types of materials without duplication 
no_duplicates = list(set_main-set_sub)
Glass_sub = list(set_sub)
Glass_final= Glass_sub + no_duplicates
Glass_final

['Clear Container Glass',
 'Green Container Glass',
 'Mixed Cullet',
 'Brown Container Glass',
 'Other Color Container Glass',
 'Other Glass']

In [29]:
# Types of plastic register in subsort
Plas_sub = cws_df.loc[(cws_df["Material Group"]=="Plastic"),["Material"]]
Plas_sub = Plas_sub["Material"].unique()
set_sub = set(Plas_sub)
# Types of MGP materials register in mainsort
Plas_main = cwm_df.loc[(cwm_df["Material Group"]=="Plastic"),["Material"]]
Plas_main = Plas_main["Material"].unique()
set_main = set(Plas_main)
# Mergin types of materials without duplication 
no_duplicates = list(set_main-set_sub)
Plas_sub = list(set_sub)
Plastic_final= Plas_sub + no_duplicates
Plastic_final

['Plastic Bottles: #7 Other',
 'Rigid Plastics: Rigid Container/Pkg: #2 HDPE Natural',
 'Bulk/Rigid Plastics: Toys/Housewares: Other & Unlabeled',
 'Plastic Bottles: #5 PP Rigid',
 'Rigid Plastics: Rigid Container/Pkg: #4 LDPE/LLDPE',
 'Other Plastic Bags & Film: Non-PE Film',
 'Appliances: Plastic: #7 Other',
 'Bulk/Rigid Plastics: Crates/Soda Carrier: #1 PET All',
 'Single Use: Compostable',
 'Expanded Polystyrene Containers & Packaging (EPS): Clamshells',
 'Bulk/Rigid Plastics: Toys/Housewares: #5 PP Expanded',
 'Thermoforms: #5 PP Expanded',
 'Expanded Polystyrene Containers & Packaging (EPS): Singe Use Other',
 'Rigid Plastics: Rigid Container/Pkg: #5 PP Expanded',
 'Drinking Cups - Paper and Plastic: Plastic - Durable: Other & Unlabeled',
 'Rigid Plastics: Tubs & Lids: Compostable',
 'Expanded Polystyrene Containers & Packaging (EPS): Single Use Cups',
 'Rigid Plastics: Other Plastics: #5 PP Expanded',
 'Bulk/Rigid Plastics: Other Durable: Other & Unlabeled',
 'Single Use: #4 LDP

In [30]:
# Filter MGP materials subsort
MGP_sub_df = cws_df.loc[(cws_df["Material Group"]=="Glass")|(cws_df["Material Group"]=="Plastic")
                              | (cws_df["Material Group"]=="Metal"),:]
# Filter MGP materials mainsort
MGP_main_df = cwm_df.loc[(cwm_df["Material Group"]=="Glass")|(cwm_df["Material Group"]=="Plastic")
                              | (cwm_df["Material Group"]=="Metal"),:]
# Group and calculate the mean of aggregate and refuse percentage per location
MGP_agg_subsort = MGP_sub_df.groupby("Location")["Aggregate Percent"].mean()
MGP_refuse_subsort = MGP_sub_df.groupby("Location")["Refuse Percent"].mean()
MGP_agg_mainsort = MGP_main_df.groupby("Location")["Aggregate Percent"].mean()
MGP_refuse_mainsort = MGP_main_df.groupby("Location")["Refuse Percent"].mean()

# Collection MGP will have Average capture rate taken from Recycling_Div_Captures DataFrame
MGP_avg_cap_rate = Recycling_Div_Captures.groupby("_zone")["MGP rate"].mean()

### LOAD
Create a non-relational dabase to load in MongoDB

In [31]:
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)
materials_db = client["MaterialsDB"]
materials_coll = materials_db["materials"]

In [32]:
# Insert paper document in the database
materials_db.materials_coll.insert_one(
    {
        'material_group': "Paper",
        'year': 2017,
        'materials_list': paper_material_final,
        'Bronx':
        {
            'Subsort':
            {
                'Aggregate Percent': agg_subsort.Bronx,
                'Refuse Percent': refuse_subsort.Bronx,
            },
            'Capture rate': avg_cap_rate.Bronx
        },
        'Manhattan':
        {
            'Subsort':
            {
                'Aggregate Percent': agg_subsort.Manhattan,
                'Refuse Percent': refuse_subsort.Manhattan,
            },
            'Mainsort':
            {
                'Aggregate Percent': agg_mainsort.Manhattan,
                'Refuse Percent': refuse_mainsort.Manhattan,
            },
            'Capture rate': avg_cap_rate.Manhattan
        },
        'Queens':
        {
            'Subsort':
            {
                'Aggregate Percent': agg_subsort.Queens,
                'Refuse Percent': refuse_subsort.Queens,
            },
            'Capture rate': avg_cap_rate.Queens
        },
        'Brooklyn':
        {
            'Subsort':
            {
                'Aggregate Percent': agg_subsort.Brooklyn,
                'Refuse Percent': refuse_subsort.Brooklyn,
            },
            'Capture rate': avg_cap_rate.Brooklyn
        },
        'Staten Island':
        {
            'Subsort':
            {
                'Aggregate Percent': agg_subsort[7],
                'Refuse Percent': refuse_subsort[7],
            },
            'Capture rate': avg_cap_rate[4]
        }

    }
)

<pymongo.results.InsertOneResult at 0x2479566cc88>

In [33]:
# Insert MGP document in the database
materials_db.materials_coll.insert_one(
    {
        'material_group': "MGP (Metal, Glass, Plastic)",
        'year': 2017,
        'materials_list':
        {
            "Metal": Metals_final,
            "Glass": Glass_final,
            "Plastic": Plastic_final
        },
        'Bronx':
        {
            'Subsort':
            {
                'Aggregate Percent': MGP_agg_subsort.Bronx,
                'Refuse Percent': MGP_refuse_subsort.Bronx,
            },
            'Mainsort':
            {
                'Aggregate Percent': MGP_agg_mainsort.Bronx,
                'Refuse Percent': MGP_refuse_mainsort.Bronx,
            },
            'Capture rate': MGP_avg_cap_rate.Bronx
        },
        'Manhattan':
        {
            'Subsort':
            {
                'Aggregate Percent': MGP_agg_subsort.Manhattan,
                'Refuse Percent': MGP_refuse_subsort.Manhattan,
            },
            'Mainsort':
            {
                'Aggregate Percent': MGP_agg_mainsort.Manhattan,
                'Refuse Percent': MGP_refuse_mainsort.Manhattan,
            },
            'Capture rate': MGP_avg_cap_rate.Manhattan
        },
        'Queens':
        {
            'Subsort':
            {
                'Aggregate Percent': MGP_agg_subsort.Queens,
                'Refuse Percent': MGP_refuse_subsort.Queens,
            },
            'Mainsort':
            {
                'Aggregate Percent': MGP_agg_mainsort.Queens,
                'Refuse Percent': MGP_refuse_mainsort.Queens,
            },
            'Capture rate': MGP_avg_cap_rate.Queens
        },
        'Brooklyn':
        {
            'Subsort':
            {
                'Aggregate Percent': MGP_agg_subsort.Brooklyn,
                'Refuse Percent': MGP_refuse_subsort.Brooklyn,
            },
            'Mainsort':
            {
                'Aggregate Percent': MGP_agg_mainsort.Brooklyn,
                'Refuse Percent': MGP_refuse_mainsort.Brooklyn,
            },
            'Capture rate': MGP_avg_cap_rate.Brooklyn
        },
        'Staten Island':
        {
            'Subsort':
            {
                'Aggregate Percent': MGP_agg_subsort[7],
                'Refuse Percent': MGP_refuse_subsort[7],
            },
            'Mainsort':
            {
                'Aggregate Percent': MGP_agg_mainsort[7],
                'Refuse Percent': MGP_refuse_mainsort[7],
            },
            'Capture rate': MGP_avg_cap_rate[4]
        }

    }
)

<pymongo.results.InsertOneResult at 0x2479593ca88>