### Dataset "Waste baskets" (plus "Waste baskets disposal type" and "Waste disposal type")

The GeoJSON file contains all the waste baskets and recycling bins in the Province of Trento that have been mapped by OpenStreetMap community users.  
The data was collected with Overpass Turbo by running the following query:

```
[out:json];
area["name"="Provincia di Trento"]->.a; // Define the Province of Trento
(
  node["amenity"="waste_basket"](area.a); // Waste baskets
  node["amenity"="recycling"](area.a); // Recycling points
  node["amenity"="waste_basket"]["waste:organic"="yes"](area.a); // Organic waste bins
  node["amenity"="recycling"]["recycling:glass"="yes"](area.a); // Glass recycling points
  node["amenity"="recycling"]["recycling:plastic"="yes"](area.a); // Plastic recycling points
  node["amenity"="recycling"]["recycling:paper"="yes"](area.a); // Paper recycling points
  node["amenity"="waste_disposal"](area.a); // Waste collection points
  node["amenity"="waste_collection_point"](area.a); // Waste collection bins
);
out body;
>;
out skel qt;
```


You may also look at the [map](https://overpass-turbo.eu/map.html?Q=%5Bout%3Ajson%5D%3B%0Aarea%5B%22name%22%3D%22Provincia+di+Trento%22%5D-%3E.a%3B+%2F%2F+Define+the+Province+of+Trento%0A%28%0A++node%5B%22amenity%22%3D%22waste_basket%22%5D%28area.a%29%3B+%2F%2F+Waste+baskets%0A++node%5B%22amenity%22%3D%22recycling%22%5D%28area.a%29%3B+%2F%2F+Recycling+points%0A++node%5B%22amenity%22%3D%22waste_basket%22%5D%5B%22waste%3Aorganic%22%3D%22yes%22%5D%28area.a%29%3B+%2F%2F+Organic+waste+bins%0A++node%5B%22amenity%22%3D%22recycling%22%5D%5B%22recycling%3Aglass%22%3D%22yes%22%5D%28area.a%29%3B+%2F%2F+Glass+recycling+points%0A++node%5B%22amenity%22%3D%22recycling%22%5D%5B%22recycling%3Aplastic%22%3D%22yes%22%5D%28area.a%29%3B+%2F%2F+Plastic+recycling+points%0A++node%5B%22amenity%22%3D%22recycling%22%5D%5B%22recycling%3Apaper%22%3D%22yes%22%5D%28area.a%29%3B+%2F%2F+Paper+recycling+points%0A++node%5B%22amenity%22%3D%22waste_disposal%22%5D%28area.a%29%3B+%2F%2F+Waste+collection+points%0A++node%5B%22amenity%22%3D%22waste_collection_point%22%5D%28area.a%29%3B+%2F%2F+Waste+collection+bins%0A%29%3B%0Aout+body%3B%0A%3E%3B%0Aout+skel+qt%3B%0A%0A) to find the exact location of the points.

In [1]:
import geopandas as gpd
import pandas as pd
import json

In [77]:
# Load the GeoJSON file into a GeoDataFrame.
# `gdf` is the working table that the following cells clean and enrich.
geojson_file = 'waste_baskets_original.geojson'
gdf = gpd.read_file(geojson_file)

In [49]:
# Dimensions (rows, columns) of the freshly loaded table
gdf.shape

(3883, 84)

In [37]:
# List every column present in the freshly loaded table
gdf.columns

Index(['id', '@id', 'access', 'addr:city', 'addr:country', 'addr:housename',
       'addr:housenumber', 'addr:postcode', 'addr:province', 'addr:street',
       'amenity', 'ash', 'backrest', 'bench', 'building', 'bus', 'check_date',
       'check_date:recycling', 'colour', 'covered', 'created_by',
       'departures_board', 'description', 'disused:highway',
       'disused:public_transport', 'fixme', 'highway', 'hiking', 'image',
       'indoor', 'layer', 'level', 'location', 'material', 'name', 'note',
       'opening_hours', 'operator', 'private', 'public_transport',
       'recycling:PET', 'recycling:aluminium', 'recycling:batteries',
       'recycling:beverage_cartons', 'recycling:books', 'recycling:bottles',
       'recycling:cans', 'recycling:cardboard', 'recycling:cartons',
       'recycling:clothes', 'recycling:drugs',
       'recycling:electrical_appliances', 'recycling:electrical_items',
       'recycling:food_waste', 'recycling:garden_pots',
       'recycling:garden_waste', '

In [69]:
# One boolean per column: True when every value in that column is missing (NaN/None)
nan_columns = gdf.isna().all()

# The boolean Series doubles as its own mask, so index it with itself to
# print the names of the all-missing columns
print(nan_columns[nan_columns].index)

Index([], dtype='object')


In [78]:
# Snapshot of the current column order. Despite its name this list holds ALL
# columns; the columns to remove are taken from it as positional slices.
cols_to_drop = list(gdf.columns)

# Each range runs from a first column to a last column, both included
# (hence the + 1 on every end position).
start1, end1 = cols_to_drop.index('ash'), cols_to_drop.index('public_transport') + 1
start2, end2 = cols_to_drop.index('ref'), cols_to_drop.index('wheelchair') + 1
start3, end3 = cols_to_drop.index('@id'), cols_to_drop.index('addr:street') + 1

# Remove the three contiguous runs in a single drop
unwanted_columns = [
    *cols_to_drop[start1:end1],
    *cols_to_drop[start2:end2],
    *cols_to_drop[start3:end3],
]
gdf = gdf.drop(columns=unwanted_columns)


In [71]:
# Dimensions (rows, columns) after dropping the unneeded column ranges
gdf.shape

(3883, 39)

In [72]:
# Columns that remain after the drop
gdf.columns

Index(['id', 'amenity', 'recycling:PET', 'recycling:aluminium',
       'recycling:batteries', 'recycling:beverage_cartons', 'recycling:books',
       'recycling:bottles', 'recycling:cans', 'recycling:cardboard',
       'recycling:cartons', 'recycling:clothes', 'recycling:drugs',
       'recycling:electrical_appliances', 'recycling:electrical_items',
       'recycling:food_waste', 'recycling:garden_pots',
       'recycling:garden_waste', 'recycling:glass', 'recycling:glass_bottles',
       'recycling:green_waste', 'recycling:lamps', 'recycling:magazines',
       'recycling:newspaper', 'recycling:organic', 'recycling:organic_waste',
       'recycling:paper', 'recycling:paper_packaging', 'recycling:plastic',
       'recycling:plastic_bottles', 'recycling:plastic_packaging',
       'recycling:scrap_metal', 'recycling:shoes',
       'recycling:small_appliances', 'recycling:tetrapak', 'recycling:waste',
       'recycling:wood', 'recycling_type', 'geometry'],
      dtype='object')

In [79]:
# Add the municipality for each basket.
# Load the municipality layer and bring both layers to the same CRS before
# they are joined spatially.
# NOTE(review): a later cell reads 'Municipality.geojson' — confirm that
# 'Municipality.txt' is the intended file here.
municipalities = gpd.read_file('Municipality.txt')
municipalities = municipalities.to_crs(epsg=4326)  # EPSG 4326 is a common CRS (WGS84)
gdf = gdf.to_crs(epsg=4326)

In [80]:
# Perform spatial join to find which municipality each waste basket is in.
# how='left' keeps every basket row, even one that lies within no municipality
# geometry (its 'name' is then left missing).
# `predicate` is the current name of the keyword formerly called `op`; `op` is
# deprecated and no longer accepted by recent GeoPandas releases.
gdf = gpd.sjoin(gdf, municipalities[['name', 'geometry']], how='left', predicate='within')


  if await self.run_code(code, result, async_=asy):


In [82]:
# Tidy up after the spatial join: discard the join bookkeeping column
# 'index_right' and expose the joined 'name' column as 'municipality'.
gdf = gdf.drop(columns=['index_right']).rename(columns={'name': 'municipality'})

# Preview the first rows to confirm the result
print(gdf.head())

               id         amenity recycling:PET recycling:aluminium  \
0  node/261081646       recycling          None                None   
1  node/261081752       recycling          None                None   
2  node/261081757       recycling          None                None   
3  node/261086633  waste_disposal          None                None   
4  node/261086634       recycling          None                None   

  recycling:batteries recycling:beverage_cartons recycling:books  \
0                None                       None            None   
1                None                       None            None   
2                None                       None            None   
3                None                       None            None   
4                None                       None            None   

  recycling:bottles recycling:cans recycling:cardboard  ...  \
0              None           None                None  ...   
1              None           None    

In [None]:
# Load the municipality records.
# The encoding is stated explicitly instead of relying on the platform default,
# because the municipality names are not all ASCII (e.g. 'Fiavé').
# NOTE(review): the filter below iterates `data` directly, so this assumes the
# file holds a JSON list of feature objects — confirm.
with open('Municipality.geojson', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Keep only the records whose properties carry a non-empty 'name'.
# `or {}` also covers a record whose 'properties' is present but null.
filtered_data = [
    record for record in data
    if (record.get('properties') or {}).get('name')
]

# Save the cleaned records back to a file
with open('Cleaned_Municipality.geojson', 'w', encoding='utf-8') as file:
    json.dump(filtered_data, file, indent=4)

print(f"Removed {len(data) - len(filtered_data)} records with empty or missing 'name'.")


Removed 211 records with empty or missing 'name'.


In [10]:
# Load the cleaned municipalities GeoJSON file.
# The encoding is stated explicitly instead of relying on the platform default,
# because the municipality names are not all ASCII (e.g. 'Fiavé').
with open('Cleaned_Municipality.geojson', 'r', encoding='utf-8') as geojson_file:
    geojson_data = json.load(geojson_file)

# Create a mapping of municipality names to ref:ISTAT.
# Records without a ref:ISTAT value are skipped; `or {}` also covers a record
# whose 'properties' is present but null. If two records share a name, the
# later one wins.
name_to_ref_istat_mapping = {
    record['properties']['name']: record['properties']['ref:ISTAT']
    for record in geojson_data
    if (record.get('properties') or {}).get('ref:ISTAT')
}


name_to_ref_istat_mapping

{'Samone': '22165',
 'Avio': '22007',
 'Ala': '22001',
 'Vallarsa': '22210',
 'Brentonico': '22025',
 'Trambileno': '22203',
 'Bondone': '22021',
 'Nago-Torbole': '22124',
 'Storo': '22183',
 'Mori': '22123',
 'Rovereto': '22161',
 'Riva del Garda': '22153',
 'Terragnolo': '22193',
 'Folgaria': '22087',
 'Arco': '22006',
 'Borgo Chiese': '22238',
 'Isera': '22098',
 'Ronzo-Chienis': '22135',
 'Nogaredo': '22127',
 'Ledro': '22229',
 'Pieve di Bono-Prezzo': '22234',
 'Tenno': '22191',
 'Villa Lagarina': '22222',
 'Castel Condino': '22045',
 'Pomarolo': '22144',
 'Volano': '22224',
 'Lavarone': '22102',
 'Valdaone': '22232',
 'Nomi': '22128',
 'Lusérn - Luserna': '22109',
 'Drena': '22078',
 'Besenello': '22013',
 'Levico Terme': '22104',
 'Caldonazzo': '22034',
 'Tione di Trento': '22199',
 'Fiavé': '22083',
 'Cimone': '22058',
 'Aldeno': '22003',
 'Grigno': '22095',
 'Bleggio Superiore': '22017',
 'Sella Giudicarie': '22246',
 'Dro': '22079',
 'Cavedine': '22053',
 'Altopiano della Vig

In [11]:
# Replace municipality names with ref:ISTAT values.
# A name with no entry in the mapping (and a basket with no municipality at
# all) ends up as NaN, so the column may contain missing values afterwards.
gdf['municipality'] = gdf['municipality'].map(name_to_ref_istat_mapping)


print("The 'municipality' column has been replaced with 'ref:ISTAT' values.")

The 'municipality' column has been replaced with 'ref:ISTAT' values.


In [84]:
# Dimensions (rows, columns) once the 'municipality' column has been added
gdf.shape

(3883, 40)

In [83]:
# Column list once the 'municipality' column has been added
gdf.columns

Index(['id', 'amenity', 'recycling:PET', 'recycling:aluminium',
       'recycling:batteries', 'recycling:beverage_cartons', 'recycling:books',
       'recycling:bottles', 'recycling:cans', 'recycling:cardboard',
       'recycling:cartons', 'recycling:clothes', 'recycling:drugs',
       'recycling:electrical_appliances', 'recycling:electrical_items',
       'recycling:food_waste', 'recycling:garden_pots',
       'recycling:garden_waste', 'recycling:glass', 'recycling:glass_bottles',
       'recycling:green_waste', 'recycling:lamps', 'recycling:magazines',
       'recycling:newspaper', 'recycling:organic', 'recycling:organic_waste',
       'recycling:paper', 'recycling:paper_packaging', 'recycling:plastic',
       'recycling:plastic_bottles', 'recycling:plastic_packaging',
       'recycling:scrap_metal', 'recycling:shoes',
       'recycling:small_appliances', 'recycling:tetrapak', 'recycling:waste',
       'recycling:wood', 'recycling_type', 'geometry', 'municipality'],
      dtype='obje

In [85]:
# Replace every geometry object with its WKT text
# (e.g. 'POINT (11.4362294 46.2814163)'), so the column holds plain strings.
gdf['geometry'] = gdf['geometry'].apply(lambda x: x.wkt)

  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)


In [86]:
# Inspect the converted geometry column
gdf.geometry

0       POINT (11.4362294 46.2814163)
1       POINT (11.4398494 46.2826576)
2       POINT (11.4399299 46.2815241)
3       POINT (11.4187814 46.2719774)
4       POINT (11.4173692 46.2729517)
                    ...              
3878     POINT (11.000973 45.9278191)
3879    POINT (11.4293755 46.0631927)
3880    POINT (11.1220095 46.0913459)
3881    POINT (11.1426697 46.0649704)
3882     POINT (11.1414906 46.065003)
Name: geometry, Length: 3883, dtype: object

In [87]:
# Collect the per-material flag columns, i.e. those named 'recycling:<material>'.
# The colon in the prefix already keeps 'recycling_type' out of the list.
recycling_columns = [name for name in gdf.columns if name.startswith('recycling:')]

In [88]:
# Print the distinct values found in each recycling flag column, to see which
# tag values need to be handled when the flags are converted
for col in recycling_columns:
    print(gdf[col].unique())

[None 'yes']
[None 'yes']
[None 'yes' 'no']
[None 'yes']
[None 'yes']
[None 'yes']
[None 'yes' 'no']
[None 'yes']
[None 'yes']
[None 'yes' 'no']
[None 'yes']
[None 'no']
[None 'yes']
[None 'yes']
[None 'yes']
[None 'yes']
[None 'no' 'yes']
[None 'yes' 'no']
[None 'yes']
[None 'yes']
[None 'yes' 'no']
[None 'yes']
[None 'yes']
[None 'yes']
[None 'yes' 'no']
[None 'yes']
[None 'yes' 'no']
[None 'yes']
[None 'yes' 'no']
[None 'yes' 'no']
[None 'yes']
[None 'no']
[None 'yes']
[None 'yes' 'no']
[None 'yes']


In [89]:
# Turn the tag values into booleans: only an explicit 'yes' becomes True;
# 'no' and missing values (None/NaN) both become False.
# A vectorised comparison yields exactly what the former element-wise
# applymap call produced, without relying on DataFrame.applymap, which pandas
# has deprecated.
gdf[recycling_columns] = gdf[recycling_columns] == 'yes'

  gdf[recycling_columns] = gdf[recycling_columns].applymap(


In [90]:
# Count, for every basket, how many recycling flags are set
gdf['overlap_count'] = gdf[recycling_columns].sum(axis=1)

# Select the baskets that have more than one flag set
has_several_flags = gdf['overlap_count'].gt(1)
overlapping_baskets = gdf[has_several_flags]

overlapping_baskets.shape

(469, 41)

In [91]:
# 'overlap_count' was only needed for the check above; remove it again
gdf = gdf.drop(columns=['overlap_count'])

Create the "Waste disposal type" dataset (saved as `waste_disposal_type.csv`)

In [97]:
# Waste disposal categories as (category, description) pairs.
# The position in this list determines the id: the first entry gets id 1.
disposal_types = [
    ("Organic", "Waste that is biodegradable and can be composted, such as food scraps and organic waste."),
    ("Paper/Cardboard", "Recyclable materials like paper, cardboard, newspapers, magazines, and packaging."),
    ("Glass", "Recyclable glass items, including bottles, jars, and containers."),
    ("Metal", "Recyclable metals like aluminum, cans, and scrap metal."),
    ("Plastic", "Recyclable plastic items, including PET, plastic bottles, packaging, and plastic bags."),
    ("Textiles", "Recyclable clothing and textiles, including old clothes and shoes."),
    ("Electronic Waste", "Old or broken electronics like appliances, small electronics, and batteries."),
    ("Wood", "Recyclable wood waste, including garden pots, wooden furniture, and construction debris."),
    ("Construction Waste", "Waste from construction and demolition activities, including debris and non-recyclable materials."),
    ("Miscellaneous", "Items that do not fit into other categories, such as tetrapaks, garden pots, and other special materials."),
]

# Expand the pairs into one record per category, numbering them from 1
categories = [
    {"id": type_id, "category": label, "description": text}
    for type_id, (label, text) in enumerate(disposal_types, start=1)
]

# Tabular form of the category list (columns: id, category, description)
category_df = pd.DataFrame(categories)

category_df.head()


Unnamed: 0,id,category,description
0,1,Organic,Waste that is biodegradable and can be compost...
1,2,Paper/Cardboard,"Recyclable materials like paper, cardboard, ne..."
2,3,Glass,"Recyclable glass items, including bottles, jar..."
3,4,Metal,"Recyclable metals like aluminum, cans, and scr..."
4,5,Plastic,"Recyclable plastic items, including PET, plast..."


In [98]:
# Save the category table to a CSV file; index=False leaves out the
# DataFrame's row index, so 'id' is the first column of the file
category_df.to_csv("waste_disposal_type.csv", index=False)

In [93]:
# Map every recycling flag column to the id of its disposal category
# (1 Organic, 2 Paper/Cardboard, 3 Glass, 4 Metal, 5 Plastic, 6 Textiles,
# 7 Electronic Waste, 8 Wood, 9 Construction Waste, 10 Miscellaneous).
# The entry order is significant: code that iterates this dict emits its rows
# in this order, so entries must not be regrouped.
# NOTE(review): 'recycling:waste' is filed under 9 (Construction Waste) —
# confirm that this is the intended category.
recycling_category_mapping = {
    'recycling:food_waste': 1,
    'recycling:organic': 1,
    'recycling:organic_waste': 1,
    'recycling:garden_waste': 1,
    'recycling:green_waste': 1,
    'recycling:paper': 2,
    'recycling:cardboard': 2,
    'recycling:paper_packaging': 2,
    'recycling:books': 2,
    'recycling:magazines': 2,
    'recycling:newspaper': 2,
    'recycling:glass': 3,
    'recycling:glass_bottles': 3,
    'recycling:aluminium': 4,
    'recycling:cans': 4,
    'recycling:scrap_metal': 4,
    'recycling:PET': 5,
    'recycling:plastic': 5,
    'recycling:plastic_bottles': 5,
    'recycling:plastic_packaging': 5,
    'recycling:beverage_cartons': 5,
    'recycling:clothes': 6,
    'recycling:shoes': 6,
    'recycling:batteries': 7,
    'recycling:electrical_appliances': 7,
    'recycling:electrical_items': 7,
    'recycling:small_appliances': 7,
    'recycling:lamps': 7,
    'recycling:wood': 8,
    'recycling:garden_pots': 8,
    'recycling:waste': 9,
    'recycling:tetrapak': 10,
    'recycling:bottles': 5,
    'recycling:cartons': 2,
    'recycling:drugs': 10,
}

In [94]:
# Build the link table between baskets and disposal categories:
# one row per (basket, category) pair.
basket_disposal_mapping = []

# Iterate over each row in the waste_basket DataFrame
for _, row in gdf.iterrows():
    basket_id = row['id']

    # Several flag columns share a category (e.g. 'recycling:paper' and
    # 'recycling:cardboard' are both category 2). Track the categories already
    # recorded for this basket so each pair is emitted only once.
    seen_categories = set()

    # Iterate over recycling columns to find enabled categories
    for col, category_index in recycling_category_mapping.items():
        # The explicit comparison is kept on purpose: a non-boolean leftover
        # such as the string 'no' is truthy but must not count as enabled.
        if row[col] == True and category_index not in seen_categories:
            seen_categories.add(category_index)
            basket_disposal_mapping.append({
                'waste_basket_id': basket_id,
                'waste_disposal_type_id': category_index  # id of the disposal category
            })

# Create the new dataset as a DataFrame. Naming the columns keeps the frame
# well-formed even when no basket has any category enabled.
waste_baskets_disposal_type = pd.DataFrame(
    basket_disposal_mapping,
    columns=['waste_basket_id', 'waste_disposal_type_id'],
)

# Display the first few rows of the new dataset
print(waste_baskets_disposal_type.head())

  waste_basket_id  waste_disposal_type_id
0  node/290936230                       1
1  node/290936230                       2
2  node/290936230                       3
3  node/290936230                       5
4  node/296366977                       1


In [95]:
# Add a new column 'id' with the index of each row
waste_baskets_disposal_type['id'] = waste_baskets_disposal_type.index
waste_baskets_disposal_type = waste_baskets_disposal_type[['id'] + [col for col in waste_baskets_disposal_type.columns if col != 'id']]


# Display the updated DataFrame
print(waste_baskets_disposal_type.head())


   id waste_basket_id  waste_disposal_type_id
0   0  node/290936230                       1
1   1  node/290936230                       2
2   2  node/290936230                       3
3   3  node/290936230                       5
4   4  node/296366977                       1


In [34]:
# Write the basket-to-category link table to CSV, without the DataFrame's row index
waste_baskets_disposal_type.to_csv("waste_baskets_disposal_type.csv", index=False)

In [96]:
# Save the cleaned waste basket table to CSV, without the DataFrame's row index
gdf.to_csv("waste_baskets.csv", index=False)