### Exploratory Data Analysis of Clean Green Singapore (CGS) Recyclables Dataset

Source: https://www.cgs.gov.sg/recyclingsearch/data.json

In [1]:
import json
import pandas as pd
import bs4
import re
import pickle

# Serialise an object to disk as a pickle file
def save_to_pickle(file_path, save_data):
    """Write ``save_data`` to ``file_path`` in pickle format.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten.

    Args:
        file_path: Destination path of the pickle file.
        save_data: Any picklable object.
    """
    with open(file_path, mode='wb') as pickle_file:
        pickle.Pickler(pickle_file).dump(save_data)

# Deserialise an object from a pickle file on disk
def load_from_pickle(file_path):
    """Read and return the object stored in the pickle file at ``file_path``.

    Unpickling can execute arbitrary code, so only use this on files that
    come from a trusted source.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The object reconstructed from the file.
    """
    with open(file_path, mode='rb') as pickle_file:
        return pickle.Unpickler(pickle_file).load()

# Initialise dataset: parse the locally saved copy of the CGS JSON export.
# The encoding is passed explicitly because open() otherwise falls back to a
# locale-dependent default, and the descriptions contain non-ASCII characters
# (the preprocessing step strips '•' bullets from them).
# NOTE(review): assumes cgs_data.txt was saved as UTF-8, the standard JSON
# encoding - confirm if the file was produced some other way.
cgs_data = None
with open('cgs_data.txt', 'r', encoding='utf-8') as file:
    cgs_data = json.load(file)

In [None]:
# Data Preprocessing
# Matches the stand-alone word "here" (case-sensitive) so that words which
# merely contain it ("there", "where", ...) are never rewritten.
_HERE_WORD = re.compile(r'\bhere\b')
# Runs of two or more spaces, e.g. the gaps left behind once bullets are removed.
_SPACE_RUN = re.compile(r' {2,}')


def _clean_description(raw_html):
    """Convert one HTML description into tidy plain text.

    Steps, in order:
      1. Inside every ``<a href=...>`` element, the word "here" in the link
         text is replaced by the link's URL so the address survives once
         the markup is stripped.
      2. All HTML tags are removed.
      3. Bullet characters ('•') are removed.
      4. The text is split on '. ' and every piece is stripped and passed
         through ``str.capitalize``.
      5. Runs of spaces are collapsed to a single space.

    NOTE(review): ``str.capitalize`` lower-cases everything after the first
    character of each piece, so substituted URLs and acronyms lose their
    original casing. Kept as-is to preserve the existing normalisation.

    Args:
        raw_html: Description text, possibly containing HTML markup.

    Returns:
        The cleaned text, or ``raw_html`` unchanged when it is not a string
        (for example a missing value).
    """
    if not isinstance(raw_html, str):
        return raw_html
    soup = bs4.BeautifulSoup(raw_html, "html.parser")
    for anchor in soup.find_all('a', href=True):
        link_target = anchor['href']
        # A callable replacement keeps any backslash in the URL literal.
        anchor.string = _HERE_WORD.sub(
            lambda _match, url=link_target: url, anchor.get_text()
        )
    text = soup.get_text().replace('•', '')
    text = '. '.join(piece.strip().capitalize() for piece in text.split('. '))
    return _SPACE_RUN.sub(' ', text)


# Build the frame with readable column names; the source 'id' column is not needed.
# ('suggestedWorkds' is the key spelling the rename relies on - keep it as is.)
df_cgs = (
    pd.DataFrame(cgs_data)
    .rename(columns={'id': 'ID', 'materialType': 'Material', 'itemName': 'Item', 'canBePlaced': 'Recyclability', 'description': 'Description', 'suggestedWorkds': 'Related Words'})
    .drop(columns=['ID'])
)
# Encode the recyclability verdict as 1 (recyclable) / 0 (not recyclable).
# The replacement is limited to this column so that identical strings in
# other columns are left untouched.
df_cgs['Recyclability'] = df_cgs['Recyclability'].replace({'YES!': 1, 'NOT IN ME!': 0, 'NO!': 0})
# Fold every plastic sub-type into the single 'Plastic' material.
df_cgs['Material'] = df_cgs['Material'].replace({'Non Food Plastic': 'Plastic', 'Non-Food Plastic': 'Plastic', 'F&B Plastic': 'Plastic'})
df_cgs['Description'] = df_cgs['Description'].map(_clean_description)
# Get material-item mapping: material -> unique item names, in order of first appearance
dict_material_items = {
    material: list(df_cgs.loc[df_cgs['Material'] == material, 'Item'].unique())
    for material in df_cgs['Material'].unique()
}

In [3]:
# Display the materials that act as keys of the material-item mapping
dict_material_items.keys()

dict_keys(['Paper', 'Others', 'Plastic', 'Glass', 'Metal'])

In [4]:
# Display the full mapping: each material with the list of its unique items
dict_material_items

{'Paper': ['Printed paper (Glossy and non-glossy)',
  'Writing paper',
  'Paper',
  'Newspaper',
  'Flyer (Glossy and non-glossy)',
  'Magazine (Glossy and non-glossy)',
  'Telephone directory',
  'Envelope (With and without plastic window)',
  'Plastic envelope',
  'Red packet',
  'Namecard',
  'Greeting card',
  'Shredded paper',
  'Paper receipt',
  'Carton box',
  'Cardboard box',
  'Printed paper box',
  'Paper egg trays',
  'Milk carton',
  'Drink packet',
  'Juice packet',
  'Paper towel tube',
  'Toilet roll tube',
  'Tissue box',
  'Paper bag',
  'Paper disposables',
  'Paper cup',
  'Paper plate',
  'Glitter paper',
  'Crayon drawing',
  'Tissue paper',
  'Paper towel',
  'Toilet paper',
  'Disposable wooden chopsticks',
  'Wooden chopsticks',
  'Pizza boxes',
  'Wax paper',
  'Paper packaging contaminated with food',
  'Paper packaging with food',
  'Newsletter',
  'Brochure (Glossy and non-glossy)',
  'Books',
  'Textbooks',
  'Calendar',
  'Gift wrapping paper',
  'Paper P

In [5]:
# Display the preprocessed DataFrame
df_cgs

Unnamed: 0,Material,Item,Recyclability,Description,Related Words
0,Paper,Printed paper (Glossy and non-glossy),1,Make sure it is clean before recycling.,[Printed paper (Glossy and non-glossy)]
1,Paper,Writing paper,1,Make sure it is clean before recycling.,[Writing paper]
2,Paper,Paper,1,Make sure it is clean before recycling.,"[Paper, Exam Papers, Notes, Notebook, Mail, Le..."
3,Others,Clothes,0,Clothes should be donated if they are in good ...,"[T-Shirt, Shorts, Pants, Skirt, Jacket, Shirts..."
4,Paper,Newspaper,1,Make sure it is clean before recycling.,[Newspaper]
...,...,...,...,...,...
317,Others,Stationery,0,Donate if it is in good condition.,"[marker, whiteboard marker, permanent Marker, ..."
318,Others,Bulky waste,0,Donate if it is in good condition or contact t...,"[Bulky waste, cushion, Fan, Couch, Sofa, Chair..."
319,Others,Furniture,0,Donate if it is in good condition or contact t...,"[Fan, Couch, Sofa, Chair, Coffee Table, Mattre..."
320,Others,Sports shoes,0,Repair or donate your used shoes as far as pos...,"[Sneakers, Nike, Adidas, Asics, Puma, Reebok, ..."


In [6]:
# Persist the preprocessed DataFrame to df_recyclability.pkl
save_to_pickle(file_path='df_recyclability.pkl', save_data=df_cgs)

In [8]:
# Persist the material-item mapping to dict_material_items.pkl
save_to_pickle(file_path='dict_material_items.pkl', save_data=dict_material_items)