## Scraping Latest Dengue Data

In [1]:
import requests
import selenium as sm
from bs4 import BeautifulSoup
from onemapsg import OneMapClient

import geopandas as gpd
import pandas as pd
import numpy as np
import json

import matplotlib.pyplot as plt

%matplotlib inline

### Installation of Packages

In [None]:
### Please install the onemapsg package in order to run the code below ###
# %pip install onemapsg

# documentation here: https://pypi.org/project/onemapsg/

In [None]:
### You will need a OneMap API account ###
# access token created 27 Mar 2023
# eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOjEwMTAzLCJ1c2VyX2lkIjoxMDEwMywiZW1haWwiOiJmaXNoZW50ZXJwcmlzZUBnbWFpbC5jb20iLCJmb3JldmVyIjpmYWxzZSwiaXNzIjoiaHR0cDpcL1wvb20yLmRmZS5vbmVtYXAuc2dcL2FwaVwvdjJcL3VzZXJcL3Nlc3Npb24iLCJpYXQiOjE2Nzk5MTE3MzcsImV4cCI6MTY4MDM0MzczNywibmJmIjoxNjc5OTExNzM3LCJqdGkiOiI0ZDc5NDE1YmVhYzlmMTJlNzUyMGY0N2M1MTk4MWFlMyJ9.4E_ntNYK3NMDqsuYvzjqFNAJOx_aJkDFEUYLyPky0eQ

### OneMap API Setup

In [12]:
# call OneMap API
# Credentials are taken from the ONEMAP_EMAIL / ONEMAP_PASSWORD environment
# variables, falling back to an interactive prompt, so that no account
# details are stored in the notebook itself.
import os
from getpass import getpass

OM_client = OneMapClient(
    os.environ.get("ONEMAP_EMAIL") or input("OneMap account email: "),
    os.environ.get("ONEMAP_PASSWORD") or getpass("OneMap account password: "),
)

In [6]:
# get themes data: list the themes available on OneMap
# (with more_info=False each entry in the output below carries THEMENAME,
# QUERYNAME and ICON)
OM_client.get_all_themes_info(more_info=False)

REFRESHING TOKEN. NEW EXPIRY: 1680170937


{'Theme_Names': [{'THEMENAME': 'MCE KPE Speed Camera',
   'QUERYNAME': 'mce_kpe_speed_camera',
   'ICON': 'icon_enforcementCam_20-01.jpg'},
  {'THEMENAME': 'Singapore Police Force Mobile Speed Cameras',
   'QUERYNAME': 'spf_msc',
   'ICON': 'SpeedCam_logo.gif'},
  {'THEMENAME': 'Liquor Control Zone(s) proclaimed under Liquor Control (Supply and Consumption) Act 2015',
   'QUERYNAME': 'liquor_control_zones',
   'ICON': 'Crest_20x20.jpg'},
  {'THEMENAME': 'Singapore Police Force - 32nd ASEAN Summit Istana Special Zone',
   'QUERYNAME': 'as_istana_core',
   'ICON': 'Crest_20x20.jpg'},
  {'THEMENAME': 'Singapore Police Force - 32nd ASEAN Summit Istana Special Event Area',
   'QUERYNAME': 'as_istana_buffer',
   'ICON': 'Crest_20x20.jpg'},
  {'THEMENAME': 'Inland Ash Scattering Gardens',
   'QUERYNAME': 'ias',
   'ICON': 'circle.png'},
  {'THEMENAME': 'CET Centres', 'QUERYNAME': 'cetcentres', 'ICON': 'WSQ.jpg'},
  {'THEMENAME': 'HDB Branches',
   'QUERYNAME': 'hdb_branches',
   'ICON': 'M090

### Functions

In [13]:
def get_datestamp(dic, fmt='%Y-%m-%d'):
    '''
    Function that gets the datestamp from the dictionary output scraped from OneMap
    
    The timestamp is read from dic['SrchResults'][0]['DateTime']['date'],
    shifted forward by 8 hours and then formatted with fmt.
    
    Parameters:
    -----------
    dic: dictionary
        dictionary output from OneMap
        
    fmt: str
        strftime format of the returned datestamp
        Default = %Y-%m-%d
        
    Returns:
    --------
    str or None
        datestamp in format fmt
        None (after printing a message) if dic does not have the expected
        structure or does not hold a parseable timestamp
        
    '''
    
    # check if dic is valid with SrchResults and DateTime keys
    try:
        # extract datestamp from dictionary
        datestamp = pd.to_datetime(dic['SrchResults'][0]['DateTime']['date'])
        # a missing/empty value parses to None/NaT, which cannot be formatted
        if datestamp is None or datestamp is pd.NaT:
            raise ValueError('no datestamp found')
        # convert to SG time (UTC+8) -- assumes the scraped timestamp is in UTC
        datestamp = datestamp + pd.DateOffset(hours=8)
    except (KeyError, IndexError, TypeError, ValueError):
        # only structural/parsing problems are treated as "invalid input";
        # anything else (e.g. KeyboardInterrupt) is allowed to propagate
        print('Input is not a valid dictionary output from OneMap')
        return None
    
    # return the datestamp based on fmt format
    return datestamp.strftime(fmt)

In [14]:
def scrape_onemap_themes(client, theme):
    '''
    Function that scrapes the specified theme from OneMap
    
    Each requested theme is first checked with client.get_theme_info; it is
    only retrieved (client.retrieve_theme) when the first key of that response
    is 'Theme_Names'. A message is printed for every theme, whether it was
    scraped or rejected.
    
    Parameters:
    -----------
    client: onemapsg object
        OneMapClient object that has been instantiated
        (any object offering get_theme_info and retrieve_theme works)
        
    theme: str or list
        theme(s) to be scraped; a tuple is treated like a list
    
    Returns:
    --------
    dictionary
        dictionary output from OneMap
        if theme is a list, returns a dictionary of dictionaries keyed by
        theme name (invalid themes are left out)
        if theme is a single invalid theme, returns an empty dictionary
    
    '''
    
    def _is_available(name):
        # a theme counts as available when the first key of its info response
        # is 'Theme_Names'; an empty/missing response counts as unavailable
        info = client.get_theme_info(name)
        return next(iter(info or {}), None) == 'Theme_Names'
    
    many = isinstance(theme, (list, tuple))
    
    # create empty dictionary
    output = {}
    
    for item in (theme if many else [theme]):
        # check that requested theme is available on OneMap
        if not _is_available(item):
            print(f'{item} is an invalid theme')
            continue
        
        # if theme name is valid, scrape output
        scraped = client.retrieve_theme(item)
        print(f'Successfully scraped {item}')
        
        if many:
            output[item] = scraped
        else:
            # a single theme returns just the scraped output itself
            output = scraped
    
    # return output
    return output

In [15]:
def save_onemap_themes(dic, theme, folder_path='../datasets'):
    '''
    Function that saves a dictionary of dictionary outputs from OneMap into individual json files with datestamps
    
    Every output is written to {folder_path}/{name}_{datestamp}.json, where
    datestamp comes from get_datestamp(output). A single OneMap output is
    recognised by its top-level 'SrchResults' key and is named after theme;
    otherwise dic is treated as a dictionary of outputs and each file is named
    after its key (theme is not used in that case).
    
    Errors while writing (e.g. a missing folder) are raised to the caller.
    
    Parameters:
    -----------
    dic: dictionary
        either a single OneMap dictionary output or a dictionary of dictionary outputs
        
    theme: str or list
        theme(s) to be saved    
        
    folder_path: str
        location to save the files to
        
    Returns:
    --------
    Nil
    
    '''
    
    # check if it is dictionary of dictionaries or just a single OneMap output
    if isinstance(dic, dict) and 'SrchResults' in dic:
        outputs = [(theme, dic)]
    else:
        outputs = dic.items()
    
    for key, item in outputs:
        datestamp = get_datestamp(item)
        # the context manager guarantees the file is flushed and closed
        with open(f'{folder_path}/{key}_{datestamp}.json', 'w') as file:
            json.dump(item, file)
        print(f'Successfully saved {key}')
    
    return 

### Scraping

In [16]:
# define list of themes to scrape:
# the island-wide dengue cluster theme, followed by the dengue case themes and
# then the breeding habitat themes for each of the five areas
themes = ['dengue_cluster'] + [
    f'{prefix}_{region}_area'
    for prefix in ('denguecase', 'breedinghabitat')
    for region in ('northeast', 'northwest', 'central', 'southeast', 'southwest')
]

In [17]:
%%time
# scrape every theme listed in `themes` with the authenticated client;
# because a list is passed, `test` is a dictionary of outputs keyed by theme name
test = scrape_onemap_themes(OM_client, themes)
# write each scraped output to its own datestamped json file
# (folder_path is left at its default, '../datasets')
save_onemap_themes(test, themes)

REFRESHING TOKEN. NEW EXPIRY: 1680170937
Successfully scraped dengue_cluster
Successfully scraped denguecase_northeast_area
Successfully scraped denguecase_northwest_area
Successfully scraped denguecase_central_area
Successfully scraped denguecase_southeast_area
Successfully scraped denguecase_southwest_area
Successfully scraped breedinghabitat_northeast_area
Successfully scraped breedinghabitat_northwest_area
Successfully scraped breedinghabitat_central_area
Successfully scraped breedinghabitat_southeast_area
Successfully scraped breedinghabitat_southwest_area
Successfully saved dengue_cluster
Successfully saved denguecase_northeast_area
Successfully saved denguecase_northwest_area
Successfully saved denguecase_central_area
Successfully saved denguecase_southeast_area
Successfully saved denguecase_southwest_area
Successfully saved breedinghabitat_northeast_area
Successfully saved breedinghabitat_northwest_area
Successfully saved breedinghabitat_central_area
Successfully saved breeding