# Navigator

In [209]:
import json
import requests
import pandas as pd

from joblib import Parallel, delayed
from tqdm.notebook import tqdm

## Get All Organisations

In [32]:
# Read the organisation listing from disk and decode it as JSON.
with open('all_services.json', 'rb') as services_file:
    org_json = json.loads(services_file.read())

In [33]:
# Collect the value stored at ['t'][0]['v'] of every record in org_json.
all_orgs = [record['t'][0]['v'] for record in org_json]

In [34]:
# Display how many entries were collected in all_orgs.
len(all_orgs)

373

In [213]:
# Show the collected values as a single-column table named 'ncss_member'.
pd.DataFrame(all_orgs, columns=['ncss_member'])

Unnamed: 0,ncss_member
0,365 Cancer Prevention Society
1,"*SCAPE Co., Ltd."
2,Abilities Beyond Limitations And Expectations ...
3,ADAM Association
4,Adullam Life Counselling
5,Adventist Home for the Elders
6,Adventist Nursing and Rehabilitation Centre
7,Agape Counselling And Training Centre
8,Agape Little Uni
9,Aidha Ltd


## Get All Programmes
1. Copy the request as a bash cURL command
2. Paste into Excel
3. Excel automatically structures requests based on:
    1. Start: y=0
    2. End: y=44482
    3. No. of pixels per row = 23
    4. No. of elements = 1934
4. Create bash script
5. Run and save to results.txt

In [106]:
# Load every line of the saved results file as raw bytes.
all_lines = []
with open('resources/Navigator Data/results.txt', 'rb') as results_file:
    all_lines.extend(results_file)

In [109]:
from bs4 import BeautifulSoup

# Each entry of all_lines is decoded as a JSON document; its `tooltipText`
# field is itself a JSON-encoded string, so it is decoded a second time and
# the 'htmlTooltip' value is kept.
#
# NOTE(review): this cell previously ran eval() twice on the file contents
# (after rewriting 'false' to 'False'). eval() executes whatever code the file
# contains, fails on JSON `true`/`null`, and the text rewrite could alter
# tooltip text containing the word "false". json.loads parses the same
# payload without those problems.
all_html = []
for line in all_lines:
    response = json.loads(line.decode('utf-8'))
    tooltip_text = response['vqlCmdResponse']['cmdResultList'][0]['commandReturn']['tooltipText']
    all_html.append(json.loads(tooltip_text)['htmlTooltip'])

In [161]:
# Split the <div> texts of every tooltip into three sections:
# programme name, description and target clients.
all_progs = []
all_desc = []
all_target = []

# Positions in all_html whose tooltip does not contain exactly five <div>s.
nonstandard = []

for position, html in enumerate(all_html):
    divs = BeautifulSoup(html).find_all('div')
    if len(divs) != 5:
        nonstandard.append(position)

    # Text gathered so far per section; `section` names the one being filled.
    parts = {'prog': '', 'desc': '', 'target': ''}
    section = 'prog'

    for idx, div in enumerate(divs):
        text = div.text

        if idx == 0:
            # The first <div> always starts the programme name.
            section = 'prog'
        elif parts['prog'] != '' and parts['target'] == '':
            if parts['desc'] == '' and text == '':
                # An empty <div> after the name switches to the description.
                section = 'desc'
            elif parts['desc'] != '' and text[:14] == 'Target Clients':
                # A 'Target Clients' <div> after the description switches to the target.
                section = 'target'

        # Start the section with this text, or join it on with a single space.
        parts[section] = text if parts[section] == '' else parts[section] + ' ' + text

    # One entry per tooltip in each output list.
    all_progs.append(parts['prog'])
    all_desc.append(parts['desc'])
    all_target.append(parts['target'])

In [173]:
import pandas as pd

# One row per parsed tooltip.
columns = {
    'programmes': all_progs,
    'desc': all_desc,
    'target': all_target,
}
df = pd.DataFrame(columns)

# Drop the 'Target Clients : ' label, keeping only the value.
df['target'] = df['target'].str.replace('Target Clients : ', '')

# The Mendaki youth programmes entry (row 1000) has no info: blank both fields.
for column in ('desc', 'target'):
    df.loc[1000, column] = ''

# Export is left disabled:
# df.to_csv('resources/Navigator Data/ncss_programmes.csv', index=False)

In [177]:
# Read the family-services GeoJSON file and decode it.
with open('resources/Navigator Data/family-services-geojson.geojson', 'rb') as geojson_file:
    fsc = json.loads(geojson_file.read())

In [204]:
# Turn each feature's HTML 'Description' table into a {header: value} dict.
# The description is parsed once per feature (it used to be parsed twice,
# once for the <th> cells and once for the <td> cells).
all_feats = []
for feat in fsc['features']:
    # [1:] skips the first <tr> (presumably a title row -- confirm against the data).
    rows = BeautifulSoup(feat['properties']['Description']).find_all('tr')[1:]
    # Each remaining row contributes its <th> text as key and <td> text as value.
    all_feats.append({row.find('th').text: row.find('td').text for row in rows})

In [208]:
# Convert the list of per-feature dicts into a table: one row per feature,
# one column per key. NOTE: this rebinds all_feats from a list to a DataFrame.
all_feats = pd.DataFrame(all_feats)

Unnamed: 0,ADDRESSBLOCKHOUSENUMBER,ADDRESSBUILDINGNAME,ADDRESSFLOORNUMBER,ADDRESSPOSTALCODE,ADDRESSSTREETNAME,ADDRESSTYPE,ADDRESSUNITNUMBER,DESCRIPTION,FMEL_UPD_D,HYPERLINK,INC_CRC,LANDXADDRESSPOINT,LANDYADDRESSPOINT,NAME,PHOTOURL
0,,,,90027,Blk 27 Telok Blangah Way #01-1018,,,Family Services,20190904144317,,436F9375C9D73A75,0,0,@27 Family Service Centre,
1,,,,50004,Blk 4 Sago Lane #03-101,,,Family Services,20190904144317,,34A84769D89F2359,0,0,Kreta Ayer Family Services at SSO@Kreta Ayer,
2,,,,460426,Blk 426 Bedok North Road #01-515,,,Family Services,20190904144317,,DDE982AB1B962E81,0,0,MSF Family Services at SSO@Bedok (Kembangan Ch...,
3,,,,560230,Blk 230 Ang Mo Kio Ave 3 #01-1264,,,Family Services,20190804123022,http://www.amkfsc.org.sg/,D200BB923511EAD8,0,0,Ang Mo Kio Family Service Centre,
4,,,,560643,Blk 643 Ang Mo Kio Ave 5 #01-3001,,,Family Services,20190804123022,http://www.amkfsc.org.sg/,C142C1D3CF5D9EC8,0,0,Ang Mo Kio Family Service Centre,
5,,,,321107,Blk 107 Towner Road #01-356,,,Family Services,20190804123022,http://www.awwa.org.sg,DC4611B2C96A12CA,0,0,AWWA Family Service Centre,
6,,,,730718,Blk 718 Woodlands Ave 6 #01-658 S(730718),,,Family Services,20190804123022,http://www.carecorner.org.sg/,82F0F611C0ADB9C2,0,0,Care Corner Family Service Centre (Admiralty),
7,,,,141088,Blk 88 Tanglin Halt Road #05-01,,,Family Services,20190804123022,http://www.carecorner.org.sg/,6C304E098972FF99,0,0,Care Corner Family Service Centre (Queenstown),
8,,,,522299,Blk 299B Tampines Street 22,,,Family Services,20190804123022,http://www.carecorner.org.sg/fsctampines.html,A5DC534A2588AE28,0,0,Care Corner Family Service Centre (Tampines),
9,,,,310158,Blk 158 Lor 1 Toa Payoh #01-1522,,,Family Services,20190804123022,http://www.carecorner.org.sg/,AC98CCC20DBDD8B2,0,0,Care Corner Family Service Centre (Toa Payoh),


In [24]:
import pandas as pd
import re
from bs4 import BeautifulSoup

In [87]:
# Load every line of the saved results file as raw bytes.
all_lines = []
with open('resources/Scrape Programmes/results.txt', 'rb') as results_file:
    all_lines.extend(results_file)

In [136]:
# Extract, for every entry of all_lines, the "more information" URL and the
# "directions" map query from the tooltip's action list.
#
# Exactly one value is appended to all_links and to all_orgs per input line,
# so both lists stay aligned with the rows they are later attached to. A line
# without the corresponding action gets ''.
#
# Changes from the earlier version of this cell:
# * The payload is parsed with json.loads instead of a double eval(), which
#   executed whatever the file contained and failed on JSON `true`/`null`.
# * The hard-coded patch for rows 266, 269, 274, 436, 894 and 1868 is gone.
#   It sat inside the per-action loop, so it appended one '' per action and
#   only lined up when such a row had a single action. Presumably those rows
#   simply lack the "more information" action; the '' default covers them.
# * The maps pattern is a raw string, avoiding the invalid '\?' escape.
# NOTE(review): if a line ever carries the same action twice, the last one wins.
all_links = []
all_orgs = []
for line in all_lines:
    response = json.loads(line.decode('utf-8'))
    tooltip = json.loads(
        response['vqlCmdResponse']['cmdResultList'][0]['commandReturn']['tooltipText'])

    link_text = ''
    org_text = ''
    for item in tooltip['actions']['commandItems']:
        if item['name'] == 'Click here for more information.':
            # Keep what sits between url=" and the trailing visual-id attribute.
            link_text = re.sub('.*url="', '', item['command'])
            link_text = re.sub('" visual-id-pres.*', '', link_text).strip()
        elif item['name'] == 'Click here for directions':
            # Keep the query that follows maps?q= in the directions command.
            org_text = re.sub(r'.*maps\?q=', '', item['command'])
            org_text = re.sub('" visual-id-pres.*', '', org_text).strip()

    all_links.append(link_text)
    all_orgs.append(org_text)
                

In [141]:
# Attach the extracted links and map queries to the programme table and save
# it back to the same CSV.
df = pd.read_csv('resources/Scrape Programmes/ncss_programmes.csv')
df['link'] = all_links

# Cut each URL down to its site root for .sg and .org addresses.
# regex=True is passed explicitly: pandas 2.0 changed the default of
# Series.str.replace to literal matching, which would turn these trims into
# no-ops. The dot is escaped so that only a literal ".sg/" / ".org/" matches.
df['link'] = df['link'].str.replace(r'\.sg/.*', '.sg/', regex=True)
df['link'] = df['link'].str.replace(r'\.org/.*', '.org/', regex=True)
df['link'] = df['link'].astype(str)

# Apply the same trim to .com addresses, leaving any link that mentions
# 'facebook' untouched.
not_facebook = ~df['link'].str.contains('facebook')
df.loc[not_facebook, 'link'] = (
    df.loc[not_facebook, 'link'].str.replace(r'\.com/.*', '.com/', regex=True))

df['org'] = all_orgs
df.to_csv('resources/Scrape Programmes/ncss_programmes.csv', index=False)