# Scrape the banking websites (7 June 2016)
This notebook explains how to scrape all the banking sites, including for ATMs. Code for scraping each is included. The list is as follows:

Branches:
- ANZ (done)
- ANZ (NZ) (done)
- ASB (NZ) (done)
- Aussie Home Loans (done)
- Bank of Queensland
- Bankwest (done)
- BNZ (NZ)
- Commonwealth Bank (done)
- Kiwi Bank (NZ)
- NAB
- NAB Business Centres
- Suncorp Bank
- Westpac

ATMS:
- ANZ ATM'S (done)
- ANZ ATM'S (NZ) (done)
- ASB ATM'S (NZ) (done)
- Bank of Queensland ATM's
- Bankwest ATM's (done)
- BNZ ATM'S (NZ)
- Cashcard ATM's
- CBA ATM'S
- Kiwi Bank ATM'S (NZ)
- NAB ATM'S
- RediATM
- Suncorp Bank ATM's
- Westpac ATM's



-----------------------------------
### Useful functions
These are just some general functions that are handy for dealing with strings, creating_meshes etc.

In [1]:
# Clean a strip to remove line breaks etc.
# remove all the tab spaces, line breaks and new lines from some text
def clean_str(str1):
    return str1.replace('\t','').replace('\r','').replace('\n','')

# Creates a mesh; mesh_width is the width of the grid. the max and min lat and lng values are based on those of Australia. If
# looking at another country then just change these accordingly
def make_mesh(mesh_width):
    # lat, lng values to cover Australia
    latMax = -10
    latMin = -43.6
    lngMax = 153.9
    lngMin = 112.8
    
    # total number of cells in mesh
    num_cells = mesh_width ** 2
    
    # the size of each block
    latDif = (latMax - latMin) / mesh_width
    lngDif = (lngMax - lngMin) / mesh_width
    
    # generate the lat, lng values for each block
    meshblocks = []
    
    for i in range(0, num_cells):
    # which row and column of the grid we are in
        ix0 = i % mesh_width
        ix1 = i % mesh_width + 1
        iy0 = i / mesh_width
        iy1 = i / mesh_width + 1
        latLower = latMin + float(iy0) * latDif
        latUpper = latMin + float(iy1) * latDif
        lngWest = lngMin + float(ix0) * lngDif
        lngEast = lngMin + float(ix1) * lngDif 
        meshblocks.append( {"upperLatitude": latUpper, "lowerLatitude": latLower, 
                      "eastLongitude": lngEast, "westLongitude": lngWest} )
        
    return meshblocks

# cover a rectangle with a disc
def rect_discs(rect):
    latMin, latMax = rect[0], rect[1]
    lngMin, lngMax = rect[2], rect[3]
    # centre
    latCentre = 0.5 * (latMin + latMax)
    lngCentre = 0.5 * (lngMin + lngMax)
    # upper right and lower left hand corner
    ne = (latMax, lngMax)
    sw = (latMin, lngMin)
    # find the distance between pairs of opposite sides of the rectangle
    radius = calc_dist(sw[0], ne[0], sw[1], ne[1])
    # return a circle with centre points and radius distLat (so the disc is large enough to cover the rectangle)
    return (latCentre, lngCentre, radius)
    
# Creates a mesh consisting of discs that cover a given region. Useful for sites that require (lat, lng) and radius specified
def make_discmesh(mesh_width):
    # first make a mesh of rectangles, then cover each with four discs.
    blocks = make_mesh(mesh_width)
    discs = []
    

### Function to calculate the lat,lng co-ordinates when moving a certain number of metres from existing lat, lng

Note that this is very rough, e.g., it assumes a spherical earth, so should not be used if accuracy is important

In [2]:
from math import asin, sin, cos, acos, pi, floor
from math import radians, degrees

# calculate the cosine of the angle between two lat, lng values
def cosang( lat0, lng0, lat1, lng1 ):
    lat0, lng0, lat1, lng1 = radians(lat0), radians(lng0), radians(lat1), radians(lng1)
    return ( cos(lat0) * cos(lat1) * cos(lng0 - lng1) + sin(lat0) * sin(lat1) )

# calculate the distance (in metres) between two points with lat, lng values of (lat0, lng0), (lat1, lng1) respectively.
# these are in degrees
def calc_dist(lat0, lng0, lat1, lng1):
    circ_earth = 40075 * 1000
    ang = acos( cosang( lat0, lng0, lat1, lng1 ) )
    return (ang * float(circ_earth) / (2 * pi) )

# don't need these anymore
def deg_to_rad(deg):
    return float(deg) * 2 * pi / float(360)

def rad_to_deg(rad):
    return float(rad) * float(360) / (2 * pi)

# take an angle, in radians, and output an angle between 0 and 2pi
def radmod(rad):
    k = floor( rad / (2 * pi) )
    return (rad - 2 * pi * k)

# This function gives the new lat, lng values if we
# move a distance d (in radians) at an angle tc
# from the original point
# Note: tc is true course which is in radians counter clockwise from North. 
# Important: The input and output angles must be in radians and the output is in radians
def get_latlng(lat_old, lng_old, d, tc):
    lat = asin(sin(lat_old)*cos(d)+cos(lat_old)*sin(d)*cos(tc))
    if cos(lat_old) == 0:
        lng = lon_old
    else:
        lng = radmod(lng_old-asin(sin(tc)*sin(d)/cos(lng_old))+pi)-pi
    return (lat, lng)


In [3]:
get_latlng(lat, lng, 0/ 40000, pi / 4)

NameError: name 'lat' is not defined

### Data specific to Australia
Some useful data specific to Australia

In [4]:
latMax = -10
latMin = -43.6
lngMax = 153.9
lngMin = 112.8

# roughly the centre of Australia
[lat_centre, lng_centre] = [-27.297927, 134.626141]
# radius needed for disc at centre to cover Australia
radius = 3000

# Banks and code for scraping each
---------------------------------------------------------

Below is the code for scraping each of the banks in the data refresh

### ANZ and ANZ ATMs

We can do this in single request, we just have to make sure that the value associated with the "cluster" key is empty,
and that the bbox value describes a box bounding all of Australia.

In [5]:
# Scrape ANZ
import requests
import json

url = 'http://data.nowwhere.com.au/v3/features/ANZ_AUST'

# so we get all of Australia
bbox = str(lngMin) + " " + str(latMin) + " " + str(lngMax) + " " + str(latMax)

# to get branches have 'filter': '(branch = 1 OR agency = 1)', to get ATMs have 'filter': '(atm = 1)'
params_anz = {"callback": "MapDS.CallBacks[1465646976932]", "key": "3o1J357b26JS9yW2Tw5ULCxfiCwRg13XRyQbv23H", 
         "sortby": "distance", "filter": "(branch = 1 OR agency = 1)", "cluster": "", 
          "bbox": bbox,
         "enc": "true", "nocache": "1465646874943"}

headers_anz = {"Referer": "http://www.locate.anz.com/anz/australia", "Cookie": "_ga=GA1.3.1897809232.1463645139"}
           
#, "Host": "data.nowwhere.com.au",
#          "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0",
#          "Accept": "*/*", "Accept-Language": "en-US,en;q=0.5", "Accept-Encoding": "gzip, deflate",
#          "Cookie": "_ga=GA1.3.1897809232.1463645139", "Connection": "keep-alive"}

page = requests.get(url, params=params_anz, headers=headers_anz)

data_nice = page.content[31:-1].decode('utf-8')

# write file

f = open('anzoutput-150616.txt','w')
f.write(data_nice)
f.close()

UnicodeEncodeError: 'ascii' codec can't encode character u'\u2013' in position 258752: ordinal not in range(128)

In [9]:
data_nice.count('uuid')

721

### ANZ (New Zealand) and their ATMs
url: http://data.nowwhere.com.au/v3/features/ANZ_NZ

- filter: 'branch = 1 AND company = "ANZ" AND active = "1"'
- cluster: ''
- IMPORTANT: must have headers

In [None]:
# data specific for New Zealand
bbox = '161.63658756250004 -47.83563888665546 179.999 -32.612062461269474'

In [13]:
import requests

url = 'http://data.nowwhere.com.au/v3/features/ANZ_NZ'

params = {'callback': 'MapDS.CallBacks[1465859021873]', 'key': '6na99llr46W2K42jB3lAG563kiNEuD5zn3jqj8u9',
         'sortby': 'distance', 'filter': 'branch = 1 AND company = "ANZ" AND active = "1"', 'cluster': '',
         'bbox': '161.63658756250004 -47.83563888665546 179.999 -32.612062461269474', 'enc': 'true', 
         'nocache': '1465859001262'}

headers = {'Host': 'data.nowwhere.com.au', 'Referer': 'http://www.locate.anz.com/anz/newzealand/', 
           'Accept-Encoding': 'gzip, deflate', 'Cookie': '_ga=GA1.3.2090984877.1450660365', 
           'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'Accept': '*/*', 
           'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0'}
          
r = requests.get(url, params=params, headers=headers)
output = r.text

In [14]:
# how many site have we found
print("Found %d branches"%output.count('displayname'))

Found 220 sites


In [15]:
# for ATMS
import requests

url = 'http://data.nowwhere.com.au/v3/features/ANZ_NZ'

params = {'callback': 'MapDS.CallBacks[1465860238418]', 'key': '6na99llr46W2K42jB3lAG563kiNEuD5zn3jqj8u9',
         'sortby': 'distance', 'filter': 'atm = 1 AND active = "1"', 'cluster': '',
         'bbox': '161.63658756250004 -47.83563888665546 179.999 -32.612062461269474', 'enc': 'true', 
         'nocache': '1465859001262'}

headers = {'Host': 'data.nowwhere.com.au', 'Referer': 'http://www.locate.anz.com/anz/newzealand/', 
           'Accept-Encoding': 'gzip, deflate', 'Cookie': '_ga=GA1.3.2090984877.1450660365', 
           'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'Accept': '*/*', 
           'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0'}
          
r = requests.get(url, params=params, headers=headers)
output = r.text

In [16]:
# how many ATMs have we found
print("Found %d ATMs"%output.count('displayname'))

Found 666 ATMs


### ASB (branches and ATMs)
GET request to

url: 'https://api.asb.co.nz/public/v1/locations?view=GeoSummary'

headers = {'Accept': 'application/json, text/plain, */*', 'Accept-Encoding': 'gzip, deflate, sdch, br',
'Accept-Language': 'en-US,en;q=0.8', 'apikey': 'l7xx106c7605d7f34e30af0017ca9c69be51', 'Connection': 'keep-alive',
'Host': 'api.asb.co.nz', 'Origin': 'https://www.asb.co.nz', 'Referer': 'https://www.asb.co.nz/tools/branchandatmlocator/location',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36'}

- ATMs have "type": "atm" (use ATMId as site reference?)
- Branches have "type": "branch" (site reference is 'BranchNumber')

In [19]:
import requests

url = 'https://api.asb.co.nz/public/v1/locations?view=GeoSummary'

headers = {'Accept': 'application/json, text/plain, */*', 'Accept-Encoding': 'gzip, deflate, sdch, br',
'Accept-Language': 'en-US,en;q=0.8', 'apikey': 'l7xx106c7605d7f34e30af0017ca9c69be51', 'Connection': 'keep-alive',
'Host': 'api.asb.co.nz', 'Origin': 'https://www.asb.co.nz', 'Referer': 'https://www.asb.co.nz/tools/branchandatmlocator/location',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36'}

r = requests.get(url, headers=headers)
output = r.text

In [20]:
# show the number of branches and ATMs
print("Found %d ATMs and %d branches" % ( output.count('"type":"atm"'), output.count('"type":"branch"')))

Found 358 ATMs and 133 branches


### Aussie Home Loans

GET requests (HTML)

Nested multiple URL scrape:
- get the urls from urls of the form 'http://www.aussie.com.au/find-store/[state].html' where [state] = 'nsw', 'vic', 'qld', 
'nt', 'wa', 'tas', 'sa', 'act'
- Then at each of these URLs we want all links '/find-store/[state]/[suburb].html'


ACTUALLY JUST NEED to scrape all URLs from single url, e.g., 'http://www.aussie.com.au/find-store/nsw.html'

In [76]:
import requests
from lxml import html
import pandas as pd

url = 'http://www.aussie.com.au/find-store/nsw.html'

# this is the path to specify to get the urls (similar to the data hierarchy in the Official GapMaps Webscraper)
path = './/ul[@class="panel-collapse collapse in"]//li//a'

# scrape the page
page = requests.get(url)

# parse the html to get the tree
tree = html.fromstring(page.content)

# now get all the urls and return the list
url_items = tree.findall(path)

# Now read off the urls
urls = []
for i in url_items:
    urls.append(i.get('href'))

# how many locations
print('Found %d locations' % len(urls) )

# the paths for the address and phone information of each site
path_addr = './/h2[@class="first-heading"]'
path_phone = './/div[@class="component-freetext"]//table//tr//td[@class="col-sm-3 col-md-3 col-lg-3"]//a//span'

url_base = 'http://www.aussie.com.au'

list_addr = []
list_phone = []

# now for each of the urls in urls, scrape the information

for i in urls:
    url = url_base + i
    page = requests.get(url)
    print("Scraping data from URL: %s" % url)
    tree = html.fromstring(page.text)
    addr = tree.findall(path_addr)
    phone = tree.findall(path_phone)
    list_addr.append( addr[0].text )
    list_phone.append( phone[0].text )
    
# Now output the data in a nice, delimited csv
data = {'url': urls, 'address': list_addr, 'phone': list_phone}
df1 = pd.DataFrame(data=data)
df1.to_csv('out-aussiehomeloands.csv', sep=',')


Found 187 locations


'http://www.aussie.com.au/find-store/nsw.html'

### Bank of Queensland

-----------
### BNZ

HTML request

Get the regions at
https://www.bnz.co.nz/locations

Start at 'li class="findlanding_browse_section_item"><a href="' 

end at '"'

Then for each URL we want ANOTHER URL found between
'a href="/locations/'
and
'"'

In [7]:
import requests

url_main = 'https://www.bnz.co.nz/locations'

path_addr = './/h2[@class="first-heading"]'
path_phone = './/div[@class="component-freetext"]//table//tr//td[@class="col-sm-3 col-md-3 col-lg-3"]//a//span'

# firstly get the URLs for each of the regions
path_reg = './/ul'
        

https://www.bnz.co.nz/locationswellingtonlocations/wellington#content
https://www.bnz.co.nz/locationscanterburylocations/canterbury#content
https://www.bnz.co.nz/locationswaikatolocations/waikato#content
https://www.bnz.co.nz/locationsaucklandlocations/auckland#content
https://www.bnz.co.nz/locationsmanawatu-wanganuilocations/manawatu-wanganui#content
https://www.bnz.co.nz/locationsnorthlandlocations/northland#content
https://www.bnz.co.nz/locationsbay-of-plentylocations/bay-of-plenty#content
https://www.bnz.co.nz/locationshawkes-baylocations/hawkes-bay#content
https://www.bnz.co.nz/locationstaranakilocations/taranaki#content
https://www.bnz.co.nz/locationsgisbornelocations/gisborne#content
https://www.bnz.co.nz/locationssouthlandlocations/southland#content
https://www.bnz.co.nz/locationswest-coastlocations/west-coast#content
https://www.bnz.co.nz/locationsotagolocations/otago#content
https://www.bnz.co.nz/locationstasman-nelson-marlboroughlocations/tasman-nelson-marlborough#content


In [36]:
print(url_reg)
print(url_base)

/locations/wellington
https://www.bnz.co.nz
locations/tasman-nelson-marlborough#content


---------------
### CBA 
For finding locations of branches:
- URL: http://service.commbank.com.au/locate-us/default.aspx/GetLocationsBySuburb
- json data: {"state":"NSW","suburb":"Corrimal"}

For finding locations of ATMs:
- URL: http://service.commbank.com.au/locate-us/default.aspx/GetLocationsForBranchesATMs
- json data: {"searchCriteria":"{\"IsBranches\":false,\"IsATMs\":true,\"Street\":\"\",\"SuburbOrPostCode\":\"VIC, Carlton, 3053\",\"FacilitiesList\":\"\",\"ATMFeaturesList\":\"\",\"IsWeekendTrading\":false,\"CentreLatitude\":-37.798389,\"CentreLongitude\":144.96944410000003}","bounds":""}

Need to make a request for each suburb in Australia (~16000 requests)

In [140]:
import pandas as pd

# For finding the 
url = 'http://service.commbank.com.au/locate-us/default.aspx/GetLocationsForBranchesATMs'
data = {"searchCriteria":"{\"IsBranches\":false,\"IsATMs\":true,\"Street\":\"\",\"SuburbOrPostCode\":\"VIC, Carlton, 3053\",\"FacilitiesList\":\"\",\"ATMFeaturesList\":\"\",\"IsWeekendTrading\":false,\"CentreLatitude\":-37.798389,\"CentreLongitude\":144.96944410000003}","bounds":""}

outdata = ''

# Read in the list of postcodes and states
state_postcode = pd.read_csv('Australian_Post_Codes_Lat_Lon.csv',sep=',')
states, suburbs = list(state_postcode[['state']].values[:,0]), list(state_postcode[['suburb']].values[:,0])
lats, lngs = list(state_postcode[['lat']].values[:,0]), list(state_postcode[['lon']].values[:,0])
pcs = list(state_postcode[['postcode']].values[:,0])

# run through the list of suburb, state pairs and make a request for each
num_pairs = len(states)

for i in range(0, 10):
    state = states[i]
    suburb = suburbs[i]
    lat, lng = lats[i], lngs[i]
    postcode = pcs[i]
    SuburbOrPostCode = str(state) + ", " + str(suburb) + ", " + str(postcode)
    page = requests.post(url, json=data)
    num_sites = page.content.count('"Latitude')
    outdata += page.content
    

--------------------
### Kiwi Bank

-------------------------
### NAB 

-------------------------------
### NAB Business Centres 

------------------
### Suncorp 

------------------------
### Westpac 

--------------------------
### RediATM 
GET request to 
URL = https://www.ehoundplatform.com/api/1.1/proximity_search
params = {"output": "json", "jsonp": "showLocs", "lat": -37.819, "lon": 145.1534, "count": 10000, "geo": "undefined", 
"service": "undefined", "max_distance": 0, "filters": "extra:eq:rediatm", "logic": "", "log_type": "web", "create_log": "true",
"api_key": "ra3354a55qw79up", "custom_logic": "undefined", "ch": 5568}

'Count' seems to max out at 100

Recursive scrape

In [37]:
url = 'https://www.ehoundplatform.com/api/1.1/proximity_search'

params = {"output": "json", "jsonp": "showLocs", "lat": -37.819, "lon": 145.1534, "count": 10000, "geo": "undefined", 
"service": "undefined", "max_distance": 0, "filters": "extra:eq:rediatm", "logic": "", "log_type": "web", "create_log": "true",
"api_key": "ra3354a55qw79up", "custom_logic": "undefined", "ch": 5568}

In [40]:
r = requests.get(url, params=params)
print(len(r.content))

14401


In [74]:
url = 'https://www.ehoundplatform.com/api/1.1/proximity_search'
params = {"output": "json", "jsonp": "showLocs", "lat": -37.819, "lon": 145.1534, "count": 100, "geo": "undefined", 
"service": "undefined", "max_distance": 1000, "filters": "extra:eq:rediatm", "logic": "", "log_type": "web", "create_log": "true",
"api_key": "ra3354a55qw79up", "custom_logic": "undefined", "ch": 5568}
r = requests.get(url, params=params)
print(len(r.content))

143986


In [75]:
print(r.content.count('loc_id'))

100


In [79]:
out_data = ''

# Divide a rectangle along the shortest direction (rect is a list of length 4 [upperLat, lowerLat, eastLng, westLng]), return
# two new rectangles
def nicedivide_rect(rect):
    upperLat = rect[0]
    lowerLat = rect[1]
    eastLng = rect[2]
    westLng = rect[3]
    
    latdif = upperLat - lowerLat
    lngdif = eastLng - westLng
    
    # check to see which is the shorter side of the rectangle and divide in two along this side
    if lngdif > latdif:
        eastLng1 = eastLng
        westLng1 = 0.5 * (eastLng + westLng)
        lowerLat1 = lowerLat
        upperLat1 = upperLat
        
        eastLng2 = 0.5 * (eastLng + westLng)
        westLng2 = westLng
        lowerLat2 = lowerLat
        upperLat2 = upperLat
    else:
        eastLng1 = eastLng
        westLng1 = westLng
        lowerLat1 = lowerLat
        upperLat1 = 0.5 * (lowerLat + upperLat)
        
        eastLng2 = eastLng
        westLng2 = westLng
        lowerLat2 = 0.5 * (lowerLat + upperLat)
        upperLat2 = upperLat
        
    return [ [upperLat1, lowerLat1, eastLng1, westLng1], [upperLat2, lowerLat2, eastLng2, westLng2]]

def get_locs(url, rect, data):
    r = requests.post(url, params=params, json=data)
    output = r.content
    num_sites = output.count('"loc_id":')
    if num_sites < 100:
        outdata += output
        print("Found %d locations"%num_sites)
    else:
        [rect1, rect2] = nicedivide_rect( rect )
        get_locs(url, rect1, data)
        get_locs(url, rect2, data)

-------------------------------------------------
### BankWest recursive scraping

The function get_locs recursively scrapes the BankWest website. You give it an initial rectangle in which to search for locations. If there are less than 100 in the rectangle then it is done, otherwise it makes two calls to itself, one for each half of the original rectangle. This repeats until it has found all the locations.

To search for ATMS, Branches or Business Banking Centres change the 'searchType' parameter to: 'ATM', 'STO' and 'BBC' respectively. You can input any subset of these

In [103]:
import requests

# max and min lats and lngs for Australia
latMax = -10
latMin = -43.6
lngMax = 153.9
lngMin = 112.8
    
# URL for website
url = 'http://www.bankwest.com.au/Locator/Service/Locator.asmx/BoundedLocations'

# initialise some parameters
params = {"latitude":-31.9535959,"longitude":115.85701180000001,"latitudeNorthEast":-31.931745692621774, 
                  "longitudeNorthEast":115.91108513374024,"latitudeSouthWest":-31.975440911111626,
                  "longitudeSouthWest":115.80293846625977, "searchType":["ATM"],"searchFacilities":[],
                  "numberOfOutOfRangeLocations":0}

# to keep track of the counts when we have found a rectangle with less than 100 locations.
counts = []
data = []

def get_locs(url, rect):
    # adjust the parameters we send so we are using the new rectangle
    lat_centre = 0.5 * (rect[0] + rect[1])
    lng_centre = 0.5 * (rect[2] + rect[3])
    lat_ne = rect[0]
    lat_sw = rect[1]
    lng_ne = rect[2]
    lng_sw = rect[3]
    params['latitude'] = lat_centre
    params['longitude'] = lng_centre
    params['latitudeNorthEast'] = lat_ne
    params['latitudeSouthWest'] = lat_sw
    params['longitudeNorthEast'] = lng_ne
    params['longitudeSouthWest'] = lng_sw
    # make a request
    r = requests.post(url, json=params)
    # read the text output
    output = r.content.decode('utf-8')
    # count the number of sites
    num_sites = output.count('"Id":')
 
    if num_sites < 100:
        print("Found %d locations"%num_sites)
        counts.append( num_sites )
        data.append(output)
    else:
        print("Found 100 locations in rectangle. Need to subdivide")
        [rect1, rect2] = nicedivide_rect( rect )
        get_locs(url, rect1)
        get_locs(url, rect2)

    
rect_au = [latMax, latMin, lngMax, lngMin]

In [104]:
get_locs(url, rect_au)

Found 100 locations in rectangle. Need to subdivide
Found 100 locations in rectangle. Need to subdivide
Found 100 locations in rectangle. Need to subdivide
Found 100 locations in rectangle. Need to subdivide
Found 100 locations in rectangle. Need to subdivide
Found 7 locations
Found 100 locations in rectangle. Need to subdivide
Found 0 locations
Found 100 locations in rectangle. Need to subdivide
Found 0 locations
Found 100 locations in rectangle. Need to subdivide
Found 100 locations in rectangle. Need to subdivide
Found 100 locations in rectangle. Need to subdivide
Found 0 locations
Found 100 locations in rectangle. Need to subdivide
Found 0 locations
Found 100 locations in rectangle. Need to subdivide
Found 100 locations in rectangle. Need to subdivide
Found 28 locations
Found 82 locations
Found 100 locations in rectangle. Need to subdivide
Found 7 locations
Found 99 locations
Found 43 locations
Found 0 locations
Found 100 locations in rectangle. Need to subdivide
Found 100 location

In [107]:
# Print out the number of locations found
print("Found a total of %d locations"%sum(counts))

# remove line breaks, tab spaces and newlines from the data and output to a file
str_out = ''
for i in data:
    str_out += i
    
# delimit
str_out = clean_str(str_out).replace('"Id":','*')
f = open('BankWest-ATM-Scrape-070616.txt','w')
f.write(clean_str(str_out))
f.close()

Found a total of 948 locations


-------------------------------
### RediATM

Use GET requests with url and params

API requires specifying a lat, lng pair along with a radius. It returns a maximum of 100 sites. Use recursion with discs.

Algorithm:
- Start with a circle at the centre of Australia with a radius that covers Australia.
- Count number of locations within the circle
    - If this number is less than 100 we have found all within the circle
    - If not then divide the 4 subcircles which cover the original and repeat from previous step.
- Note that this algorithm produces duplicates

In [191]:
import requests
from math import pi, degrees, radians

url = 'https://www.ehoundplatform.com/api/1.1/proximity_search'

params = {'output': 'json', 'jsonp':'showLocs', 'lat': '-37.8194637', 'lon': '145.1534067999', 'count': '100',
          'geo': 'undefined', 'service': 'undefined', 'max_distance': '0', 'filters': 'extra:eq:rediatm',
          'logic':'', 'log_type': 'web', 'create_log': 'true', 'api_key': 'ra3354a55qw79up', 'custom_logic': 'undefined',
          'ch': '9314'}

# lat and lng for centre of Australia
lat_centre, lng_centre = -27.246769, 135.122951

# running counts and data for the requests made
counts = []
data = []

# take a given circle and return 4 overlapping circles of smaller radius; circle is a list of the form
# [lat, lng, radius]
def nicedivide_circ(circ):
    lat = radians(circ[0])
    lng = radians(circ[1])
    radius = circ[2]
    circum_earth = 40 * (10**6)
    
    radius = radius / 1.414
    
    # note we normalise the distance we move by 40000km (the circumference of the Earth) so that it is in radians
    # upper left
    lat1, lng1 = get_latlng(lat, lng, radius * 2 * pi / float(40000), pi / 4)
    
    # upper right
    lat2, lng2 = get_latlng(lat, lng, radius * 2 * pi / float(40000), 7*pi / 4)
    
    # lower left
    lat3, lng3 = get_latlng(lat, lng, radius * 2 * pi / float(40000), 3*pi / 4)    
    
    # lower right
    lat4, lng4 = get_latlng(lat, lng, radius * 2 * pi / float(40000), 5*pi / 4)
    
    return [ [degrees(lat1), degrees(lng1), radius], [degrees(lat2), degrees(lng2), radius], 
            [degrees(lat3), degrees(lng3), radius], [degrees(lat4), degrees(lng4), radius]]
    
def get_locs(url, circ):
    lat = circ[0]
    lng = circ[1]
    radius = circ[2]
    
    params['lat'] = lat
    params['lon'] = lng
    params['max_distance'] = radius
    
    # make a request
    page = requests.post(url, params=params)
    
    # read the text output
    output = page.text
    
    # count the number of sites using the loc_id tag
    num_sites = output.count('"loc_id":')
 
    # count the number of sites found in the circle. If this number is less than 100 then we have found all
    # inside the circle, otherwise we have to subdivide
    if (num_sites < 100 and num_sites > 0):
        print("Found %d locations"%num_sites)
        counts.append( num_sites )
        data.append(output)
    else:
        print("Found %d locations in circle. Need to subdivide"%num_sites)
        circ1, circ2, circ3, circ4 = nicedivide_circ( circ )
        get_locs(url, circ1)
        get_locs(url, circ2)
        get_locs(url, circ3)
        get_locs(url, circ4)
    
circle_au = [lat_centre, lng_centre, 2500]

In [192]:
get_locs(url, circle_au)


Found 100 locations in circle. Need to subdivide
Found 100 locations in circle. Need to subdivide
Found 0 locations in circle. Need to subdivide
Found 0 locations in circle. Need to subdivide
Found 0 locations in circle. Need to subdivide
Found 0 locations in circle. Need to subdivide
Found 0 locations in circle. Need to subdivide
Found 0 locations in circle. Need to subdivide
Found 0 locations in circle. Need to subdivide
Found 0 locations in circle. Need to subdivide
Found 0 locations in circle. Need to subdivide
Found 0 locations in circle. Need to subdivide


KeyboardInterrupt: 

In [135]:
url = 'https://www.ehoundplatform.com/api/1.1/proximity_search'
params = {'output': 'json', 'jsonp':'showLocs', 'lat': '-37.8194637', 'lon': '145.1534067999', 'count': '100',
          'geo': 'undefined', 'service': 'undefined', 'max_distance': '0', 'filters': 'extra:eq:rediatm',
          'logic':'', 'log_type': 'web', 'create_log': 'true', 'api_key': 'ra3354a55qw79up', 'custom_logic': 'undefined',
          'ch': '9314'}

params['max_distance'] = 10000
page = requests.get(url, params=params)


In [136]:
print(page.text.count('loc_id'))

100


-----------------------------------
### Bank of Queensland
This seems to work similarly to RediATM. The maximum number of locations returned by a request is 600 we again we use a recursion similar to RediATM. Note that the request will return locations of RediATMs aswell as BOQ ATMs.

In [187]:

print(str_out[:1000])

{"d":{"__type":"GeoLocation.Location.BoundedBankwestLocations","Locations":[{"__type":"GeoLocation.Location.BankwestLocation","Id":"1292487398973","FinancialInstitution":"BW","Type":"ATM","TrueType":"ATM","Address":"Corner Ashley Drive & Mcbryde Cres","Suburb":"Wanniassa","State":"ACT","Postcode":"2903","Access":"Available 24/7","AccessNotes":"24 hours 7 days a week","Deposit":"Non -deposit","Region":"Interstate","NumberOfAtm":"1","Facilities":["Non-Deposit","Self Select PIN","Available 24 hours a day"],"FacilityNotes":[],"Bsb":"","SelfSelectPin":"Self Select PIN","HlsImage":"","MobilePhone":"","Name":"7-Eleven Wanniassa","Longitude":149.10159,"Latitude":-35.40508,"Distance":485.87219785573222,"WithinRange":true,"BusinessHours":{"Mon":{"OpenAt":"","CloseAt":""},"Tue":{"OpenAt":"","CloseAt":""},"Wed":{"OpenAt":"","CloseAt":""},"Thu":{"OpenAt":"","CloseAt":""},"Fri":{"OpenAt":"","CloseAt":""},"Sat":{"OpenAt":"","CloseAt":""},"Sun":{"OpenAt":"","CloseAt":""}}},{"__type":"GeoLocation.Locat

In [15]:
import requests

url = 'http://find.boq.com.au/ziumsystem/xml.asp'
params = {"environmentid": "635", "originstate": "", "originx": "153.028145", "originy": "-27.468054", 
         "locationid": "", "groupid": "883", "useglobalpriority": "1"}

