In [17]:
import requests

In [18]:
from bs4 import BeautifulSoup

In [19]:
data = requests.get('https://www.foodsafety.govt.nz/registers-lists/beekeepers/index.htm?setup_file=beekeepers-ssi.setup.cgi&rows_to_return=20000&submit_search=Search')

In [20]:
soup = BeautifulSoup(data.text, 'html.parser')

In [21]:
register = soup.find('table', {'class': 'data'})

In [22]:
dataTable = register.find_all('tr')

In [23]:
print(dataTable)

[<tr><th>ID</th><th>Name</th><th>Physical Address</th><th>Listing Date</th><th>Expiry Date</th><tr>
<td>BK00766</td>
<td>3 J's Honey</td>
<td>424 Huia Street, Camberley HASTINGS</td>
<td>30/03/2017</td>
<td>30/03/2019</td>
</tr>
<tr>
<td>BK00334</td>
<td>360 Degrees Bees Limited</td>
<td>2A Maungatawhiri Road RAGLAN</td>
<td>26/09/2016</td>
<td>26/09/2018</td>
</tr>
<tr>
<td>BK00383</td>
<td>7082 Apiaries Limited</td>
<td>11 Moa Street, Ahipara KAITAIA</td>
<td>25/10/2016</td>
<td>25/10/2019</td>
</tr>
<tr>
<td>BK00684</td>
<td>A &amp; L Pohio Holdings Limited</td>
<td>39 Marama Street, Frankton HAMILTON</td>
<td>27/02/2017</td>
<td>27/02/2019</td>
</tr>
<tr>
<td>BK00586</td>
<td>A J and L M Scott Trading As Scott Apiaries</td>
<td>1 Santa Maria Avenue, Mt Pleasant CHRISTCHURCH</td>
<td>26/01/2017</td>
<td>26/01/2019</td>
</tr>
<tr>
<td>BK00044</td>
<td>A J Richards Mr Bee Limited</td>
<td>14 Bastia Avenue WANGANUI</td>
<td>16/08/2016</td>
<td>16/08/2019</td>
</tr>
<tr>
<td>BK00883</td

In [24]:
table_header = dataTable[0].find_all('th')
fieldnames = []
for header in table_header:
    fieldnames.append(header.text.replace(" ",""))
fieldnames.extend(["geocodeSuccess", "latitude", "longitude"])
print(fieldnames)

['ID', 'Name', 'PhysicalAddress', 'ListingDate', 'ExpiryDate', 'geocodeSuccess', 'latitude', 'longitude']


In [32]:
from geopy import Nominatim, location
import time

pandasData = []
for tr in dataTable[1:]: #to miss the tr for header
    bkid = tr.find_all('td')[0].text.strip()
    Name = tr.find_all('td')[1].text.strip()
    PhysicalAddress = tr.find_all('td')[2].text.strip()
    ListingDate = tr.find_all('td')[3].text.strip()
    ExpiryDate = tr.find_all('td')[4].text.strip()
    geocode = Nominatim(user_agent='parnelandr@gmail.com').geocode(PhysicalAddress,timeout=100)
    #print(geocode.address[-20:])
    #print(type(geocode.address))
    latitude = None
    longitude = None
    if geocode is None:
        geocodeSuccess = 'Not Geocoded'
    elif geocode.address[-20:] != "New Zealand/Aotearoa":
        geocodeSuccess = 'Not Geocoded to New Zealand'
    else:
        geocodeSuccess = 'Geocoded to New Zealand'
        latitude = geocode.latitude
        longitude = geocode.longitude
    pandasData.append([bkid, Name, PhysicalAddress, ListingDate, ExpiryDate, geocodeSuccess, latitude, longitude])
    #print(bkid, Name, PhysicalAddress, ListingDate, ExpiryDate, geocodeSuccess, latitude, longitude)
    time.sleep(0.1)
#print(pandasData)

In [33]:
import pandas as pd
df = pd.DataFrame(data=pandasData, columns=fieldnames)

Unnamed: 0,ID,Name,PhysicalAddress,ListingDate,ExpiryDate,geocodeSuccess,latitude,longitude
0,BK00766,3 J's Honey,"424 Huia Street, Camberley HASTINGS",30/03/2017,30/03/2019,Geocoded to New Zealand,-39.629540,176.818392
1,BK00334,360 Degrees Bees Limited,2A Maungatawhiri Road RAGLAN,26/09/2016,26/09/2018,Geocoded to New Zealand,-37.823184,174.889478
2,BK00383,7082 Apiaries Limited,"11 Moa Street, Ahipara KAITAIA",25/10/2016,25/10/2019,Not Geocoded,,
3,BK00684,A & L Pohio Holdings Limited,"39 Marama Street, Frankton HAMILTON",27/02/2017,27/02/2019,Geocoded to New Zealand,-37.791568,175.271040
4,BK00586,A J and L M Scott Trading As Scott Apiaries,"1 Santa Maria Avenue, Mt Pleasant CHRISTCHURCH",26/01/2017,26/01/2019,Geocoded to New Zealand,-43.563998,172.722000
5,BK00044,A J Richards Mr Bee Limited,14 Bastia Avenue WANGANUI,16/08/2016,16/08/2019,Geocoded to New Zealand,-39.927285,175.068721
6,BK00883,A N Williams,22 Adamson Road TAIPA,27/10/2017,27/10/2019,Geocoded to New Zealand,-34.993637,173.461259
7,BK00780,A R Mark Limited,"4 Tui Glen Road, Atawhai NELSON",10/04/2017,10/04/2019,Not Geocoded,,
8,BK00192,AA Apiaries Limited,40 Bradford Street WAIHI,26/08/2016,26/08/2019,Geocoded to New Zealand,-37.399367,175.836308
9,BK00624,Aaron Brocklehurst Trust,"295B Henwood Road, Bell Block NEW PLYMOUTH",9/02/2017,9/02/2019,Geocoded to New Zealand,-39.059553,174.155170


In [46]:
df["ListingDate"] = pd.to_datetime(df["ListingDate"])
df["ExpiryDate"] = pd.to_datetime(df["ExpiryDate"])

In [48]:
dfForFeatureLayer = df[df["geocodeSuccess"]=="Geocoded to New Zealand"]

In [49]:
dfForFeatureLayer

Unnamed: 0,ID,Name,PhysicalAddress,ListingDate,ExpiryDate,geocodeSuccess,latitude,longitude
0,BK00766,3 J's Honey,"424 Huia Street, Camberley HASTINGS",2017-03-30,2019-03-30,Geocoded to New Zealand,-39.629540,176.818392
1,BK00334,360 Degrees Bees Limited,2A Maungatawhiri Road RAGLAN,2016-09-26,2018-09-26,Geocoded to New Zealand,-37.823184,174.889478
3,BK00684,A & L Pohio Holdings Limited,"39 Marama Street, Frankton HAMILTON",2017-02-27,2019-02-27,Geocoded to New Zealand,-37.791568,175.271040
4,BK00586,A J and L M Scott Trading As Scott Apiaries,"1 Santa Maria Avenue, Mt Pleasant CHRISTCHURCH",2017-01-26,2019-01-26,Geocoded to New Zealand,-43.563998,172.722000
5,BK00044,A J Richards Mr Bee Limited,14 Bastia Avenue WANGANUI,2016-08-16,2019-08-16,Geocoded to New Zealand,-39.927285,175.068721
6,BK00883,A N Williams,22 Adamson Road TAIPA,2017-10-27,2019-10-27,Geocoded to New Zealand,-34.993637,173.461259
8,BK00192,AA Apiaries Limited,40 Bradford Street WAIHI,2016-08-26,2019-08-26,Geocoded to New Zealand,-37.399367,175.836308
9,BK00624,Aaron Brocklehurst Trust,"295B Henwood Road, Bell Block NEW PLYMOUTH",2017-09-02,2019-09-02,Geocoded to New Zealand,-39.059553,174.155170
10,BK00560,Aaron Mark Fletcher - Fletcher Bees Limited,174B Jericho Road PUKEKOHE,2017-01-19,2019-01-19,Geocoded to New Zealand,-37.216636,174.958666
14,BK01158,Abuzz Apiaries Limited,1190 Omanawa Road TAURANGA,2018-07-25,2019-07-25,Geocoded to New Zealand,-37.858199,176.079605


In [50]:
from arcgis.geometry import SpatialReference, Point
from arcgis.features import SpatialDataFrame

In [51]:
spatial_reference = SpatialReference(wkid=4326)  # WGS84

In [52]:
sdf = SpatialDataFrame(
    data=dfForFeatureLayer, 
    geometry=df.apply(
        lambda row: Point(x=row.longitude, y=row.latitude, sr=spatial_reference), 
        axis=1
    )
)
sdf.head()

Unnamed: 0,ID,Name,PhysicalAddress,ListingDate,ExpiryDate,geocodeSuccess,latitude,longitude,SHAPE
0,BK00766,3 J's Honey,"424 Huia Street, Camberley HASTINGS",2017-03-30,2019-03-30,Geocoded to New Zealand,-39.62954,176.818392,"{'x': 176.8183924, 'y': -39.6295399, 'sr': {'w..."
1,BK00334,360 Degrees Bees Limited,2A Maungatawhiri Road RAGLAN,2016-09-26,2018-09-26,Geocoded to New Zealand,-37.823184,174.889478,"{'x': 174.8894778, 'y': -37.8231836, 'sr': {'w..."
3,BK00684,A & L Pohio Holdings Limited,"39 Marama Street, Frankton HAMILTON",2017-02-27,2019-02-27,Geocoded to New Zealand,-37.791568,175.27104,"{'x': 175.2710397, 'y': -37.7915682, 'sr': {'w..."
4,BK00586,A J and L M Scott Trading As Scott Apiaries,"1 Santa Maria Avenue, Mt Pleasant CHRISTCHURCH",2017-01-26,2019-01-26,Geocoded to New Zealand,-43.563998,172.722,"{'x': 172.7220002, 'y': -43.563998, 'sr': {'wk..."
5,BK00044,A J Richards Mr Bee Limited,14 Bastia Avenue WANGANUI,2016-08-16,2019-08-16,Geocoded to New Zealand,-39.927285,175.068721,"{'x': 175.068721, 'y': -39.9272846, 'sr': {'wk..."


In [53]:
from arcgis.gis import *
from IPython.display import display
import getpass

In [54]:
username = getpass.getpass()

········


In [55]:
password = getpass.getpass()

········


In [56]:
gis = GIS("https://mpi.maps.arcgis.com", username, password)

In [57]:
sdf.to_featurelayer("Listed Beekeeper Registry - Test", gis=gis, tags=['test','MPI website scrape', 'Python', 'ArcGIS Python API'])