In [1]:
import pandas as pd
import json
import requests
import aiohttp
import asyncio

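## Getting Surfline spots

Adapted from [ovalwood/surf_locations](https://github.com/ovalwood/surf_locations); the taxonomy pages are fetched concurrently with `aiohttp`/`asyncio`, so the crawl runs a bit faster than the original sequential version.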


In [2]:
# Fetch the root taxonomy node; each entry of its `contains` list is treated
# by this notebook as a state.
response = requests.get(
    "https://services.surfline.com/taxonomy?type=taxonomy&id=58f7ed51dadb30820bb3879c&maxDepth=0",
    timeout=30,  # do not hang the kernel forever on an unresponsive server
)
# Fail here on an HTTP error rather than with a confusing KeyError on 'contains'.
response.raise_for_status()
json_data = response.json()
json_contains = json_data['contains']
states = [node['name'] for node in json_contains]
state_ids = [node['_id'] for node in json_contains]
# Filled in by the following cell from `state_ids`.
state_urls = []


In [3]:

# Build the list in a single expression (rather than appending to a list that
# was created in an earlier cell) so re-running this cell cannot accumulate
# duplicate URLs.
state_urls = [
    "https://services.surfline.com/taxonomy?type=taxonomy&id=" + state_id + "&maxDepth=0"
    for state_id in state_ids
]
   

In [4]:
async def fetch(url, session):
    """Request ``url`` through ``session`` and return the decoded JSON body.

    Args:
        url: Address to GET.
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response (an ``aiohttp.ClientSession`` in this notebook).

    Returns:
        Whatever ``response.json()`` resolves to.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status,
        so an error payload is never mistaken for taxonomy data downstream.
    """
    async with session.get(url) as response:
        response.raise_for_status()
        return await response.json()

async def fetch_all(urls):
    """Fetch every URL in ``urls`` concurrently over one shared client session.

    Returns the list produced by ``asyncio.gather`` over one ``fetch`` call
    per URL.
    """
    async with aiohttp.ClientSession() as session:
        pending = [fetch(url, session) for url in urls]
        return await asyncio.gather(*pending)

In [5]:
# Download the taxonomy node of every state URL concurrently via fetch_all.
state_data = await fetch_all(state_urls)

In [6]:
# Flatten the children of every state node into a single list of county ids.
county_ids = [child['_id'] for state in state_data for child in state['contains']]
      

In [7]:
# One taxonomy URL per county id, in the same order as `county_ids`.
county_urls = [
    "https://services.surfline.com/taxonomy?type=taxonomy&id=" + county_id + "&maxDepth=0"
    for county_id in county_ids
]


In [8]:

# Download the taxonomy node of every county URL concurrently via fetch_all.
county_data = await fetch_all(county_urls)

In [9]:
# Flatten the children of every county node, then pull the id and the name of
# each child into two parallel lists.
region_nodes = [node for county in county_data for node in county['contains']]
region_ids = [node['_id'] for node in region_nodes]
region_names = [node['name'] for node in region_nodes]
    

In [10]:
# One taxonomy URL per region id, in the same order as `region_ids`.
region_urls = [
    "https://services.surfline.com/taxonomy?type=taxonomy&id=" + region_id + "&maxDepth=0"
    for region_id in region_ids
]


In [11]:
# Download the taxonomy node of every region URL concurrently via fetch_all.
region_data = await fetch_all(region_urls)

In [12]:
def find_val(data, target_key, target_value):
    """Find the first dict nested in a list-valued entry of ``data`` that matches.

    Only the top-level values of ``data`` are inspected; non-list values and
    non-dict list items are skipped.

    Args:
        data: Mapping to search.
        target_key: Key the matching dict must contain.
        target_value: Value that ``target_key`` must map to.

    Returns:
        The first dict with ``item[target_key] == target_value``, or ``None``
        when nothing matches.
    """
    for value in data.values():
        if not isinstance(value, list):
            continue
        for item in value:
            # Require the key to be present so a missing key never "matches"
            # a target_value of None.
            if isinstance(item, dict) and target_key in item and item[target_key] == target_value:
                return item
    return None

In [14]:
region_data[0]["geonames"]["adminName1"]


'Wisconsin'

In [15]:
# Parallel accumulators filled by the loop over `region_data` further down;
# index i across these lists is meant to describe the same spot.
# Re-run this cell before re-running that loop, otherwise entries are appended twice.
spot_ids = []
spot_names = []
spot_address = []  # the loop only ever appends "" here
spot_lon = []
spot_lat = []
spot_urls = []


In [16]:
len(spot_address), len(spot_ids), len(spot_names)

(0, 0, 0)

In [17]:

# A region node whose `contains` list is empty has no children and is recorded
# as an individual spot.
for region in region_data:
    if len(region['contains']) != 0:
        continue

    # Read every field before appending anything, so a malformed node cannot
    # leave the parallel lists with different lengths.
    coordinates = region['location']['coordinates']
    lon, lat = coordinates[0], coordinates[1]
    # Exactly one URL per spot: the first "www" link, or None when there is
    # none. Appending per matching link would put zero or several entries in
    # spot_urls for one spot and misalign it with the other lists.
    www_url = next(
        (link['href'] for link in region['associated']['links'] if link['key'] == "www"),
        None,
    )

    spot_ids.append(region['_id'])
    spot_names.append(region['name'])
    spot_address.append("")  # no address available at this stage
    spot_lon.append(lon)
    spot_lat.append(lat)
    spot_urls.append(www_url)

In [18]:
# Assemble the spot table from the parallel lists and persist it as CSV.
spot_columns = {
    "ids": spot_ids,
    "names": spot_names,
    "lon": spot_lon,
    "lat": spot_lat,
    "urls": spot_urls,
}
df = pd.DataFrame(spot_columns)
df.to_csv('./data/spot_list.csv')

In [19]:
df.head()

Unnamed: 0,ids,names,lon,lat,urls
0,58f809addadb30820bd023e9,Madeline Island/Big Bay,-90.671978,46.810422,https://www.surfline.com/surf-report/madeline-...
1,58f80967dadb30820bcfdc1c,Grant Park,-87.845578,42.928,https://www.surfline.com/surf-report/grant-par...
2,58f8096ddadb30820bcfe2de,Rock Island,-86.819367,45.405882,https://www.surfline.com/surf-report/rock-isla...
3,58f80962dadb30820bcfd727,Bradford Beach,-87.872539,43.06085,https://www.surfline.com/surf-report/bradford-...
4,58f80963dadb30820bcfd7e9,Wind Point,-87.7573,42.782596,https://www.surfline.com/surf-report/wind-poin...


In [20]:
# One record per spot using the `lat`/`lng` key names. `street_address` starts
# out as the spot name and is overwritten later once city/state data is loaded.
# (The original bound `name` twice from two copies of spot_names; one is enough.)
spot_dict = [
    {"lat": lat, "lng": lng, "spot_name": name, "street_address": name}
    for lat, lng, name in zip(spot_lat, spot_lon, spot_names)
]

In [21]:
# Same records as `spot_dict` but with the `latitude`/`longitude` key names.
# `street_address` starts out as the spot name and is overwritten later.
# (The original bound `name` twice from two copies of spot_names; one is enough.)
legacy_spot_dict = [
    {"latitude": lat, "longitude": lng, "spot_name": name, "street_address": name}
    for lat, lng, name in zip(spot_lat, spot_lon, spot_names)
]

In [22]:
legacy_spot_dict

[{'latitude': 46.810422,
  'longitude': -90.671978,
  'spot_name': 'Madeline Island/Big Bay',
  'street_address': 'Madeline Island/Big Bay'},
 {'latitude': 42.928,
  'longitude': -87.845578,
  'spot_name': 'Grant Park',
  'street_address': 'Grant Park'},
 {'latitude': 45.405882,
  'longitude': -86.819367,
  'spot_name': 'Rock Island',
  'street_address': 'Rock Island'},
 {'latitude': 43.06085,
  'longitude': -87.872539,
  'spot_name': 'Bradford Beach',
  'street_address': 'Bradford Beach'},
 {'latitude': 42.782596,
  'longitude': -87.7573,
  'spot_name': 'Wind Point',
  'street_address': 'Wind Point'},
 {'latitude': 42.724474,
  'longitude': -87.77727,
  'spot_name': 'Racine',
  'street_address': 'Racine'},
 {'latitude': 43.666424,
  'longitude': -87.714441,
  'spot_name': 'Kohler-Andrae State Park',
  'street_address': 'Kohler-Andrae State Park'},
 {'latitude': 43.053112,
  'longitude': -87.881361,
  'spot_name': 'McKinley Beach',
  'street_address': 'McKinley Beach'},
 {'latitude': 4

In [23]:
spot_dict

[{'lat': 46.810422,
  'lng': -90.671978,
  'spot_name': 'Madeline Island/Big Bay',
  'street_address': 'Madeline Island/Big Bay'},
 {'lat': 42.928,
  'lng': -87.845578,
  'spot_name': 'Grant Park',
  'street_address': 'Grant Park'},
 {'lat': 45.405882,
  'lng': -86.819367,
  'spot_name': 'Rock Island',
  'street_address': 'Rock Island'},
 {'lat': 43.06085,
  'lng': -87.872539,
  'spot_name': 'Bradford Beach',
  'street_address': 'Bradford Beach'},
 {'lat': 42.782596,
  'lng': -87.7573,
  'spot_name': 'Wind Point',
  'street_address': 'Wind Point'},
 {'lat': 42.724474,
  'lng': -87.77727,
  'spot_name': 'Racine',
  'street_address': 'Racine'},
 {'lat': 43.666424,
  'lng': -87.714441,
  'spot_name': 'Kohler-Andrae State Park',
  'street_address': 'Kohler-Andrae State Park'},
 {'lat': 43.053112,
  'lng': -87.881361,
  'spot_name': 'McKinley Beach',
  'street_address': 'McKinley Beach'},
 {'lat': 43.390695,
  'lng': -87.86318,
  'spot_name': 'Port Washington',
  'street_address': 'Port Was

In [None]:
# async def post_spot(spot, session):
#         async with session.post('http://localhost:8000/addspot', json=spot) as response:
#              return await response.text()

# async def post_all(spot_dict):
#     data = []
#     async with aiohttp.ClientSession() as session:
#         tasks = []
#         for spot in spot_dict:
#             tasks.append(post_spot(spot, session))
#         data = await asyncio.gather(*tasks)
#     return data


In [39]:
# Rebinds `df` (until now the spot_list table) to a frame built from the
# legacy records, one row per spot.
df = pd.DataFrame(legacy_spot_dict)

In [40]:
# Coordinate-only selection, inspected in the next cell.
df_lat_lon = df[['latitude', 'longitude']]

In [41]:
df_lat_lon.head(), df_lat_lon.shape

(    latitude  longitude
 0  46.810422 -90.671978
 1  42.928000 -87.845578
 2  45.405882 -86.819367
 3  43.060850 -87.872539
 4  42.782596 -87.757300,
 (1320, 2))

In [42]:
import numpy as np

In [54]:
# Split the rows into three consecutive, near-equal chunks (presumably to keep
# each exported file small enough for the external processing step - confirm).
# Row *positions* are split and then selected with iloc, because calling
# np.array_split directly on a DataFrame relies on the deprecated
# DataFrame.swapaxes. The chunk boundaries are the same.
chunk_positions = np.array_split(np.arange(len(df)), 3)
dfs = [df.iloc[positions] for positions in chunk_positions]
df_1, df_2, df_3 = dfs


In [52]:
df_1.head()

Unnamed: 0,latitude,longitude,spot_name,street_address
0,46.810422,-90.671978,Madeline Island/Big Bay,Madeline Island/Big Bay
1,42.928,-87.845578,Grant Park,Grant Park
2,45.405882,-86.819367,Rock Island,Rock Island
3,43.06085,-87.872539,Bradford Beach,Bradford Beach
4,42.782596,-87.7573,Wind Point,Wind Point


In [53]:
# Write the first chunk to disk (the index is written as the first CSV column).
df_1.to_csv('./data/df_1.csv')

In [55]:
# Write the two remaining chunks, each to its own CSV file.
for chunk, csv_path in ((df_2, './data/df_2.csv'), (df_3, './data/df_3.csv')):
    chunk.to_csv(csv_path)

In [56]:
# res = await post_all(spot_dict)
# res[0]

In [57]:
legacy_spot_dict

[{'latitude': 46.810422,
  'longitude': -90.671978,
  'spot_name': 'Madeline Island/Big Bay',
  'street_address': 'Madeline Island/Big Bay'},
 {'latitude': 42.928,
  'longitude': -87.845578,
  'spot_name': 'Grant Park',
  'street_address': 'Grant Park'},
 {'latitude': 45.405882,
  'longitude': -86.819367,
  'spot_name': 'Rock Island',
  'street_address': 'Rock Island'},
 {'latitude': 43.06085,
  'longitude': -87.872539,
  'spot_name': 'Bradford Beach',
  'street_address': 'Bradford Beach'},
 {'latitude': 42.782596,
  'longitude': -87.7573,
  'spot_name': 'Wind Point',
  'street_address': 'Wind Point'},
 {'latitude': 42.724474,
  'longitude': -87.77727,
  'spot_name': 'Racine',
  'street_address': 'Racine'},
 {'latitude': 43.666424,
  'longitude': -87.714441,
  'spot_name': 'Kohler-Andrae State Park',
  'street_address': 'Kohler-Andrae State Park'},
 {'latitude': 43.053112,
  'longitude': -87.881361,
  'spot_name': 'McKinley Beach',
  'street_address': 'McKinley Beach'},
 {'latitude': 4

In [62]:
# Per-chunk CSV files that are read back in below.
# NOTE(review): these names differ from the df_1/df_2/df_3 files written earlier
# and nothing in this notebook creates them - presumably they are the output of
# an external processing (geocoding?) step; confirm and document that step.
processed_files = ["./data/df-1-processed.csv", "./data/df-2-processed.csv", "./data/df-3-processed.csv"]

In [64]:
# Load each processed chunk into its own DataFrame, keeping the file order.
dfs = list(map(pd.read_csv, processed_files))


In [65]:
# Stack the processed chunks into one frame, discarding the per-file indexes.
# This rebinds `df` again. The later street_address loops pair row i with spot i,
# which assumes the processed files kept the original row order - TODO confirm.
df = pd.concat(dfs, ignore_index=True)

In [68]:
df.shape

(1320, 24)

In [66]:
df.head()

Unnamed: 0,original_,original_latitude,original_longitude,original_spot_name,original_street_address,distance,formatted,lat,lon,housenumber,...,suburb,city,county,state,state_code,country,country_code,attribution,attribution_license,attribution_url
0,0,46.810422,-90.671978,Madeline Island/Big Bay,Madeline Island/Big Bay,369.461815,"Barrier Beach, La Pointe, WI, United States of...",46.809129,-90.67645,,...,,Town of La Pointe,Ashland County,Wisconsin,WI,United States,us,© OpenStreetMap contributors,Open Database License,https://www.openstreetmap.org/copyright
1,1,42.928,-87.845578,Grant Park,Grant Park,294.971131,"6260 South Lake Drive, Cudahy, WI 53110, Unite...",42.930542,-87.847633,6260.0,...,,Cudahy,Milwaukee County,Wisconsin,WI,United States,us,© OpenStreetMap contributors,Open Database License,https://www.openstreetmap.org/copyright
2,2,45.405882,-86.819367,Rock Island,Rock Island,326.092006,"Blueberry Trail, Washington, WI, United States...",45.410031,-86.822068,,...,,Town of Washington,Door County,Wisconsin,WI,United States,us,© OpenStreetMap contributors,Open Database License,https://www.openstreetmap.org/copyright
3,3,43.06085,-87.872539,Bradford Beach,Bradford Beach,59.414141,"Bradford Beach, 2400 North Lincoln Memorial Dr...",43.061203,-87.873088,2400.0,...,Northpoint,Milwaukee,Milwaukee County,Wisconsin,WI,United States,us,© OpenStreetMap contributors,Open Database License,https://www.openstreetmap.org/copyright
4,4,42.782596,-87.7573,Wind Point,Wind Point,127.947009,"645 Tower Circle, Wind Point, WI 53402, United...",42.782141,-87.75874,645.0,...,,Wind Point,Racine County,Wisconsin,WI,United States,us,© OpenAddresses contributors,BSD-3-Clause License,


In [77]:
# Derive a "City, State" label column from the processed city and state columns.
city_state_label = df['city'] + ", " + df["state"]
df = df.assign(city_state=city_state_label)

In [78]:
df['city_state']

0        Town of La Pointe, Wisconsin
1                   Cudahy, Wisconsin
2       Town of Washington, Wisconsin
3                Milwaukee, Wisconsin
4               Wind Point, Wisconsin
                    ...              
1315     North Hampton, New Hampshire
1316           Hampton, New Hampshire
1317           Hampton, New Hampshire
1318          Seabrook, New Hampshire
1319           Hampton, New Hampshire
Name: city_state, Length: 1320, dtype: object

In [70]:
legacy_spot_dict[:2]

[{'latitude': 46.810422,
  'longitude': -90.671978,
  'spot_name': 'Madeline Island/Big Bay',
  'street_address': 'Madeline Island/Big Bay'},
 {'latitude': 42.928,
  'longitude': -87.845578,
  'spot_name': 'Grant Park',
  'street_address': 'Grant Park'}]

In [79]:
# Rows and spots are paired purely by position, so the counts must agree.
assert len(df) == len(legacy_spot_dict), "processed rows and spots are out of sync"
for spot, city_state in zip(legacy_spot_dict, df['city_state']):
    # A missing city or state leaves city_state as NaN; keep the existing
    # placeholder address rather than storing the literal string "nan".
    if pd.notna(city_state):
        spot["street_address"] = str(city_state)

In [83]:
# Rows and spots are paired purely by position, so the counts must agree.
assert len(df) == len(spot_dict), "processed rows and spots are out of sync"
for spot, city_state in zip(spot_dict, df['city_state']):
    # A missing city or state leaves city_state as NaN; keep the existing
    # placeholder address rather than storing the literal string "nan".
    if pd.notna(city_state):
        spot["street_address"] = str(city_state)

In [84]:
spot_dict

[{'lat': 46.810422,
  'lng': -90.671978,
  'spot_name': 'Madeline Island/Big Bay',
  'street_address': 'Town of La Pointe, Wisconsin'},
 {'lat': 42.928,
  'lng': -87.845578,
  'spot_name': 'Grant Park',
  'street_address': 'Cudahy, Wisconsin'},
 {'lat': 45.405882,
  'lng': -86.819367,
  'spot_name': 'Rock Island',
  'street_address': 'Town of Washington, Wisconsin'},
 {'lat': 43.06085,
  'lng': -87.872539,
  'spot_name': 'Bradford Beach',
  'street_address': 'Milwaukee, Wisconsin'},
 {'lat': 42.782596,
  'lng': -87.7573,
  'spot_name': 'Wind Point',
  'street_address': 'Wind Point, Wisconsin'},
 {'lat': 42.724474,
  'lng': -87.77727,
  'spot_name': 'Racine',
  'street_address': 'Racine, Wisconsin'},
 {'lat': 43.666424,
  'lng': -87.714441,
  'spot_name': 'Kohler-Andrae State Park',
  'street_address': 'Wilson, Wisconsin'},
 {'lat': 43.053112,
  'lng': -87.881361,
  'spot_name': 'McKinley Beach',
  'street_address': 'Milwaukee, Wisconsin'},
 {'lat': 43.390695,
  'lng': -87.86318,
  'spo

In [80]:
legacy_spot_dict[:5]

[{'latitude': 46.810422,
  'longitude': -90.671978,
  'spot_name': 'Madeline Island/Big Bay',
  'street_address': 'Town of La Pointe, Wisconsin'},
 {'latitude': 42.928,
  'longitude': -87.845578,
  'spot_name': 'Grant Park',
  'street_address': 'Cudahy, Wisconsin'},
 {'latitude': 45.405882,
  'longitude': -86.819367,
  'spot_name': 'Rock Island',
  'street_address': 'Town of Washington, Wisconsin'},
 {'latitude': 43.06085,
  'longitude': -87.872539,
  'spot_name': 'Bradford Beach',
  'street_address': 'Milwaukee, Wisconsin'},
 {'latitude': 42.782596,
  'longitude': -87.7573,
  'spot_name': 'Wind Point',
  'street_address': 'Wind Point, Wisconsin'}]

In [81]:
# Persist the legacy-format records as one JSON document.
with open('./data/legacy_spot_dict.json', 'w') as out_file:
    out_file.write(json.dumps(legacy_spot_dict))

[{'lat': 46.810422,
  'lng': -90.671978,
  'spot_name': 'Madeline Island/Big Bay',
  'street_address': 'Madeline Island/Big Bay'},
 {'lat': 42.928,
  'lng': -87.845578,
  'spot_name': 'Grant Park',
  'street_address': 'Grant Park'},
 {'lat': 45.405882,
  'lng': -86.819367,
  'spot_name': 'Rock Island',
  'street_address': 'Rock Island'},
 {'lat': 43.06085,
  'lng': -87.872539,
  'spot_name': 'Bradford Beach',
  'street_address': 'Bradford Beach'},
 {'lat': 42.782596,
  'lng': -87.7573,
  'spot_name': 'Wind Point',
  'street_address': 'Wind Point'},
 {'lat': 42.724474,
  'lng': -87.77727,
  'spot_name': 'Racine',
  'street_address': 'Racine'},
 {'lat': 43.666424,
  'lng': -87.714441,
  'spot_name': 'Kohler-Andrae State Park',
  'street_address': 'Kohler-Andrae State Park'},
 {'lat': 43.053112,
  'lng': -87.881361,
  'spot_name': 'McKinley Beach',
  'street_address': 'McKinley Beach'},
 {'lat': 43.390695,
  'lng': -87.86318,
  'spot_name': 'Port Washington',
  'street_address': 'Port Was

In [None]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [26]:
len(legacy_spot_dict)

1320

In [None]:
# Send the spot records to the local service and show the raw response body.
# NOTE(review): the whole list is posted in a single request, whereas the
# commented-out post_spot helper posts one spot per request - confirm which
# payload shape /addspot expects.
res = requests.post(
    'http://localhost:8000/addspot',
    json=spot_dict,
    timeout=10,  # a stopped or stuck local server should not hang the kernel
)
res.content

In [None]:
# FIXME: this cell used to read `await spot_dict`. `spot_dict` is a plain list
# built by a list comprehension, not an awaitable, so that statement raises
# TypeError. The async upload helper (post_all) is commented out in this
# notebook; restore it and call `await post_all(spot_dict)` here if concurrent
# posting is wanted.

In [None]:
# Read back the spots stored by the local service and show the raw response body.
res = requests.get(
    "http://localhost:8000/spots",
    timeout=10,  # a stopped or stuck local server should not hang the kernel
)
res.content