In [15]:
import pandas as pd
import json
import requests
import aiohttp
import asyncio

## Getting surfline spots

Taken from [ovalwood](https://github.com/ovalwood/surf_locations) and adapted to run a bit faster with async


In [None]:
response = requests.get("https://services.surfline.com/taxonomy?type=taxonomy&id=58f7ed51dadb30820bb3879c&maxDepth=0")
json_data = response.json()
json_contains = json_data['contains']
states = []
state_ids = []
state_urls = []
for x in json_contains:
    states.append(x['name'])
    state_ids.append(x['_id'])


In [None]:

for state_id in state_ids:
    state_urls.append("https://services.surfline.com/taxonomy?type=taxonomy&id=" + state_id + "&maxDepth=0")
   

In [None]:
async def fetch(url, session):
    async with session.get(url) as response:
        return await response.json()

async def fetch_all(urls):
    data = []
    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in urls:
            tasks.append(fetch(url, session))
        data = await asyncio.gather(*tasks)
    return data

In [None]:
state_data = await fetch_all(state_urls)

In [None]:
county_ids = []    
for state in state_data:
    state_contains = state['contains']
    for y in state_contains:
        county_ids.append(y['_id'])
      

In [None]:
county_urls = []
for county_id in county_ids:
    county_urls.append("https://services.surfline.com/taxonomy?type=taxonomy&id=" + county_id + "&maxDepth=0")


In [None]:

county_data = await fetch_all(county_urls)

In [None]:
region_ids = []
region_names = []
for county in county_data:
    county_contains = county['contains']
    for z in county_contains:
        region_ids.append(z['_id'])
        region_names.append(z['name'])
    

In [None]:
region_urls = []
for region_id in region_ids:
    region_urls.append("https://services.surfline.com/taxonomy?type=taxonomy&id=" + region_id + "&maxDepth=0")


In [None]:
region_data = await fetch_all(region_urls)

In [None]:
def find_val(data, target_key, target_value):
    for key, value in data.items():
        if isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    return item

In [None]:
region_data[0]["geonames"]["adminName1"]


In [None]:
spot_ids = []
spot_names = []
spot_address = []
spot_lon = []
spot_lat = []
spot_urls = []


In [None]:
len(spot_address), len(spot_ids), len(spot_names)

In [None]:

for region in region_data:
    region_contains = region['contains']
    if len(region_contains) == 0:
        spot_ids.append(region['_id'])
        spot_names.append(region['name'])
        spot_address.append("")
        region_associated = region['associated']
        region_links = region_associated['links']
        region_location = region['location']
        region_coordinates = region_location['coordinates']
        spot_lon.append(region_coordinates[0])
        spot_lat.append(region_coordinates[1])
        for i in region_links:
            if i['key'] == "www":
                spot_urls.append(i['href'])

In [None]:
df = pd.DataFrame({"ids": spot_ids, "names": spot_names, "lon": spot_lon, "lat": spot_lat, "urls": spot_urls})
df.to_csv('./data/spot_list.csv')

In [None]:
df.head()

In [None]:
spot_dict = [{"lat": lat, "lng": lng, "spot_name": name, "street_address": name} for lat, lng, name, name in zip(spot_lat, spot_lon, spot_names, spot_names)]

In [None]:
legacy_spot_dict = [{"latitude": lat, "longitude": lng, "spot_name": name, "street_address": name} for lat, lng, name, name in zip(spot_lat, spot_lon, spot_names, spot_names)]

In [None]:
legacy_spot_dict

In [None]:
spot_dict

In [12]:
async def post_spot(spot, session, url):
        async with session.post(url, json=spot) as response:
             return await response.text()

async def post_all(spot_dict, url):
    data = []
    async with aiohttp.ClientSession() as session:
        tasks = []
        for spot in spot_dict:
            tasks.append(post_spot(spot, session, url))
        data = await asyncio.gather(*tasks)
    return data


In [None]:
df = pd.DataFrame(legacy_spot_dict)

In [None]:
df_lat_lon = df[['latitude', 'longitude']]

In [None]:
df_lat_lon.head(), df_lat_lon.shape

In [None]:
import numpy as np

In [None]:
dfs = np.array_split(df, 3)
df_1, df_2, df_3 = dfs[0], dfs[1], dfs[2]


In [None]:
df_1.head()

In [None]:
df_1.to_csv('./data/df_1.csv')

In [None]:
df_2.to_csv('./data/df_2.csv')
df_3.to_csv('./data/df_3.csv')

In [None]:
# res = await post_all(spot_dict)
# res[0]

In [None]:
legacy_spot_dict

In [None]:
processed_files = ["./data/df-1-processed.csv", "./data/df-2-processed.csv", "./data/df-3-processed.csv"]

In [None]:
dfs = [pd.read_csv(file) for file in processed_files]


In [None]:
df = pd.concat(dfs, ignore_index=True)

In [None]:
df.shape

In [None]:
df.head()

In [None]:
df['city_state'] = df['city'] + ", " + df["state"]

In [None]:
df['city_state']

In [None]:
legacy_spot_dict[:2]

In [None]:
for idx, spot in enumerate(legacy_spot_dict):
    spot["street_address"] = f"{df['city_state'][idx]}"

In [None]:
for idx, spot in enumerate(spot_dict):
    spot["street_address"] = f"{df['city_state'][idx]}"

In [None]:
spot_dict

In [None]:
legacy_spot_dict[:5]

In [None]:
with open('./data/legacy_spot_dict.json', 'w') as f:
    json.dump(legacy_spot_dict, f)

In [2]:
import json

In [7]:
with open('./data/legacy_spot_dict.json', 'r') as f:
    spot_dict = json.load(f)

In [8]:
updated_spots = [
    {'lat' if key == 'latitude' else 'lng' if key == 'longitude' else key: value for key, value in spot.items()}
    for spot in spot_dict
]

In [9]:
updated_spots

[{'lat': 46.810422,
  'lng': -90.671978,
  'spot_name': 'Madeline Island/Big Bay',
  'street_address': 'Town of La Pointe, Wisconsin'},
 {'lat': 42.928,
  'lng': -87.845578,
  'spot_name': 'Grant Park',
  'street_address': 'Cudahy, Wisconsin'},
 {'lat': 45.405882,
  'lng': -86.819367,
  'spot_name': 'Rock Island',
  'street_address': 'Town of Washington, Wisconsin'},
 {'lat': 43.06085,
  'lng': -87.872539,
  'spot_name': 'Bradford Beach',
  'street_address': 'Milwaukee, Wisconsin'},
 {'lat': 42.782596,
  'lng': -87.7573,
  'spot_name': 'Wind Point',
  'street_address': 'Wind Point, Wisconsin'},
 {'lat': 42.724474,
  'lng': -87.77727,
  'spot_name': 'Racine',
  'street_address': 'Racine, Wisconsin'},
 {'lat': 43.666424,
  'lng': -87.714441,
  'spot_name': 'Kohler-Andrae State Park',
  'street_address': 'Wilson, Wisconsin'},
 {'lat': 43.053112,
  'lng': -87.881361,
  'spot_name': 'McKinley Beach',
  'street_address': 'Milwaukee, Wisconsin'},
 {'lat': 43.390695,
  'lng': -87.86318,
  'spo

In [10]:
url = 'https://api.peterbull.org/addspot'

In [16]:
# response = await post_all(updated_spots, url)

AttributeError: 'list' object has no attribute 'content'

In [4]:
spots_json = json.load('./data/legacy_spot_dict.json')

AttributeError: 'str' object has no attribute 'read'

In [None]:
len(legacy_spot_dict)