In [30]:
import asyncio
import json
import time
from pathlib import Path

import aiohttp
import numpy as np
import pandas as pd
import pendulum
import requests

from sqlalchemy import create_engine, select, text, and_

from sqlalchemy.orm import sessionmaker, declarative_base
from utils.utils import LOCAL_DATBASE_URL

In [31]:
# Base class for declarative ORM models. No model classes are defined in the
# cells visible here — presumably kept for later use; verify before removing.
Base = declarative_base()

In [32]:
# SQLAlchemy engine for the database URL imported from utils.utils.
engine = create_engine(LOCAL_DATBASE_URL)
# Session factory bound to that engine; used as a context manager
# (`with SessionLocal() as db:`) in the database lookup cell further down.
SessionLocal = sessionmaker(bind=engine)

## Note:
See `240220_sl_surf_spots.ipynb` for the code that fetches the spot list.

In [33]:
# Fetch the Surfline taxonomy entry for the hard-coded taxonomy id (maxDepth=0).
# requests has no default timeout, so one is set explicitly to keep a stalled
# connection from hanging the kernel indefinitely.
response = requests.get("https://services.surfline.com/taxonomy?type=taxonomy&id=58f7ed51dadb30820bb3879c&maxDepth=0", timeout=30)

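* You will not get premium Surfline forecast data (e.g. more than 6 forecast days) without a valid Surfline premium login; the unauthenticated requests used in this notebook work without one. For premium access, add your credentials to `.env.development`: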
  ```
  SURFLINE_EMAIL=xxx
  SURFLINE_PASSWORD=yyy
  ```

##### Requests

`https://services.surfline.com/kbyg/spots/forecasts/{type}?{params}`


Type|Data
----|----
rating|array of human-readable and numeric (0-6) ratings
wave|array of min/max sizes & optimal scores
wind|array of wind directions/speeds & optimal scores
tides|array of types & heights
weather|array of sunrise/set times, array of temperatures/weather conditions

Param|Values|Effect
-----|------|------
spotId|string|Surfline spot id that you want data for. A typical Surfline URL is `https://www.surfline.com/surf-report/venice-breakwater/590927576a2e4300134fbed8` where `590927576a2e4300134fbed8` is the `spotId`
days|integer|Number of forecast days to get (Max 6 w/o access token, Max 17 w/ premium token)
intervalHours|integer|Minimum of 1 (hour)
maxHeights|boolean|`true` seems to remove min & optimal values from the wave data output
sds|boolean|If true, use the new LOTUS forecast engine
accesstoken|string|Auth token to get premium data access (optional)

Anywhere there is an `optimalScore` the value can be interpreted as follows:

Value|Meaning
-----|-------
0|Suboptimal
1|Good
2|Optimal


In [34]:
# Forecast endpoint names that go in the URL path (see the Type table above).
types = ["rating", "wave", "wind", "tides", "weather"]
# Query-string parameter names (see the Param table above); params[0] is the
# spot id key used to build request params in later cells.
params = ["spotId", "days", "intervalHours", "maxHeights", "sds", "accesstoken"]
# Common URL prefix; a forecast type from `types` is appended to it per request.
base = "https://services.surfline.com/kbyg/spots/forecasts"

In [35]:
# Directory holding the notebook's data files, relative to the working directory.
datapath = Path('./data')


In [36]:
# Load the spot list (ids, names, coordinates, urls).
# NOTE(review): the displayed frame carries 'Unnamed: 0' and 'Unnamed: 0.1'
# columns, which looks like a saved index being re-read as data — consider
# writing the CSV with index=False (or reading with index_col) when it is regenerated.
df = pd.read_csv(datapath/'spot_list.csv')


In [37]:
df.head()

Unnamed: 0.1,Unnamed: 0,ids,names,lon,lat,urls
0,0,584204204e65fad6a7709b5d,Dauphin Island,-88.117,30.229,https://www.surfline.com/surf-report/dauphin-i...
1,1,584204204e65fad6a7709b61,Spuds,-87.549,30.273,https://www.surfline.com/surf-report/spuds/584...
2,2,584204204e65fad6a7709b62,Alabama Point,-87.562,30.27,https://www.surfline.com/surf-report/alabama-p...
3,3,584204204e65fad6a7709b60,West Pass,-87.737,30.239,https://www.surfline.com/surf-report/west-pass...
4,4,65948156c329a78a0914a15e,Morgantown Beach,-87.91913,30.230299,https://www.surfline.com/surf-report/morgantow...


Get the spot `id` for 1st Street Jetty in Virginia Beach

In [38]:
# Pick the id of the first spot whose name contains "1st Street Jetty"
# (case-insensitive; missing names count as non-matches).
jetty_mask = df['names'].str.contains('1st Street Jetty', case=False, na=False)
jetty_id = df.loc[jetty_mask, 'ids'].values[0]
jetty_id

'584204214e65fad6a7709ce7'

In [39]:
# Query params for the example request: the jetty's spot id keyed by
# params[0] ('spotId').
ex_params = {params[0]: jetty_id}
ex_params

{'spotId': '584204214e65fad6a7709ce7'}

Surfline seems to change their spot IDs periodically. To check whether that has happened, open a spot on the website and pass the id from its URL (the `spotId`) as a param to debug. If the IDs have changed, you'll need to run the notebook `240220_sl_surf_spots.ipynb`, as mentioned above, to refresh the spots dataset.

In [40]:
# Hard-coded spot id for debugging against the website; it is the same value
# as the jetty_id looked up from the CSV above. Not used by the cells shown here.
debug_params = {params[0]: "584204214e65fad6a7709ce7"}

In [41]:
# Request the first forecast type (types[0], i.e. "rating") for the example spot.
# An explicit timeout keeps a stalled connection from hanging the kernel;
# requests applies none by default.
res = requests.get(f"{base}/{types[0]}", params=ex_params, timeout=30)
res.status_code

200

In [42]:
# Parsed JSON body of the rating response; kept untrimmed for the timestamp
# checks in later cells.
rating_json = res.json()

In [43]:
# Parse the response again so this copy can be trimmed independently of
# rating_json, then keep only the first 24 rating entries when they exist.
four_day_json = res.json()
has_rating_series = 'data' in four_day_json and 'rating' in four_day_json['data']
if has_rating_series:
    rating_series = four_day_json['data']['rating']
    four_day_json['data']['rating'] = rating_series[:24]

In [44]:
def cull_extra_days(full_json: dict, max_entries: int = 24, key: str = 'rating') -> None:
    """Trim a forecast payload, in place, to its first ``max_entries`` entries.

    The list stored at ``full_json['data'][key]`` is replaced by its first
    ``max_entries`` items. With the hourly rating entries seen in this
    notebook, the default of 24 keeps the first forecast day.

    Payloads that do not have the expected shape (no ``'data'`` mapping, or
    no ``key`` inside it — e.g. an error response) are left untouched.

    Args:
        full_json: Parsed forecast response; modified in place.
        max_entries: Number of leading entries to keep (default 24).
        key: Name of the series under ``'data'`` to trim (default ``'rating'``).

    Returns:
        None. The payload is mutated rather than returned, so calling this as
        the last line of a cell does not echo the whole payload.
    """
    data = full_json.get('data') if isinstance(full_json, dict) else None
    # Guard against missing or non-mapping 'data' (e.g. null) instead of raising.
    if not isinstance(data, dict) or key not in data:
        return
    data[key] = data[key][:max_entries]

Drop extra days of forecast

In [45]:
cull_extra_days(four_day_json)

In [46]:
len(four_day_json['data']['rating'])

24

Convert a unix timestamp -> utc

In [47]:
# Render the first rating entry's Unix timestamp as a UTC datetime.
pendulum.from_timestamp(rating_json['data']['rating'][0]['timestamp'], 'UTC')

DateTime(2024, 5, 27, 4, 0, 0, tzinfo=Timezone('UTC'))

In [48]:
# Index 23 (72 // 3 - 1) is the 24th hourly entry, i.e. the last one that
# cull_extra_days keeps; show its timestamp as a UTC datetime.
last_kept_index = 72 // 3 - 1
last_kept_entry = rating_json['data']['rating'][last_kept_index]
pendulum.from_timestamp(last_kept_entry['timestamp'], 'UTC')

DateTime(2024, 5, 28, 3, 0, 0, tzinfo=Timezone('UTC'))

The `utcOffset` field seems to be aware that I'm working in EST currently. Either that, or it reflects the time zone of the spot itself.

Let's check a west coast spot to confirm how this is handled

In [49]:
df

Unnamed: 0.1,Unnamed: 0,ids,names,lon,lat,urls
0,0,584204204e65fad6a7709b5d,Dauphin Island,-88.117000,30.229000,https://www.surfline.com/surf-report/dauphin-i...
1,1,584204204e65fad6a7709b61,Spuds,-87.549000,30.273000,https://www.surfline.com/surf-report/spuds/584...
2,2,584204204e65fad6a7709b62,Alabama Point,-87.562000,30.270000,https://www.surfline.com/surf-report/alabama-p...
3,3,584204204e65fad6a7709b60,West Pass,-87.737000,30.239000,https://www.surfline.com/surf-report/west-pass...
4,4,65948156c329a78a0914a15e,Morgantown Beach,-87.919130,30.230299,https://www.surfline.com/surf-report/morgantow...
...,...,...,...,...,...,...
1298,1298,640a2d14451905376297f483,Rutherford Beach,-93.124300,29.758500,https://www.surfline.com/surf-report/rutherfor...
1299,1299,5842041f4e65fad6a7708a1a,Assateague,-75.177040,38.148058,https://www.surfline.com/surf-report/assateagu...
1300,1300,5842041f4e65fad6a770886d,Ocean City Boardwalk,-75.081170,38.338461,https://www.surfline.com/surf-report/ocean-cit...
1301,1301,5842041f4e65fad6a7708a1b,North End to Ocean City Inlet,-75.080177,38.338890,https://www.surfline.com/surf-report/north-end...


In [50]:
# Take the id of the first spot whose name mentions La Jolla (case-insensitive;
# missing names count as non-matches) and wrap it as request params.
la_jolla_mask = df['names'].str.contains("La Jolla", case=False, na=False)
la_jolla_id = df.loc[la_jolla_mask, 'ids'].values[0]
la_jolla_dict = {params[0]: la_jolla_id}

In [51]:
la_jolla_dict

{'spotId': '5842041f4e65fad6a77088cc'}

In [52]:
pendulum.now("utc")

DateTime(2024, 5, 28, 0, 44, 38, 124162, tzinfo=Timezone('UTC'))

In [53]:
# Sketch of a per-spot record: spot identifiers, the UTC fetch time and the
# trimmed forecast payload. Placeholder values only; not used by the cells shown here.
new_dict = {"spot_id": "test", "spot_name": "test_2", "date": pendulum.now("utc"), "forecast": four_day_json}

In [54]:
# Fetch the rating forecast for the first three spots in the list, trim each
# payload with cull_extra_days, and tag it with the spot it belongs to and the
# UTC date it was fetched on.
spot_ratings = []
for spot_id, spot_name in df[['ids', 'names']][:3].values:
    # Explicit timeout: one stalled request must not hang the whole loop.
    res = requests.get(f"{base}/rating", params={'spotId': spot_id}, timeout=30)
    # Fail loudly on an HTTP error instead of storing an error body as a forecast
    # (which would only surface later as a KeyError during normalization).
    res.raise_for_status()
    data = res.json()
    cull_extra_days(data)
    current_date = pendulum.now("utc")
    utc_date = current_date.strftime("%Y-%m-%d")
    data['spot_id'] = spot_id
    data['spot_name'] = spot_name
    data['utc_fetch_date'] = utc_date
    spot_ratings.append(data)
    # TODO: before running this over the full spot list, add a short
    # time.sleep(...) here so the API is not hit in a tight loop.

In [55]:
pendulum.from_timestamp(rating_json['data']['rating'][0]['timestamp'], 'UTC')

DateTime(2024, 5, 27, 4, 0, 0, tzinfo=Timezone('UTC'))

In [56]:
# Flatten every spot's rating entries into one row each, repeating the spot
# metadata fields on every row.
ratings_df = pd.json_normalize(
    spot_ratings,
    record_path=['data', 'rating'],
    meta=['spot_id', 'spot_name', 'utc_fetch_date'],
)

In [57]:
ratings_df

Unnamed: 0,timestamp,utcOffset,rating.key,rating.value,spot_id,spot_name,utc_fetch_date
0,1716786000,-5,POOR,1,584204204e65fad6a7709b5d,Dauphin Island,2024-05-28
1,1716789600,-5,POOR,1,584204204e65fad6a7709b5d,Dauphin Island,2024-05-28
2,1716793200,-5,POOR,1,584204204e65fad6a7709b5d,Dauphin Island,2024-05-28
3,1716796800,-5,POOR,1,584204204e65fad6a7709b5d,Dauphin Island,2024-05-28
4,1716800400,-5,POOR,1,584204204e65fad6a7709b5d,Dauphin Island,2024-05-28
...,...,...,...,...,...,...,...
67,1716854400,-5,POOR,1,584204204e65fad6a7709b62,Alabama Point,2024-05-28
68,1716858000,-5,POOR,1,584204204e65fad6a7709b62,Alabama Point,2024-05-28
69,1716861600,-5,POOR,1,584204204e65fad6a7709b62,Alabama Point,2024-05-28
70,1716865200,-5,POOR,1,584204204e65fad6a7709b62,Alabama Point,2024-05-28


In [58]:
# Convert the Unix-second timestamps to UTC calendar dates ("YYYY-MM-DD").
# This cell overwrites its own input column, so the dtype guard makes it safe
# to re-run: once the column holds date strings the conversion is skipped
# instead of failing on non-numeric input.
if pd.api.types.is_numeric_dtype(ratings_df['timestamp']):
    ratings_df['timestamp'] = (
        pd.to_datetime(ratings_df['timestamp'], unit='s', utc=True)
        .dt.strftime('%Y-%m-%d')
    )

Alright, so it looks like each spot's forecast starts at 12am *local time*, with that moment stored as a Unix timestamp. Unix timestamps are already `UTC`-based, so the flat `UTC` time comes straight from the timestamp; to get the spot's local time, add the `utcOffset` (in hours) that is included in the response.

In [59]:
ratings_df

Unnamed: 0,timestamp,utcOffset,rating.key,rating.value,spot_id,spot_name,utc_fetch_date
0,2024-05-27,-5,POOR,1,584204204e65fad6a7709b5d,Dauphin Island,2024-05-28
1,2024-05-27,-5,POOR,1,584204204e65fad6a7709b5d,Dauphin Island,2024-05-28
2,2024-05-27,-5,POOR,1,584204204e65fad6a7709b5d,Dauphin Island,2024-05-28
3,2024-05-27,-5,POOR,1,584204204e65fad6a7709b5d,Dauphin Island,2024-05-28
4,2024-05-27,-5,POOR,1,584204204e65fad6a7709b5d,Dauphin Island,2024-05-28
...,...,...,...,...,...,...,...
67,2024-05-28,-5,POOR,1,584204204e65fad6a7709b62,Alabama Point,2024-05-28
68,2024-05-28,-5,POOR,1,584204204e65fad6a7709b62,Alabama Point,2024-05-28
69,2024-05-28,-5,POOR,1,584204204e65fad6a7709b62,Alabama Point,2024-05-28
70,2024-05-28,-5,POOR,1,584204204e65fad6a7709b62,Alabama Point,2024-05-28


In [None]:
df

In [None]:
# Collect, for every spot name in the CSV, the rows of the local `spots` table
# whose spot_name matches it. Each element of matching_spots is the list of
# rows returned for one name.
matching_spots = []
# Defined up front so the later cell that inspects `result` still works when
# the loop body never runs (e.g. an empty spot list).
result = []
# The statement is loop-invariant, so build it once; the name is passed as a
# bound parameter, never interpolated into the SQL.
# NOTE(review): LIKE with no % or _ wildcard in the bound value is effectively an
# equality match (unless the name itself contains % or _) — confirm whether
# substring matching was intended.
stmt = text("""select * from spots where spot_name like :spot""")
with SessionLocal() as db:
    # Skip missing names (NaN): they are not usable as string patterns.
    for spot in df['names'].dropna():
        result = db.execute(stmt, {"spot": spot}).fetchall()
        if result:
            matching_spots.append(result)

In [None]:
len(matching_spots)

In [None]:
result