In [1]:
import asyncio
import json
import logging
import time
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional

import aiohttp
import numpy as np
import pandas as pd
import pendulum
import requests
from sqlalchemy import (BigInteger, Boolean, Column, Float, Integer, String,
                        and_, create_engine, select, text)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import declarative_base, sessionmaker
# from utils.models import SlSpots

from utils.schemas import SlApiEndpoints, SlApiParams
from utils.sl_data import SurflineSpots, SpotForecast
from utils.utils import LOCAL_AIRFLOW_PG_URI, LOCAL_PG_URI

In [2]:
import nest_asyncio
nest_asyncio.apply()

In [3]:
logging.basicConfig(level=logging.INFO)

In [4]:
Base = declarative_base()

In [5]:
engine = create_engine(LOCAL_AIRFLOW_PG_URI)
SessionLocal = sessionmaker(bind=engine)

## Note:
See `240220_sl_surf_spots.ipynb` for spot getter

In [6]:
response = requests.get("https://services.surfline.com/taxonomy?type=taxonomy&id=58f7ed51dadb30820bb3879c&maxDepth=0")

* You will not get Surfline forecast data without a valid Surfline premium login. Add your credentials to `.env.development`:
  ```
  SURFLINE_EMAIL=xxx
  SURFLINE_PASSWORD=yyy
  ```

##### Requests

`https://services.surfline.com/kbyg/spots/forecasts/{type}?{params}`


Type|Data
----|----
rating|array of human-readable and numeric (0-6) ratings
wave|array of min/max sizes & optimal scores
wind|array of wind directions/speeds & optimal scores
tides|array of types & heights
weather|array of sunrise/set times, array of temperatures/weather conditions

Param|Values|Effect
-----|------|------
spotId|string|Surfline spot id that you want data for. A typical Surfline URL is `https://www.surfline.com/surf-report/venice-breakwater/590927576a2e4300134fbed8` where `590927576a2e4300134fbed8` is the `spotId`
days|integer|Number of forecast days to get (Max 6 w/o access token, Max 17 w/ premium token)
intervalHours|integer|Minimum of 1 (hour)
maxHeights|boolean|`true` seems to remove min & optimal values from the wave data output
sds|boolean|If true, use the new LOTUS forecast engine
accesstoken|string|Auth token to get premium data access (optional)

Anywhere there is an `optimalScore` the value can be interpreted as follows:

Value|Meaning
-----|-------
0|Suboptimal
1|Good
2|Optimal


In [7]:
types = ["rating", "wave", "wind", "tides", "weather"]
params = ["spotId", "days", "intervalHours", "maxHeights", "sds", "accesstoken"]
base = "https://services.surfline.com/kbyg/spots/forecasts"

In [8]:
datapath = Path('./data')


In [9]:
df = pd.read_csv(datapath/'spot_list.csv')

In [10]:
df.head()

Unnamed: 0.1,Unnamed: 0,ids,names,lon,lat,urls
0,0,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,-80.28914,27.475313,https://www.surfline.com/surf-report/ft-pierce...
1,1,5842041f4e65fad6a7708aab,Lori Wilson Park,-80.60568,28.336757,https://www.surfline.com/surf-report/lori-wils...
2,2,5842041f4e65fad6a7708ab1,Vero Pier,-80.36027,27.670122,https://www.surfline.com/surf-report/vero-pier...
3,3,584204214e65fad6a7709cc4,Millennium Beach Park,-80.580575,28.142895,https://www.surfline.com/surf-report/millenniu...
4,4,5842041f4e65fad6a7708aa7,Playalinda,-80.62329,28.643973,https://www.surfline.com/surf-report/playalind...


Get the spot `id` for 1st Street Jetty in Va Beach

In [11]:
jetty_id = df[df['names'].str.contains('1st Street Jetty', case=False, na=False)]['ids'].values[0]
jetty_id

'584204214e65fad6a7709ce7'

In [12]:
ex_params = {params[0]: jetty_id}
ex_params

{'spotId': '584204214e65fad6a7709ce7'}

Surfline seems to change their spot IDs periodically. Check a spot on the website and pass the objectId from the url as a param to debug if this is the case. If they've changed you'll need to run the notebook `240220_sl_surf_spots.ipynb` as mentioned above to refresh the spots dataset

In [13]:
debug_params = {params[0]: "584204214e65fad6a7709ce7"}

In [14]:
res = requests.get(f"{base}/{types[0]}", params=ex_params)
res.status_code

200

In [15]:
type(res.json())

dict

In [16]:
rating_json = res.json()

In [17]:
four_day_json = res.json()
if 'data' in four_day_json and 'rating' in four_day_json['data']:
    four_day_json['data']['rating'] = four_day_json['data']['rating'][:24]

In [18]:
def cull_extra_days(full_json):
    if 'data' in full_json and 'rating' in full_json['data']:
        full_json['data']['rating'] = full_json['data']['rating'][:24]

Drop extra days of forecast

In [19]:
cull_extra_days(four_day_json)

In [20]:
len(four_day_json['data']['rating'])

24

Convert a unix timestamp -> utc

In [21]:
pendulum.from_timestamp(rating_json['data']['rating'][0]['timestamp'], 'UTC')

DateTime(2024, 7, 1, 4, 0, 0, tzinfo=Timezone('UTC'))

In [22]:
pendulum.from_timestamp(rating_json['data']['rating'][int(72 / 3)-1]['timestamp'], 'UTC')

DateTime(2024, 7, 2, 3, 0, 0, tzinfo=Timezone('UTC'))

The `utcOffset` field seems to be aware that I'm working in EST currently. Either that or it's the time coding for the spot itself.

Let's check a west coast spot to confirm how this is handled

In [23]:
df

Unnamed: 0.1,Unnamed: 0,ids,names,lon,lat,urls
0,0,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,-80.289140,27.475313,https://www.surfline.com/surf-report/ft-pierce...
1,1,5842041f4e65fad6a7708aab,Lori Wilson Park,-80.605680,28.336757,https://www.surfline.com/surf-report/lori-wils...
2,2,5842041f4e65fad6a7708ab1,Vero Pier,-80.360270,27.670122,https://www.surfline.com/surf-report/vero-pier...
3,3,584204214e65fad6a7709cc4,Millennium Beach Park,-80.580575,28.142895,https://www.surfline.com/surf-report/millenniu...
4,4,5842041f4e65fad6a7708aa7,Playalinda,-80.623290,28.643973,https://www.surfline.com/surf-report/playalind...
...,...,...,...,...,...,...
1301,1301,640a2d1945190521d497f5d8,Touhy Beach,-87.660750,42.012040,https://www.surfline.com/surf-report/touhy-bea...
1302,1302,584204204e65fad6a7709508,Tower Road,-87.729836,42.116911,https://www.surfline.com/surf-report/tower-roa...
1303,1303,584204204e65fad6a7709500,Promontory Point/57th Street Beach,-87.576692,41.794601,https://www.surfline.com/surf-report/promontor...
1304,1304,584204204e65fad6a7709509,Dempster Street,-87.669695,42.041746,https://www.surfline.com/surf-report/dempster-...


In [24]:
la_jolla_id = df[df['names'].str.contains("La Jolla", case=False, na=False)]['ids'].values[0]
la_jolla_dict = {params[0]: la_jolla_id}

In [25]:
la_jolla_dict

{'spotId': '5842041f4e65fad6a77088cc'}

In [26]:
pendulum.now("utc")

DateTime(2024, 7, 1, 22, 23, 2, 363663, tzinfo=Timezone('UTC'))

In [27]:
new_dict = {"spot_id": "test", "spot_name": "test_2", "date": pendulum.now("utc"), "forecast": four_day_json}

In [28]:
def fetch_from_sl_api(endpoint: SlApiEndpoints, param_type: SlApiParams, param: str):
    base_url = "https://services.surfline.com/kbyg/spots/forecasts"
    res = requests.get(f"{base_url}/{endpoint}", params={param_type: param})
    data = res.json()
    return data

In [29]:
test_res = fetch_from_sl_api(SlApiEndpoints.RATING.value, SlApiParams.SPOT_ID.value, param=jetty_id)

In [30]:
spot_ratings = []
for spot_id, spot_name in df[['ids', 'names']][:3].values:
    res = requests.get(f"{base}/rating", params={'spotId': spot_id})
    data = res.json()
    cull_extra_days(data)
    current_date = pendulum.now("utc")
    utc_date = current_date.strftime("%Y-%m-%d")
    data['spot_id'] = spot_id
    data['spot_name'] = spot_name
    data['utc_fetch_date'] = utc_date
    spot_ratings.append(data)
    # time.sleep()

In [31]:
pendulum.from_timestamp(rating_json['data']['rating'][0]['timestamp'], 'UTC')

DateTime(2024, 7, 1, 4, 0, 0, tzinfo=Timezone('UTC'))

In [32]:
ratings_df = pd.json_normalize(spot_ratings, record_path=['data', 'rating'], meta=['spot_id', 'spot_name', 'utc_fetch_date'] )

In [33]:
ratings_df

Unnamed: 0,timestamp,utcOffset,rating.key,rating.value,spot_id,spot_name,utc_fetch_date
0,1719806400,-4,POOR,1.000000,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01
1,1719810000,-4,POOR,0.833333,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01
2,1719813600,-4,POOR,0.666667,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01
3,1719817200,-4,POOR,0.500000,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01
4,1719820800,-4,VERY_POOR,0.333333,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01
...,...,...,...,...,...,...,...
67,1719874800,-4,POOR,0.666667,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01
68,1719878400,-4,POOR,0.833333,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01
69,1719882000,-4,POOR,1.000000,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01
70,1719885600,-4,POOR,1.000000,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01


In [34]:
ratings_df['timestamp'] = ratings_df['timestamp'].apply(lambda x: pendulum.from_timestamp(x).to_datetime_string())

Alright, so it looks like each spot's forecast starts at 12am *local time*, with the timestamp for that time in unix. To figure out the flat `UTC` time for each spot you can just apply the `utcOffset` that is included in response. 

In [35]:
ratings_df['timestamp'] = pd.to_datetime(ratings_df['timestamp'])

In [36]:
ratings_df

Unnamed: 0,timestamp,utcOffset,rating.key,rating.value,spot_id,spot_name,utc_fetch_date
0,2024-07-01 04:00:00,-4,POOR,1.000000,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01
1,2024-07-01 05:00:00,-4,POOR,0.833333,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01
2,2024-07-01 06:00:00,-4,POOR,0.666667,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01
3,2024-07-01 07:00:00,-4,POOR,0.500000,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01
4,2024-07-01 08:00:00,-4,VERY_POOR,0.333333,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01
...,...,...,...,...,...,...,...
67,2024-07-01 23:00:00,-4,POOR,0.666667,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01
68,2024-07-02 00:00:00,-4,POOR,0.833333,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01
69,2024-07-02 01:00:00,-4,POOR,1.000000,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01
70,2024-07-02 02:00:00,-4,POOR,1.000000,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01


In [37]:
ratings_df['timestamp_utc'] = ratings_df.apply(lambda row: row['timestamp'] + pd.Timedelta(hours=row['utcOffset']), axis=1)

In [38]:
ratings_df

Unnamed: 0,timestamp,utcOffset,rating.key,rating.value,spot_id,spot_name,utc_fetch_date,timestamp_utc
0,2024-07-01 04:00:00,-4,POOR,1.000000,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01,2024-07-01 00:00:00
1,2024-07-01 05:00:00,-4,POOR,0.833333,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01,2024-07-01 01:00:00
2,2024-07-01 06:00:00,-4,POOR,0.666667,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01,2024-07-01 02:00:00
3,2024-07-01 07:00:00,-4,POOR,0.500000,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01,2024-07-01 03:00:00
4,2024-07-01 08:00:00,-4,VERY_POOR,0.333333,5842041f4e65fad6a7708aa3,Ft. Pierce Inlet,2024-07-01,2024-07-01 04:00:00
...,...,...,...,...,...,...,...,...
67,2024-07-01 23:00:00,-4,POOR,0.666667,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01,2024-07-01 19:00:00
68,2024-07-02 00:00:00,-4,POOR,0.833333,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01,2024-07-01 20:00:00
69,2024-07-02 01:00:00,-4,POOR,1.000000,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01,2024-07-01 21:00:00
70,2024-07-02 02:00:00,-4,POOR,1.000000,5842041f4e65fad6a7708ab1,Vero Pier,2024-07-01,2024-07-01 22:00:00


In [39]:
ratings_df.dtypes

timestamp         datetime64[ns]
utcOffset                  int64
rating.key                object
rating.value             float64
spot_id                   object
spot_name                 object
utc_fetch_date            object
timestamp_utc     datetime64[ns]
dtype: object

In [40]:
with SessionLocal() as db:
    matching_spots = []
    for spot in df['names']:
        stmt = text("""select * from spots where spot_name like :spot""")
        result = db.execute(stmt, {"spot": spot}).fetchall()
        if len(result) > 0:
            matching_spots.append(result)
    

OperationalError: (psycopg2.OperationalError) connection to server at "localhost" (::1), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?
connection to server at "localhost" (127.0.0.1), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?

(Background on this error at: https://sqlalche.me/e/14/e3q8)

In [41]:
len(matching_spots)

0

In [42]:
jetty_waves = fetch_from_sl_api(SlApiEndpoints.WAVE.value, SlApiParams.SPOT_ID.value, jetty_id)

In [43]:
jetty_waves['associated']

{'units': {'temperature': 'F',
  'tideHeight': 'FT',
  'swellHeight': 'FT',
  'waveHeight': 'FT',
  'windSpeed': 'KTS',
  'pressure': 'MB'},
 'utcOffset': -4,
 'location': {'lon': -75.96648, 'lat': 36.83036135089083},
 'forecastLocation': {'lon': -75.947, 'lat': 36.847},
 'offshoreLocation': {'lon': -75.75, 'lat': 37},
 'runInitializationTimestamp': 1719835200}

In [44]:
cull_extra_days(jetty_waves)

In [45]:
jetty_waves['data']['wave'] = jetty_waves['data']['wave'][:24]

In [46]:
len(jetty_waves['data']['wave'])

24

In [47]:
jetty_waves['associated']['spotId'] = jetty_id

In [48]:
jetty_waves['data']['spotId'] = jetty_id

In [49]:
jetty_meta_df = pd.json_normalize(jetty_waves)

In [50]:
jetty_meta_df.drop(['permissions.violations', 'permissions.data', 'data.wave', 'data.spotId'], inplace=True, axis=1)

In [51]:
jetty_meta_df

Unnamed: 0,associated.units.temperature,associated.units.tideHeight,associated.units.swellHeight,associated.units.waveHeight,associated.units.windSpeed,associated.units.pressure,associated.utcOffset,associated.location.lon,associated.location.lat,associated.forecastLocation.lon,associated.forecastLocation.lat,associated.offshoreLocation.lon,associated.offshoreLocation.lat,associated.runInitializationTimestamp,associated.spotId
0,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,36.847,-75.75,37,1719835200,584204214e65fad6a7709ce7


In [52]:
jetty_wave_df = pd.json_normalize(
    jetty_waves, record_path=["data", "wave"], meta=[["data", "spotId"]]
)
jetty_wave_df.drop("swells", inplace=True, axis=1)
jetty_wave_df.rename(columns={"power": "wave_power"}, inplace=True)

In [53]:
# jetty_wave_df['timestamp'] = jetty_wave_df['timestamp'].apply(lambda x: pendulum.from_timestamp(x).to_datetime_string())
# jetty_wave_df['timestamp'] = pd.to_datetime(jetty_wave_df['timestamp'])
# jetty_wave_df['timestamp_utc'] = jetty_wave_df.apply(lambda row: row['timestamp'] + pd.Timedelta(hours=row['utcOffset']), axis=1)

In [54]:
jetty_wave_df

Unnamed: 0,timestamp,probability,utcOffset,wave_power,surf.min,surf.max,surf.plus,surf.humanRelation,surf.raw.min,surf.raw.max,surf.optimalScore,data.spotId
0,1719806400,100.0,-4,47.76132,2,3,False,Thigh to waist,1.54921,2.5164,2,584204214e65fad6a7709ce7
1,1719810000,100.0,-4,42.69708,2,3,False,Thigh to waist,1.44127,2.44127,2,584204214e65fad6a7709ce7
2,1719813600,100.0,-4,31.29579,2,3,False,Thigh to waist,1.23491,2.23491,2,584204214e65fad6a7709ce7
3,1719817200,96.666667,-4,24.76881,2,3,False,Thigh to waist,1.06135,2.02854,2,584204214e65fad6a7709ce7
4,1719820800,96.666667,-4,21.73813,1,2,False,Knee to thigh,0.9206,1.9206,0,584204214e65fad6a7709ce7
5,1719824400,96.666667,-4,19.17754,1,2,False,Knee to thigh,0.84547,1.81266,0,584204214e65fad6a7709ce7
6,1719828000,100.0,-4,16.83793,1,2,False,Knee to thigh,1.0,2.0,0,584204214e65fad6a7709ce7
7,1719831600,100.0,-4,14.8737,1,2,False,Knee to thigh,1.0,2.0,0,584204214e65fad6a7709ce7
8,1719835200,100.0,-4,17.57871,2,3,False,Thigh to waist,1.47244,2.47244,2,584204214e65fad6a7709ce7
9,1719838800,100.0,-4,19.94126,2,3,False,Thigh to waist,1.58399,2.58399,2,584204214e65fad6a7709ce7


In [55]:
jetty_swell_df = pd.json_normalize(
    jetty_waves,
    record_path=['data', 'wave', 'swells'],
    meta=[['data', 'wave', 'timestamp'], ['data', 'spotId']]
)

jetty_swell_df['swells_idx'] = jetty_swell_df.groupby('data.wave.timestamp').cumcount()

In [56]:
jetty_swell_df.head()

Unnamed: 0,height,period,impact,power,direction,directionMin,optimalScore,data.wave.timestamp,data.spotId,swells_idx
0,3.54331,5,0.65,44.51717,160.51,143.34,0,1719806400,584204214e65fad6a7709ce7,0
1,0.88583,8,0.3191,3.24415,107.79,103.01,0,1719806400,584204214e65fad6a7709ce7,1
2,0.0,0,0.0,0.0,0.0,0.0,0,1719806400,584204214e65fad6a7709ce7,2
3,0.0,0,0.0,0.0,0.0,0.0,0,1719806400,584204214e65fad6a7709ce7,3
4,0.0,0,0.0,0.0,0.0,0.0,0,1719806400,584204214e65fad6a7709ce7,4


In [57]:
jetty_swell_df.rename({"power": 'swell_power'}, inplace=True)

In [58]:
jetty_swell_df['data.wave.timestamp'].value_counts()

1719806400    6
1719810000    6
1719885600    6
1719882000    6
1719878400    6
1719874800    6
1719871200    6
1719867600    6
1719864000    6
1719860400    6
1719856800    6
1719853200    6
1719849600    6
1719846000    6
1719842400    6
1719838800    6
1719835200    6
1719831600    6
1719828000    6
1719824400    6
1719820800    6
1719817200    6
1719813600    6
1719889200    6
Name: data.wave.timestamp, dtype: int64

In [59]:
jetty_meta_df.head()

Unnamed: 0,associated.units.temperature,associated.units.tideHeight,associated.units.swellHeight,associated.units.waveHeight,associated.units.windSpeed,associated.units.pressure,associated.utcOffset,associated.location.lon,associated.location.lat,associated.forecastLocation.lon,associated.forecastLocation.lat,associated.offshoreLocation.lon,associated.offshoreLocation.lat,associated.runInitializationTimestamp,associated.spotId
0,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,36.847,-75.75,37,1719835200,584204214e65fad6a7709ce7


In [60]:
jetty_wave_df.head()

Unnamed: 0,timestamp,probability,utcOffset,wave_power,surf.min,surf.max,surf.plus,surf.humanRelation,surf.raw.min,surf.raw.max,surf.optimalScore,data.spotId
0,1719806400,100.0,-4,47.76132,2,3,False,Thigh to waist,1.54921,2.5164,2,584204214e65fad6a7709ce7
1,1719810000,100.0,-4,42.69708,2,3,False,Thigh to waist,1.44127,2.44127,2,584204214e65fad6a7709ce7
2,1719813600,100.0,-4,31.29579,2,3,False,Thigh to waist,1.23491,2.23491,2,584204214e65fad6a7709ce7
3,1719817200,96.666667,-4,24.76881,2,3,False,Thigh to waist,1.06135,2.02854,2,584204214e65fad6a7709ce7
4,1719820800,96.666667,-4,21.73813,1,2,False,Knee to thigh,0.9206,1.9206,0,584204214e65fad6a7709ce7


In [61]:
jetty_swell_df.head(n=10)

Unnamed: 0,height,period,impact,power,direction,directionMin,optimalScore,data.wave.timestamp,data.spotId,swells_idx
0,3.54331,5,0.65,44.51717,160.51,143.34,0,1719806400,584204214e65fad6a7709ce7,0
1,0.88583,8,0.3191,3.24415,107.79,103.01,0,1719806400,584204214e65fad6a7709ce7,1
2,0.0,0,0.0,0.0,0.0,0.0,0,1719806400,584204214e65fad6a7709ce7,2
3,0.0,0,0.0,0.0,0.0,0.0,0,1719806400,584204214e65fad6a7709ce7,3
4,0.0,0,0.0,0.0,0.0,0.0,0,1719806400,584204214e65fad6a7709ce7,4
5,0.0,0,0.0,0.0,0.0,0.0,0,1719806400,584204214e65fad6a7709ce7,5
6,3.31365,6,0.6496,39.48062,156.74,140.175,0,1719810000,584204214e65fad6a7709ce7,0
7,0.88583,8,0.3194,3.21646,108.01,103.22,0,1719810000,584204214e65fad6a7709ce7,1
8,0.0,0,0.0,0.0,0.0,0.0,0,1719810000,584204214e65fad6a7709ce7,2
9,0.0,0,0.0,0.0,0.0,0.0,0,1719810000,584204214e65fad6a7709ce7,3


In [62]:
combined_waves_df = pd.merge(
    jetty_wave_df,
    jetty_swell_df,
    how="inner",
    left_on=["timestamp", "data.spotId"],
    right_on=["data.wave.timestamp", 'data.spotId'],
)

In [63]:
len(combined_waves_df)

144

In [64]:
combined_waves_df.head()

Unnamed: 0,timestamp,probability,utcOffset,wave_power,surf.min,surf.max,surf.plus,surf.humanRelation,surf.raw.min,surf.raw.max,...,data.spotId,height,period,impact,power,direction,directionMin,optimalScore,data.wave.timestamp,swells_idx
0,1719806400,100.0,-4,47.76132,2,3,False,Thigh to waist,1.54921,2.5164,...,584204214e65fad6a7709ce7,3.54331,5,0.65,44.51717,160.51,143.34,0,1719806400,0
1,1719806400,100.0,-4,47.76132,2,3,False,Thigh to waist,1.54921,2.5164,...,584204214e65fad6a7709ce7,0.88583,8,0.3191,3.24415,107.79,103.01,0,1719806400,1
2,1719806400,100.0,-4,47.76132,2,3,False,Thigh to waist,1.54921,2.5164,...,584204214e65fad6a7709ce7,0.0,0,0.0,0.0,0.0,0.0,0,1719806400,2
3,1719806400,100.0,-4,47.76132,2,3,False,Thigh to waist,1.54921,2.5164,...,584204214e65fad6a7709ce7,0.0,0,0.0,0.0,0.0,0.0,0,1719806400,3
4,1719806400,100.0,-4,47.76132,2,3,False,Thigh to waist,1.54921,2.5164,...,584204214e65fad6a7709ce7,0.0,0,0.0,0.0,0.0,0.0,0,1719806400,4


In [65]:
combined_df = pd.merge(jetty_meta_df, combined_waves_df, how='cross')

In [66]:
combined_df

Unnamed: 0,associated.units.temperature,associated.units.tideHeight,associated.units.swellHeight,associated.units.waveHeight,associated.units.windSpeed,associated.units.pressure,associated.utcOffset,associated.location.lon,associated.location.lat,associated.forecastLocation.lon,...,data.spotId,height,period,impact,power,direction,directionMin,optimalScore,data.wave.timestamp,swells_idx
0,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,...,584204214e65fad6a7709ce7,3.54331,5,0.6500,44.51717,160.51,143.340,0,1719806400,0
1,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,...,584204214e65fad6a7709ce7,0.88583,8,0.3191,3.24415,107.79,103.010,0,1719806400,1
2,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,...,584204214e65fad6a7709ce7,0.00000,0,0.0000,0.00000,0.00,0.000,0,1719806400,2
3,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,...,584204214e65fad6a7709ce7,0.00000,0,0.0000,0.00000,0.00,0.000,0,1719806400,3
4,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,...,584204214e65fad6a7709ce7,0.00000,0,0.0000,0.00000,0.00,0.000,0,1719806400,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,...,584204214e65fad6a7709ce7,0.00000,0,0.0000,0.00000,0.00,0.000,0,1719889200,1
140,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,...,584204214e65fad6a7709ce7,1.57480,5,0.3241,3.76677,48.39,37.495,0,1719889200,2
141,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,...,584204214e65fad6a7709ce7,0.00000,0,0.0000,0.00000,0.00,0.000,0,1719889200,3
142,F,FT,FT,FT,KTS,MB,-4,-75.96648,36.830361,-75.947,...,584204214e65fad6a7709ce7,0.88583,8,0.3983,3.77554,109.16,104.095,0,1719889200,4


In [67]:
engine = create_engine(LOCAL_PG_URI)
SessionLocal = sessionmaker(bind=engine)

In [68]:
with SessionLocal() as db:
    stmt = select(SlSpots.spot_id)
    spots = db.execute(stmt).scalars().all()

NameError: name 'SlSpots' is not defined

In [69]:
def transform_sl_wave_data(data: Dict) -> pd.DataFrame:
    if not data:
        raise ValueError("Data is empty")

    meta_df = pd.json_normalize(data)
    meta_df.drop(
        ["permissions.violations", "permissions.data", "data.wave", "data.spotId"],
        inplace=True,
        axis=1,
    )

    wave_df = pd.json_normalize(
        jetty_waves, record_path=["data", "wave"], meta=[["data", "spotId"]]
    )
    wave_df.drop("swells", inplace=True, axis=1)
    wave_df.rename(columns={"power": "wave_power"}, inplace=True)

    swell_df = pd.json_normalize(
        jetty_waves,
        record_path=["data", "wave", "swells"],
        meta=[["data", "wave", "timestamp"], ["data", "spotId"]],
    )

    swell_df.rename(columns={"power": "swell_power"}, inplace=True)
    swell_df["swells_idx"] = swell_df.groupby("data.wave.timestamp").cumcount()

    combined_waves_df = pd.merge(
        wave_df,
        swell_df,
        how="inner",
        left_on=["timestamp", "data.spotId"],
        right_on=["data.wave.timestamp", "data.spotId"],
    )

    combined_df = pd.merge(meta_df, combined_waves_df, how='cross')

    return combined_df

In [70]:
data = []
for spot in spots[:2]:
    result = fetch_from_sl_api(SlApiEndpoints.WAVE.value, SlApiParams.SPOT_ID.value, param=spot)
    if result.get("associated"):
        result['associated']['spotId'] = spot
        result['data']['spotId'] = spot
    data.append(result)


NameError: name 'spots' is not defined

In [None]:
full_df = pd.concat([transform_sl_wave_data(entry) for entry in data])

In [None]:
full_df

In [None]:
full_df.columns

In [None]:
full_df.dtypes

In [None]:
# class SlRatings(Base):
#     __tablename__ = 'sl_ratings'

#     id = Column(Integer, primary_key=True, autoincrement=True)
#     associated_units_temperature = Column(String)
#     associated_units_tideHeight = Column(String)
#     associated_units_swellHeight = Column(String)
#     associated_units_waveHeight = Column(String)
#     associated_units_windSpeed = Column(String)
#     associated_units_pressure = Column(String)
#     associated_utcOffset = Column(Integer)
#     associated_location_lon = Column(Float)
#     associated_location_lat = Column(Float)
#     associated_forecastLocation_lon = Column(Float)
#     associated_forecastLocation_lat = Column(Float)
#     associated_offshoreLocation_lon = Column(Float)
#     associated_offshoreLocation_lat = Column(Float)
#     associated_runInitializationTimestamp = Column(BigInteger)
#     associated_spotId = Column(String)
#     timestamp = Column(String)
#     probability = Column(Float)
#     utcOffset = Column(Integer)
#     wave_power = Column(Float)
#     surf_min = Column(Integer)
#     surf_max = Column(Integer)
#     surf_plus = Column(Boolean)
#     surf_humanRelation = Column(String)
#     surf_raw_min = Column(Float)
#     surf_raw_max = Column(Float)
#     surf_optimalScore = Column(Integer)
#     data_spotId = Column(String)
#     height = Column(Float)
#     period = Column(Integer)
#     impact = Column(Float)
#     swell_power = Column(Float)
#     direction = Column(Float)
#     directionMin = Column(Float)
#     optimalScore = Column(Integer)
#     data_wave_timestamp = Column(String)
#     swells_idx = Column(Integer)


In [None]:
from utils.models import SlRatings, create_tables

In [None]:
create_tables()

In [None]:
# class SpotForecast:
#     def __init__(self, database_uri):
#         self.spots = []
#         self.engine = create_engine(database_uri)
#         self.SessionLocal = sessionmaker(bind=engine)


#     def get_session(self):
#         return self.SessionLocal()


#     def fetch_all_forecasts(self) -> List[Dict[Any, Any]]:
#         data = []
#         for spot in self.spots[:2]:
#             result = self.fetch_forecast(
#                 SlApiEndpoints.WAVE.value, SlApiParams.SPOT_ID.value, param=spot
#             )
#             if result.get("associated"):
#                 result["associated"]["spotId"] = spot
#                 result["data"]["spotId"] = spot
#             data.append(result)
#         return data


#     def fetch_forecast(self, endpoint: SlApiEndpoints, param_type: SlApiParams, param: str) -> Dict[Any, Any]:
#         base_url = "https://services.surfline.com/kbyg/spots/forecasts"
#         res = requests.get(f"{base_url}/{endpoint}", params={param_type: param})
#         data = res.json()
#         return data


#     def fetch_spots_from_db(self) -> None:
#         with self.get_session() as db:
#             stmt = select(SlSpots.spot_id)
#             self.spots = db.execute(stmt).scalars().all()


#     def transform_wave_data(self, data: Dict) -> List[Dict[Any, Any]]:
#         if not data:
#             raise ValueError("Data is empty")

#         meta_df = pd.json_normalize(data, sep="_")
#         meta_df.drop(
#             ["permissions_violations", "permissions_data", "data_wave", "data_spotId"],
#             inplace=True,
#             axis=1,
#         )

#         wave_df = pd.json_normalize(
#             jetty_waves, record_path=["data", "wave"], meta=[["data", "spotId"]], sep="_"
#         )
#         wave_df.drop("swells", inplace=True, axis=1)
#         wave_df.rename(columns={"power": "wave_power"}, inplace=True)

#         swell_df = pd.json_normalize(
#             jetty_waves,
#             record_path=["data", "wave", "swells"],
#             meta=[["data", "wave", "timestamp"], ["data", "spotId"]],
#             sep="_",
#         )

#         swell_df.rename(columns={"power": "swell_power"}, inplace=True)
#         swell_df["swells_idx"] = swell_df.groupby("data_wave_timestamp").cumcount()

#         combined_waves_df = pd.merge(
#             wave_df,
#             swell_df,
#             how="inner",
#             left_on=["timestamp", "data_spotId"],
#             right_on=["data_wave_timestamp", "data_spotId"],
#         )

#         combined_df = pd.merge(meta_df, combined_waves_df, how="cross")
#         dict_record = combined_df.to_dict("records")

#         return dict_record


#     def load_to_pg(self, dict_record: List[Dict[Any, Any]]) -> None:
#         with self.get_session() as db:
#             db.bulk_insert_mappings(SlRatings, dict_record)
#             db.commit()

#     def process_all_spot_ratings(self):
#         self.fetch_spots_from_db()
#         data = self.fetch_all_forecasts()
#         for spot in data:
#             record = self.transform_wave_data(spot)
#             self.load_to_pg(record)


    

In [None]:
spot_forecast = SpotForecast(LOCAL_PG_URI)

In [None]:
spot_forecast.process_all_spot_ratings()

In [None]:
test_insert = spot_forecast.transform_wave_data(jetty_waves)

In [None]:
test_insert

In [None]:
test_insert_dict = test_insert.to_dict('records')

In [None]:
type(test_insert_dict[0])

In [None]:
with SessionLocal() as db:
    db.bulk_insert_mappings(SlRatings, test_insert_dict)
    db.commit()