In [1]:
import os

import pandas as pd
import requests
from pydantic import BaseModel, ConfigDict
from sqlalchemy import and_, insert, not_, select, text, tuple_
from utils.bodhi_models import BodhiWaves, BohdiWavesModel
from utils.bodhi_models import engine as bodhi_engine
from utils.bodhi_models import get_session
from typing import List, Dict, Any, Optional
import logging
from utils.sl_models import engine as sl_engine

### Test Pydantic Models

In [2]:
# Smoke-test query: pull a single wave_forecast row to validate the Pydantic model against.
with get_session(bodhi_engine) as db:
    # Raw SQL must be wrapped in text(): passing a bare str to Session.execute()
    # is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
    stmt = text("""select * from wave_forecast limit 1""")
    results = db.execute(stmt).fetchall()

  results = db.execute(stmt).fetchall()


In [3]:
# Display the fetched row(s) to inspect the raw column values.
results

[(1, '0101000020E61000000000000000604B400000000000805440', 82.0, 54.75, datetime.datetime(2024, 6, 12, 0, 0, tzinfo=datetime.timezone.utc), datetime.timedelta(0), datetime.datetime(2024, 6, 12, 0, 0, tzinfo=datetime.timezone.utc), 0.029999999329447746, 1.2300000190734863, 51.7400016784668, 0.019999999552965164, 0.3100000023841858, 24.479999542236328, 2.2899999618530273, 188.36000061035156, None, None, datetime.datetime(2024, 6, 12, 13, 19, 52, 744570, tzinfo=datetime.timezone.utc))]

In [4]:
# Round-trip every fetched row through the Pydantic model (row -> dict -> model).
[BohdiWavesModel.model_validate(row._asdict()) for row in results]

[BohdiWavesModel(id=1, location='0101000020E61000000000000000604B400000000000805440', latitude=82.0, longitude=54.75, time=datetime.datetime(2024, 6, 12, 0, 0, tzinfo=datetime.timezone.utc), step=datetime.timedelta(0), valid_time=datetime.datetime(2024, 6, 12, 0, 0, tzinfo=datetime.timezone.utc), swh=0.029999999329447746, perpw=1.2300000190734863, dirpw=51.7400016784668, shww=0.019999999552965164, mpww=0.3100000023841858, wvdir=24.479999542236328, ws=2.2899999618530273, wdir=188.36000061035156, swell=None, swper=None, entry_updated=datetime.datetime(2024, 6, 12, 13, 19, 52, 744570, tzinfo=datetime.timezone.utc))]

## Sl Spots by id, lat, and lon

In [5]:
class SpotSpatialIdx(BaseModel):
    """A spot id together with the spot's latitude and longitude.

    ``from_attributes=True`` lets ``model_validate`` read the fields from
    attribute-bearing objects (such as result rows) instead of only dicts.
    """

    model_config = ConfigDict(from_attributes=True)

    spot_id: str
    spot_lat: float
    spot_lon: float

In [6]:
# Fetch id / lat / lon for every spot in sl_spots.
with get_session(sl_engine) as db:
    # Wrapped in text() like the later queries in this notebook: a bare str passed
    # to Session.execute() is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
    stmt = text("select spot_id, spot_lat, spot_lon from sl_spots")
    results = db.execute(stmt).fetchall()


In [7]:
# Validate each spot row into a SpotSpatialIdx model.
spatial_idxs = [SpotSpatialIdx.model_validate(row) for row in results]

### Get the associated Offshore Location for each spot

In [8]:
# One row per spot id from sl_ratings (DISTINCT ON "associated_spotId"), carrying that
# spot's associated offshore latitude/longitude. There is no ORDER BY, so which rating
# row is chosen for each spot is left to the database.
with get_session(sl_engine) as db:
    stmt = text("""select distinct on ("associated_spotId") "associated_spotId", "associated_offshoreLocation_lat", "associated_offshoreLocation_lon" from sl_ratings""")
    results = db.execute(stmt).fetchall()

In [9]:
class SlOffshoreIdx(BaseModel):
    """A spot id together with its associated offshore latitude and longitude.

    Field names mirror the column names selected from ``sl_ratings``.
    ``from_attributes=True`` lets ``model_validate`` read them from
    attribute-bearing objects (such as result rows) instead of only dicts.
    """

    model_config = ConfigDict(from_attributes=True)

    associated_spotId: str
    associated_offshoreLocation_lat: float
    associated_offshoreLocation_lon: float

In [10]:
# Validate each offshore row into a SlOffshoreIdx model.
data = [SlOffshoreIdx.model_validate(row) for row in results]

In [11]:
# Convert the validated models back to plain dicts.
data_dicts = [model.model_dump() for model in data]

In [12]:
# Peek at the first record to confirm the key names.
data_dicts[0]

{'associated_spotId': '5842041f4e65fad6a77087f9',
 'associated_offshoreLocation_lat': 37.5,
 'associated_offshoreLocation_lon': -122.75}

#### Transform to dataframe

Filter to only include spots whose offshore location matches one of bodhi-cast's offshore locations

In [13]:
# Build a DataFrame from the offshore-location records (one row per record).
df = pd.DataFrame(data_dicts)

In [14]:
# Preview the first rows.
df.head()

Unnamed: 0,associated_spotId,associated_offshoreLocation_lat,associated_offshoreLocation_lon
0,5842041f4e65fad6a77087f9,37.5,-122.75
1,5842041f4e65fad6a7708804,45.25,-124.25
2,5842041f4e65fad6a7708805,36.75,-122.25
3,5842041f4e65fad6a7708806,36.75,-122.25
4,5842041f4e65fad6a7708807,36.9,-122.1


Create a mask that keeps only rows whose lat and lon fall on the 0.25° grid, i.e. whose fractional part is .0, .25, .5, or .75

In [15]:
# Temporary helper columns: remainder of each offshore coordinate modulo 4.
# They are dropped again further down, once filtering is done.
df['lat_mod'] = df['associated_offshoreLocation_lat'] % 4
df['lon_mod'] = df['associated_offshoreLocation_lon'] % 4

In [16]:
# Keep only rows whose offshore lat AND lon sit exactly on the 0.25-degree grid
# (.0, .25, .5, .75). Multiples of 0.25 are exactly representable as binary floats,
# so `% 0.25 == 0` is an exact membership test. The earlier `round(x, 2) == x`
# check on the modulo-4 remainder only tested "at most two decimals": it rejected
# off-grid values such as 36.9 purely through floating-point noise and would
# accept values like 0.9.
mask = (
    (df['associated_offshoreLocation_lat'] % 0.25 == 0)
    & (df['associated_offshoreLocation_lon'] % 0.25 == 0)
)

In [17]:
# Apply the mask. This rebinds `df` to the filtered frame; re-run from the
# DataFrame-creation cell to get the unfiltered data back.
df = df[mask]

In [18]:
# Remove the temporary modulo helper columns.
df = df.drop(columns=['lat_mod', 'lon_mod'])

In [19]:
# Number of rows left after filtering.
len(df)

579

In [20]:
# Pair up each row's offshore (lat, lon). `.tolist()` yields plain Python floats
# rather than NumPy scalars, so `str()` of each tuple renders as "(37.0, -122.5)" on
# every NumPy version. NumPy 2 renders scalars inside containers as
# "np.float64(37.0)", which would corrupt the SQL text built from this list.
lat_lon_list = list(zip(df['associated_offshoreLocation_lat'].tolist(), df['associated_offshoreLocation_lon'].tolist()))

In [21]:
# Show the distinct (lat, lon) pairs; set ordering is arbitrary.
list(set(zip(df['associated_offshoreLocation_lat'].values, df['associated_offshoreLocation_lon'].values)))

[(37.0, -122.5),
 (57.0, -135.75),
 (38.25, -123.25),
 (31.0, -81.0),
 (26.25, -79.75),
 (45.5, -83.5),
 (39.5, -124.25),
 (42.0, -87.5),
 (41.25, -70.75),
 (41.25, -70.5),
 (41.25, -70.0),
 (26.5, -97.0),
 (29.0, -94.75),
 (41.5, -69.75),
 (38.5, -123.75),
 (42.5, -70.5),
 (46.0, -124.25),
 (43.25, -70.25),
 (43.25, -70.5),
 (27.5, -96.75),
 (28.75, -90.25),
 (31.5, -80.75),
 (28.75, -90.0),
 (39.25, -74.25),
 (34.0, -77.5),
 (36.5, -75.5),
 (35.75, -121.75),
 (30.0, -87.25),
 (30.0, -87.5),
 (30.0, -87.0),
 (33.0, -79.0),
 (29.25, -94.5),
 (41.75, -69.5),
 (29.25, -94.25),
 (41.75, -124.5),
 (32.5, -79.75),
 (56.75, -135.5),
 (56.75, -135.75),
 (46.25, -124.25),
 (43.5, -70.0),
 (43.5, -70.25),
 (30.75, -81.0),
 (26.0, -82.0),
 (28.25, -96.0),
 (40.0, -74.0),
 (44.25, -124.25),
 (39.5, -74.0),
 (43.75, -69.5),
 (28.5, -95.25),
 (43.75, -124.25),
 (48.25, -124.75),
 (40.25, -73.75),
 (47.0, -124.5),
 (47.0, -124.25),
 (32.75, -79.5),
 (32.75, -79.25),
 (38.75, -75.0),
 (33.5, -78.75),

In [22]:
# Render the pairs as a comma-separated list of tuples for use inside a SQL IN (...) clause.
lat_lon_str = ', '.join(str(pair) for pair in lat_lon_list)

### Enable postgis

In [23]:
# Install the PostGIS extension on the sl database if it is not already present,
# then commit so the change persists.
with get_session(sl_engine) as db:
    stmt = text("""CREATE EXTENSION IF NOT EXISTS postgis""")
    results = db.execute(stmt)
    db.commit()

### Create indexes and reindex 

In [24]:
# Create a composite index on wave_forecast(latitude, longitude) if it does not exist
# yet — presumably to speed up the (latitude, longitude) IN (...) lookups run later.
with get_session(bodhi_engine) as db:
    stmt = text("""CREATE INDEX if not exists idx_wave_forecast_lat_lon ON wave_forecast (latitude, longitude)""")
    results = db.execute(stmt)
    db.commit()

In [25]:
# Rebuild the (latitude, longitude) index.
# NOTE(review): a rebuild right after CREATE INDEX only matters when the index already
# existed — confirm it is needed on every run.
with get_session(bodhi_engine) as db:
    stmt = text("""reindex index idx_wave_forecast_lat_lon""")
    results = db.execute(stmt)
    db.commit()

### Getting matching bodhi wave data

Get all waves from bodhi for the current day that match the filtered sl spots 

In [26]:
# Sample query: at most 5 wave_forecast rows whose time equals CURRENT_DATE and whose
# (latitude, longitude) pair appears in lat_lon_str.
with get_session(bodhi_engine) as db:
    # lat_lon_str is interpolated directly into the SQL text; it is built from the
    # DataFrame's coordinate columns earlier in the notebook.
    stmt = text(f"""select * from wave_forecast where time = CURRENT_DATE AND (latitude, longitude) in ({lat_lon_str}) limit 5""")
    results = db.execute(stmt).fetchall()

In [27]:
# Validate each returned row into a BohdiWavesModel.
data = [BohdiWavesModel.model_validate(row) for row in results]

In [28]:
# Display the validated sample models.
data

[BohdiWavesModel(id=30254, location='0101000020E610000000000000008061C00000000000C04D40', latitude=59.5, longitude=-140.0, time=datetime.datetime(2024, 6, 12, 0, 0, tzinfo=datetime.timezone.utc), step=datetime.timedelta(0), valid_time=datetime.datetime(2024, 6, 12, 0, 0, tzinfo=datetime.timezone.utc), swh=1.9600000381469727, perpw=10.489999771118164, dirpw=218.67999267578125, shww=None, mpww=None, wvdir=None, ws=4.110000133514404, wdir=269.8800048828125, swell=1.8700000047683716, swper=10.489999771118164, entry_updated=datetime.datetime(2024, 6, 12, 13, 19, 52, 744570, tzinfo=datetime.timezone.utc)),
 BohdiWavesModel(id=34323, location='0101000020E610000000000000000063C00000000000E04C40', latitude=57.75, longitude=-152.0, time=datetime.datetime(2024, 6, 12, 0, 0, tzinfo=datetime.timezone.utc), step=datetime.timedelta(0), valid_time=datetime.datetime(2024, 6, 12, 0, 0, tzinfo=datetime.timezone.utc), swh=1.3300000429153442, perpw=8.25, dirpw=162.5, shww=0.009999999776482582, mpww=0.20000

## Create New Table to push to same postgres db as other sl data

In [29]:
from utils.bodhi_models import create_tables as create_bodhi_tables


In [30]:
# Calls utils.bodhi_models.create_tables against the sl engine — presumably creates the
# Bodhi table(s) in the sl database; verify against utils.bodhi_models.
create_bodhi_tables(sl_engine)

In [None]:
# Convert the validated wave models to plain dicts.
data_dicts = [model.model_dump() for model in data]

In [None]:
# Remove the "location" entry from every record in place (no-op when the key is absent).
for d in data_dicts:
    d.pop("location", None)

In [None]:
# Check which keys remain on a record.
data_dicts[0].keys()

In [None]:
# with get_session(sl_engine) as db:
#     stmt = insert(BodhiWaves).values(data_dicts)
#     db.execute(stmt)
#     db.commit()

In [None]:
def fetch_wave_data(lat_lon_str):
    """Fetch today's wave_forecast rows for the given coordinate pairs.

    Args:
        lat_lon_str: Comma-separated SQL tuples, e.g.
            ``"(37.0, -122.5), (31.0, -81.0)"``. The string is interpolated into
            an ``IN (...)`` clause matched against ``(latitude, longitude)``.

    Returns:
        A list of dicts, one per matching row (validated through
        ``BohdiWavesModel`` and dumped with ``model_dump()``), with the
        ``"location"`` key removed. An empty list when ``lat_lon_str`` is
        ``None``, empty, or whitespace only.
    """
    # An empty coordinate list would render as `IN ()`, which is a SQL syntax
    # error; no coordinates trivially means no rows, so skip the query.
    if lat_lon_str is None or not str(lat_lon_str).strip():
        return []

    with get_session(bodhi_engine) as db:
        # NOTE(review): the coordinate list is interpolated straight into the SQL
        # text — only pass strings built from trusted numeric values.
        stmt = text(
            f"""select * from wave_forecast where time = CURRENT_DATE AND (latitude, longitude) in ({lat_lon_str})"""
        )
        results = db.execute(stmt).fetchall()
        data_dict = [
            BohdiWavesModel.model_validate(entry).model_dump() for entry in results
        ]
        for d in data_dict:
            # Drop the "location" value before the records are returned.
            d.pop("location", None)
    return data_dict

def wave_data_to_db(data):
    """Bulk-insert wave records into the ``BodhiWaves`` table via the sl engine.

    Args:
        data: List of dicts, one per row to insert. An empty (or ``None``)
            value is a no-op.

    Returns:
        None.
    """
    # A batch can legitimately come back empty (no forecast rows for those
    # coordinates). Skip the insert rather than handing an empty sequence to
    # `.values()`, which SQLAlchemy does not treat as "insert zero rows"
    # (TODO confirm exact behavior for the installed version).
    if not data:
        return
    with get_session(sl_engine) as db:
        stmt = insert(BodhiWaves).values(data)
        db.execute(stmt)
        db.commit()

def batch(iterable, n=1):
    """Yield consecutive slices of ``iterable`` containing at most ``n`` items each.

    Args:
        iterable: A sized, sliceable sequence (list, tuple, str, ...).
        n: Maximum number of items per slice.

    Yields:
        Slices of ``iterable`` in order; the final slice may be shorter than ``n``.
    """
    size = len(iterable)
    for start in range(0, size, n):
        stop = min(start + n, size)
        yield iterable[start:stop]

def get_all_batches(lat_lon_list, bs=10):
    """Fetch and store wave data for every coordinate pair, ``bs`` pairs at a time.

    For each chunk produced by ``batch``, the pairs are rendered into a
    comma-separated string, passed to ``fetch_wave_data``, and the resulting
    records are written with ``wave_data_to_db``. Progress is logged at INFO
    level after each chunk.

    Args:
        lat_lon_list: Sequence of (lat, lon) pairs.
        bs: Number of pairs per chunk.
    """
    processed = 0
    for chunk in batch(lat_lon_list, bs):
        chunk_sql = ', '.join(str(pair) for pair in chunk)
        wave_data_to_db(fetch_wave_data(chunk_sql))
        processed = processed + len(chunk)
        logging.info(f"Processed {processed} out of {len(lat_lon_list)}.")

In [None]:
# Compare the number of distinct (lat, lon) pairs with the total to see how many are duplicates.
len(list(set(lat_lon_list))), len(lat_lon_list)

In [None]:
# De-duplicate the coordinate pairs. Going through a set does not preserve list order.
lat_lon_list = list(set(lat_lon_list))

In [None]:
# Display the de-duplicated pairs.
lat_lon_list

In [None]:
# Fetch and insert wave data for every coordinate pair, 5 pairs per batch.
get_all_batches(lat_lon_list, 5)

In [None]:
# fetch_wave_data requires the coordinate string (calling it with no argument raises
# TypeError); build it from the current lat_lon_list.
data = fetch_wave_data(', '.join(map(str, lat_lon_list)))

In [None]:
# Insert `data` into the sl database.
# NOTE(review): get_all_batches already inserts each batch it fetches — confirm this
# extra insert does not write the same rows a second time.
wave_data_to_db(data)

In [None]:
# NOTE(review): looks like a leftover scratch call. fetch_wave_data interpolates its
# argument into `(latitude, longitude) in (...)`, so passing 1 produces `in (1)`.
# Confirm whether this cell should be removed.
fetch_wave_data(1)