In [96]:
import os
import json
import numpy as np
import pandas as pd
import requests
import datetime

from dotenv import load_dotenv
from google.cloud import storage

In [97]:
# Read the CBIBS API key from the process environment. load_dotenv() runs
# first so that variables from a dotenv file are available to the lookup.
load_dotenv()
api_key = os.environ.get("CBIBS_API_KEY")
if api_key is None or api_key == "":
    # Stop here rather than sending requests without a key.
    raise ValueError("No API key found. Please check your .env file.")

The CBIBS API takes a handful of parameters:
- `key` (required): Your API key
- `var`: The variable to request; use `all` to request every variable.
- `sd`: Start date, ISO8601 format (e.g. `2024-07-05T00:00:00Z`)
- `ed`: End date, ISO8601 format (e.g. `2024-07-06T01:00:00Z`)

An example query would look like this:
```
https://mw.buoybay.noaa.gov/api/v1/json/query/AN?key=<<api_key>>&sd=2020-04-01T10:00:00z&ed=2020-04-01T20:00:00z&var=sea_water_temperature
```

I'm most interested in Potomac, the `PL` station.

In [98]:
# Create a function to query the CBIBS API
def query_cbibs(station, sd, key=api_key, ed=None, var="all"):
    """Query the CBIBS API for one station and return its measurements.

    Parameters
    ----------
    station : str
        Station code placed in the URL path (e.g. ``"PL"``).
    sd : str
        Start date in ISO8601 format (e.g. ``"2024-07-05T00:00:00Z"``).
    key : str
        API key sent as the ``key`` query parameter. Defaults to the
        module-level ``api_key``.
    ed : str, optional
        End date in ISO8601 format. When omitted, the current UTC time is
        computed on each call (a default evaluated in the signature would be
        frozen at the moment the function was defined).
    var : str
        Variable to request, or ``"all"``.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns ``actualName``, ``time`` and
        ``value``: one row per measurement of each variable whose
        ``actualName`` is in the relevant-variable list. The three columns
        are present even when no measurement matches.

    Raises
    ------
    requests.HTTPError
        If the API responds with an HTTP error status.
    KeyError
        If the JSON payload lacks one of the keys read below.
    """
    if ed is None:
        # Timezone-aware "now", so the trailing "Z" really means UTC.
        ed = datetime.datetime.now(datetime.timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%SZ"
        )

    api_url = f"https://mw.buoybay.noaa.gov/api/v1/json/query/{station}"
    # Log the request without the key so the secret never lands in saved
    # notebook output.
    print(f"{api_url}?sd={sd}&ed={ed}&var={var}")

    # Let requests build and encode the query string; fail fast on a hung
    # connection or an HTTP error instead of parsing an error body as data.
    response = requests.get(
        api_url,
        params={"key": key, "sd": sd, "ed": ed, "var": var},
        timeout=30,
    )
    response.raise_for_status()
    data = response.json()

    # Variables worth keeping; a set gives constant-time membership checks.
    relevant_variables = {
        "air_pressure",
        "air_temperature",
        "wind_speed",
        "wind_speed_of_gust",
        "wind_from_direction",
        "relative_humidity",
        "latitude_decimal",
        "longitude_decimal",
        "sea_water_temperature",
        "sea_water_electrical_conductivity",
        "mml_avg_nitrates",
        "simple_turbidity",
        "seanettle_prob",
        "mass_concentration_of_chlorophyll_in_sea_water",
        "mass_concentration_of_oxygen_in_sea_water",
        "sea_water_salinity",
        "sea_surface_wind_wave_period",
        "wave_direction_spread",
        "sea_surface_wave_from_direction",
        "sea_surface_wave_significant_height",
        "sea_surface_wave_mean_height",
    }

    # Flatten stations -> variables -> measurements into one record per
    # measurement. The loop name differs from the `station` parameter so the
    # argument is not shadowed.
    records = [
        {
            "actualName": variable["actualName"],
            "time": measurement["time"],
            "value": measurement["value"],
        }
        for station_data in data["stations"]
        for variable in station_data["variable"]
        if variable["actualName"] in relevant_variables
        for measurement in variable["measurements"]
    ]

    # Fix the column set explicitly so an empty result still has the columns
    # that downstream reshaping refers to.
    return pd.DataFrame(records, columns=["actualName", "time", "value"])

In [99]:
# Fetch the PL station's readings from 2024-07-05 (UTC) onward; the remaining
# arguments are left at the function's defaults.
df = query_cbibs(station="PL", sd="2024-07-05T00:00:00Z")

https://mw.buoybay.noaa.gov/api/v1/json/query/PL?key=<REDACTED>&sd=2024-07-05T00:00:00Z&ed=2024-07-06T16:14:59.642620Z&var=all


In [113]:
# Reshape long -> wide: one row per "time", one column per "actualName",
# cells filled from "value"; rows are then ordered by the time index.
pivoted_df = (
    df.pivot(columns="actualName", values="value", index="time")
    .sort_index()
)

In [115]:
# Count missing values per variable column of the pivoted frame.
# (The frame is named `pivoted_df`; `pivot_df` is not defined in this notebook
# and would raise NameError on a fresh kernel.)
pivoted_df.isna().sum()

air_pressure                             0
air_temperature                          0
sea_surface_wave_from_direction          3
sea_surface_wave_significant_height      3
sea_surface_wind_wave_period             3
sea_water_electrical_conductivity        0
sea_water_salinity                       0
sea_water_temperature                    0
seanettle_prob                           0
wind_from_direction                    385
wind_speed                             385
wind_speed_of_gust                     385
dtype: int64

In [104]:
# Wind columns get their missing entries replaced with 0; afterwards any
# remaining gaps in the frame are filled by interpolate() with its defaults.
# NOTE(review): a 0 fill for wind_from_direction reads as a real bearing —
# confirm this is intended rather than interpolating the wind columns too.
wind_columns = ["wind_from_direction", "wind_speed", "wind_speed_of_gust"]
wind_filled = pivoted_df[wind_columns].fillna(value=0)
pivoted_df[wind_columns] = wind_filled
pivoted_df = pivoted_df.interpolate()

In [106]:
# Interpolate remaining gaps (linear is the pandas default method).
# NOTE(review): the preceding cell already ends with the same interpolate()
# call, so this cell looks redundant — confirm and consider removing it.
pivoted_df = pivoted_df.interpolate(method="linear")