# Step 2 - Weather Data Process - Get Weather Station Data for Mapped Weather Stations

<b>Summary:</b><br>
Uses the Synoptic Weather Data API to retrieve weather station data for the weather stations mapped to cameras.<br>
Synoptic is a partner of SDG&E that helps store and serve their weather station data, which is accessible via API.

- Read in processed camera weather station mappings
- Get all station data for input timerange
- Save weather station data by network
- Convert wind speed and direction to uv components
- Save processed data with only desired columns

<b>Output:</b><br>
.<br>
├── data<br>
&emsp;&emsp;&emsp;├── processed<br>
&emsp;&emsp;&emsp;&nbsp;│&emsp;&emsp;&nbsp;├── weather_HPWREN.csv<br>
&emsp;&emsp;&emsp;&nbsp;│&emsp;&emsp;&nbsp;├── weather_SC-EDISON.csv<br>
&emsp;&emsp;&emsp;&nbsp;│&emsp;&emsp;&nbsp;├── weather_SDGE.csv<br>
&emsp;&emsp;&emsp;└── raw<br>
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;├── weather_HPWREN.csv<br>
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;├── weather_SC-EDISON.csv<br>
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;└── weather_SDGE.csv

<b>Instructions:</b><br>
- Create a copy of `config.json.example` and remove the `.example`
- Update the `synoptic_api_token` value

In [2]:
import json
import os
from ast import literal_eval
from io import StringIO

import fastparquet
import geopandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import rtree
from geopandas import GeoDataFrame
from shapely.geometry import Point, Polygon
from tqdm.notebook import tqdm, trange

## Parameters

In [2]:
# Synoptic API root, plus the personal token read from the local config file
with open("config.json") as config_file:
    cfg = json.loads(config_file.read())

API_BASE_URL = "https://api.synopticdata.com/v2/"
API_TOKEN = cfg["synoptic_api_token"]

# Values should be the SHORTNAME values from mesonet and should align to notebook 1
# (keys are presumably the matching mesonet network ids - confirm against notebook 1)
WEATHER_NETWORKS = {
    139: "SDGE",
    81: "HPWREN",
    231: "SC-EDISON",
}

# Query window, written in the YYYYmmddHHMM format the API needs
# figlib start = 2016-06-04 (assume PST)
# figlib end = 2021-07-11 (assume PST)
START_DATETIME = "201606030000"
END_DATETIME = "202107120000"
# END_DATETIME = "202112311159"

## 1. Get data for matched weather stations

### Read weather station mappings

In [29]:
# Load the camera -> weather station mappings and report (rows, columns)
mappings_path = "../../data/processed/camera_station_mappings.csv"
station_mappings_df = pd.read_csv(mappings_path)
print(station_mappings_df.shape)

(309, 7)


In [30]:
# Display the loaded mappings for a quick visual check
station_mappings_df

Unnamed: 0,properties.description.id,stid,shortname,distance_m,distance_mi,is_in_direction,rn
0,hpwren0_unknown direction,BFDSD,SDGE,2362.963219,1.468277,,1
1,hpwren0_unknown direction,CVXSD,SDGE,8161.908209,5.071575,,2
2,hpwren0_unknown direction,DJZSD,SDGE,10216.081526,6.347979,,3
3,hpwren1_north,HP016,HPWREN,0.000000,0.000000,True,1
4,hpwren1_north,MGDSD,SDGE,4637.139293,2.881385,True,2
...,...,...,...,...,...,...,...
304,hpwren30_south,TLGSD,SDGE,4521.722898,2.809668,True,2
305,hpwren30_south,CRISD,SDGE,8167.055431,5.074773,True,3
306,hpwren30_west,HP024,HPWREN,0.000000,0.000000,True,1
307,hpwren30_west,TLGSD,SDGE,4521.722898,2.809668,True,2


### Helper function to get weather station data given stid and start/end time

In [31]:
def get_historical_station_readings(stid: str, start: str, end: str) -> str:
    """
    Return the csv text of station readings for one station id and timeframe.

    Args:
        stid: Station id to request.
        start: Start of the time range, formatted YYYYmmddHHMM.
        end: End of the time range, formatted YYYYmmddHHMM.

    Returns:
        The raw response body (csv text) exactly as returned by the API.

    Raises:
        requests.HTTPError: If the API answers with a non-2xx status code.

    Note:
    For multiple stids, the csv output option would need to be removed and the
    response would then be in json format.
    """
    endpoint = f"{API_BASE_URL}stations/timeseries"
    params = {
        "token": API_TOKEN,
        "stid": stid,
        "start": start,
        "end": end,
        # NOTE(review): presumably makes the API report observation times in UTC
        # rather than station-local time - confirm against the Synoptic docs
        "obtimezone": "UTC",
        "output": "csv",
    }
    r = requests.get(endpoint, params=params)
    if not 200 <= r.status_code < 300:
        # Fail loudly here instead of handing a non-string back to the caller.
        # The request URL is deliberately left out of the message because it
        # carries the API token as a query parameter.
        raise requests.HTTPError(
            f"Synoptic timeseries request for station {stid!r} "
            f"failed with HTTP status {r.status_code}",
            response=r,
        )
    return r.text

### Get all data for time range given station network shortname

In [33]:
def get_network_station_data(df: pd.DataFrame, shortname: str = None) -> pd.DataFrame:
    """
    Return dataframe of station data given weather network shortname.
    If no shortname given, go through the full dataframe.

    Args:
        df: Station mappings with at least the "shortname" and "stid" columns.
        shortname: Weather network shortname to filter on, or None to use
            every station in df.

    Returns:
        The readings of all unique, non-null stations concatenated in the
        order the stations first appear, each frame keeping its own row index.
        An empty dataframe is returned when no station matches.
    """
    if shortname is not None:
        df = df[df["shortname"] == shortname]
    stations = df["stid"].dropna().unique().tolist()

    # Collect one frame per station and concatenate once at the end, so the
    # accumulated data is not copied again on every iteration.
    station_frames = []
    for station in tqdm(stations):
        csv_text = get_historical_station_readings(
            stid=station,
            start=START_DATETIME,
            end=END_DATETIME,
        )
        # header=[0, 1] reads two header rows into two-level columns.
        # NOTE(review): skiprows=6 presumably skips a metadata preamble in the
        # csv output - confirm against a raw API response.
        station_frames.append(
            pd.read_csv(StringIO(csv_text), skiprows=6, header=[0, 1])
        )

    if not station_frames:
        # No station matched the filter: nothing was requested
        return pd.DataFrame()
    return pd.concat(station_frames)

In [34]:
# Download every configured network in turn and save its raw readings to csv
for network_shortname in WEATHER_NETWORKS.values():
    network_station_df = get_network_station_data(
        station_mappings_df, network_shortname
    )
    raw_output_path = f"../../data/raw/weather_{network_shortname}.csv"
    network_station_df.to_csv(raw_output_path, index=False)

  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

In [None]:
# %%time

# weather_df_created = 0

# for idx, station in enumerate(tqdm(stations)):
#     # print(station)
#     # get csv string
#     csv_string_io = StringIO(
#         get_historical_station_readings(
#             stid=station,
#             start=START_DATETIME,
#             end=END_DATETIME
#             # start="201606030000",
#             # end="201606030100",
#         )
#     )
#     # initial creation of df
#     if not weather_df_created:
#         weather_df = pd.read_csv(csv_string_io, skiprows=6, header=[0, 1])
#         weather_df_created = 1
#     # additional appends to df
#     else:
#         temp_df = pd.read_csv(csv_string_io, skiprows=6, header=[0, 1])
#         weather_df = pd.concat([weather_df, temp_df])

# # prev runtime: 1h 6min 56s

<hr>

## 2. Convert wind speed and direction to uv components

Reference: http://colaweb.gmu.edu/dev/clim301/lectures/wind/wind-uv

Directions are originally in "weather wind direction" and will be converted to "math wind direction" for uv component calculation.<br>
A null wind speed or a null wind direction leaves both the u and v components null.

### Read raw weather data

In [5]:
# Raw files carry two header rows, so read them into two-level columns
weather_sdge_df, weather_hpwren_df, weather_sce_df = (
    pd.read_csv(raw_path, header=[0, 1])
    for raw_path in (
        "../../data/raw/weather_SDGE.csv",
        "../../data/raw/weather_HPWREN.csv",
        "../../data/raw/weather_SC-EDISON.csv",
    )
)

### Helper function to calculate uv components

In [65]:
def calc_uv_components(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add u/v wind vector components to a weather dataframe and return it.

    Reads the ("wind_direction_set_1", "Degrees") and ("wind_speed_set_1", "m/s")
    columns of a dataframe with two-level columns. The dataframe is modified in
    place: the columns "wind_direction_math", "wind_direction_math_r", "u" and
    "v" are added, and the same dataframe object is returned.
    """
    reported_degrees = df["wind_direction_set_1"]["Degrees"]
    speed = df["wind_speed_set_1"]["m/s"]

    # weather direction -> math direction, adding 360 to any negative result
    math_degrees = 270 - reported_degrees
    math_degrees = math_degrees.mask(math_degrees < 0, math_degrees + 360)
    math_radians = np.radians(math_degrees)

    # store the intermediate angles first, then the vector components
    df["wind_direction_math"] = math_degrees
    df["wind_direction_math_r"] = math_radians
    df["u"] = speed * np.cos(math_radians)
    df["v"] = speed * np.sin(math_radians)
    return df

In [68]:
# Add the u/v component columns to each network's readings
weather_sdge_df, weather_hpwren_df, weather_sce_df = map(
    calc_uv_components,
    (weather_sdge_df, weather_hpwren_df, weather_sce_df),
)

In [84]:
# Preview the first rows of the SDGE readings
weather_sdge_df.head()

Unnamed: 0_level_0,Station_ID,Date_Time,air_temp_set_1,relative_humidity_set_1,wind_speed_set_1,volt_set_1,wind_gust_set_1,wind_direction_set_1,dew_point_temperature_set_1d,wind_chill_set_1d,...,sea_level_pressure_set_1d,sea_level_pressure_set_1d,wet_bulb_temperature_set_1d,altimeter_set_1d,fuel_temp_set_1,precip_accum_ten_minute_set_1,u,v,wind_direction_math,wind_direction_math_r
Unnamed: 0_level_1,Unnamed: 0_level_1,Unnamed: 1_level_1,Celsius,%,m/s,volts,m/s,Degrees,Celsius,Celsius,...,Pascals,Pascals.1,Celsius,Pascals,Celsius,Millimeters,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,BFDSD,2016-06-03T00:00:00Z,18.33,87.0,4.47,,6.26,280.0,16.11,,...,,,,,,,4.402091,-0.776207,350.0,6.108652
1,BFDSD,2016-06-03T00:10:00Z,18.33,87.0,4.02,,6.26,280.0,16.11,,...,,,,,,,3.958927,-0.698066,350.0,6.108652
2,BFDSD,2016-06-03T00:20:00Z,17.78,88.0,4.02,,5.37,275.0,15.75,,...,,,,,,,4.004703,-0.350366,355.0,6.195919
3,BFDSD,2016-06-03T00:30:00Z,17.78,88.0,4.02,,5.37,270.0,15.75,,...,,,,,,,4.02,0.0,0.0,0.0
4,BFDSD,2016-06-03T00:40:00Z,17.78,89.0,3.58,,5.37,271.0,15.93,,...,,,,,,,3.579455,-0.06248,359.0,6.265732


### Write processed data to csv keeping only desired columns

In [73]:
# Top-level column labels to keep in the processed output.
# "u" and "v" are the components added by calc_uv_components.
columns = [
    "Station_ID",
    "Date_Time",
    "air_temp_set_1",
    "relative_humidity_set_1",
    "wind_speed_set_1",
    "wind_gust_set_1",
    "wind_direction_set_1",
    "dew_point_temperature_set_1d",
    "u",
    "v",
]

In [79]:
# Write each network's readings, restricted to the desired columns
for processed_df, processed_path in (
    (weather_sdge_df, "../../data/processed/weather_SDGE.csv"),
    (weather_hpwren_df, "../../data/processed/weather_HPWREN.csv"),
    (weather_sce_df, "../../data/processed/weather_SC-EDISON.csv"),
):
    processed_df[columns].to_csv(processed_path, index=False)