# NWS weekly forecast for the LA area
> This notebook fetches and processes XML data for numerous locations and stores it as a pandas DataFrame.

---

#### Import Python tools and Jupyter config

In [1]:
import json
import tqdm
import requests
import pandas as pd
import jupyter_black
from xml.etree import ElementTree as ET

In [2]:
# Activate the jupyter_black extension for this notebook
# (presumably auto-formats code cells with Black — confirm against its docs).
jupyter_black.load()

# pandas display settings. The column limit was previously set to 200 and then
# immediately overridden to None; only the effective values are kept here.
pd.set_option("display.max_columns", None)  # never hide columns
pd.set_option("display.max_rows", 100)  # cap the number of printed rows
pd.set_option("display.max_colwidth", None)  # never truncate cell text

---

In [3]:
# Load locations from the config file. The fetch loop further down reads
# coords["latitude"] and coords["longitude"] from each value, so the file is
# expected to map a name to a dict carrying those two keys.
# The encoding is pinned so the file decodes the same way on every platform.
with open("../data/raw/locations.json", "r", encoding="utf-8") as f:
    locations = json.load(f)

# Forecast endpoint template: the two placeholders are filled with latitude and
# longitude, in that order, by fetch_weather_data.
base_url = "https://forecast.weather.gov/MapClick.php?lat={}&lon={}&unit=0&lg=english&FcstType=dwml"

In [8]:
def fetch_weather_data(latitude, longitude, timeout=30):
    """Download the forecast XML for a single point.

    Args:
        latitude: Value substituted into the first placeholder of ``base_url``.
        longitude: Value substituted into the second placeholder of ``base_url``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection and hang the whole notebook.

    Returns:
        The raw response body (bytes) when the server answers with HTTP 200,
        otherwise ``None`` after printing a short failure message.
    """
    url = base_url.format(latitude, longitude)
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as a bad
        # status code so one unreachable point does not abort the whole run.
        print(f"Failed to fetch data for {latitude}, {longitude}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for {latitude}, {longitude}")
        return None
    return response.content


def _text_or_none(element):
    """Return the element's text, or None when it is missing or only whitespace."""
    text = element.text
    return text if text and text.strip() else None


def _int_or_none(element):
    """Return the element's text converted to int, or None when it is empty."""
    text = _text_or_none(element)
    return None if text is None else int(text)


def _summary_or_none(element):
    """Return the ``weather-summary`` attribute, or None when absent or empty."""
    return element.attrib.get("weather-summary") or None


def _collect_entries(series, child_tag, measure, extract, location, time_layouts):
    """Build one record per non-empty child of every element in ``series``.

    The position of a child inside its parent selects the timestamp from the
    time layout the parent names in its ``time-layout`` attribute, so empty
    children are skipped but still counted.
    """
    entries = []
    for element in series:
        layout_key = element.attrib["time-layout"]
        for i, child in enumerate(element.findall(child_tag)):
            value = extract(child)
            if value is None:
                continue
            entries.append(
                {
                    "location": location,
                    "time": time_layouts[layout_key][i],
                    "measure": measure,
                    "value": value,
                }
            )
    return entries


def parse_weather_data(xml_data):
    """Convert one forecast XML document into a list of long-format records.

    Args:
        xml_data: The XML document as ``str`` or ``bytes``.

    Returns:
        A list of dicts with the keys ``location``, ``time``, ``measure`` and
        ``value``. Measures are emitted in this order: daily maximum
        temperature, daily minimum temperature, worded forecast, probability of
        precipitation, weather summary. Temperature and precipitation values
        are ints; the other two are strings. An empty list is returned (after
        printing a message) when the document cannot be parsed or lacks either
        a location description or a ``<parameters>`` section.

    Raises:
        KeyError / IndexError: If a data series references a time layout, or a
            position within it, that the document does not define.
        ValueError: If a temperature or precipitation value is not an integer.
    """
    try:
        root = ET.fromstring(xml_data)
    except ET.ParseError as exc:
        # A 200 response is not guaranteed to carry well-formed XML.
        print(f"Could not parse forecast XML: {exc}")
        return []

    description = root.find(".//location/description")
    parameters = root.find(".//parameters")
    if description is None or not description.text or parameters is None:
        print("Response does not contain forecast data")
        return []

    # Normalise the place name: drop the state suffix and relabel East L.A.
    location = (
        description.text.replace(", CA", "")
        .replace("East L.A.", "Downtown LA")
        .strip()
    )

    # Map every layout key to its ordered list of start times.
    time_layouts = {}
    for time_layout in root.findall(".//time-layout"):
        layout_key = time_layout.find("layout-key").text
        time_layouts[layout_key] = [
            start.text for start in time_layout.findall("start-valid-time")
        ]

    # (XPath below <parameters>, child tag, measure name, value extractor)
    series_specs = [
        (".//temperature[@type='maximum']", "value", "daily_maximum_temperature", _int_or_none),
        (".//temperature[@type='minimum']", "value", "daily_minimum_temperature", _int_or_none),
        (".//wordedForecast", "text", "word_forecast", _text_or_none),
        (".//probability-of-precipitation", "value", "probability_of_precipitation", _int_or_none),
        (".//weather", "weather-conditions", "weather", _summary_or_none),
    ]

    data = []
    for path, child_tag, measure, extract in series_specs:
        data.extend(
            _collect_entries(
                parameters.findall(path),
                child_tag,
                measure,
                extract,
                location,
                time_layouts,
            )
        )
    return data


# Fetch and parse the forecast for every configured location. Only the
# coordinates are used here; the location label on each record is taken from
# the forecast XML by parse_weather_data.
all_data = []

for coords in tqdm.tqdm(locations.values(), desc="Fetching forecasts"):
    xml_data = fetch_weather_data(coords["latitude"], coords["longitude"])
    if xml_data:
        weather_data = parse_weather_data(xml_data)
        all_data.extend(weather_data)

# Convert to DataFrame: one row per location/time, one column per measure.
# pivot() raises on repeated (location, time, measure) combinations, which can
# occur when two configured points resolve to the same location label, so only
# the first record of each combination is kept.
df = (
    pd.DataFrame(all_data)
    .drop_duplicates(subset=["location", "time", "measure"])
    .pivot(index=["location", "time"], values="value", columns="measure")
    .reset_index()
)

In [9]:
# Display the pivoted forecast table: one row per location/time, one column per measure.
df

measure,location,time,daily_maximum_temperature,daily_minimum_temperature,weather,word_forecast
0,Arcadia,2024-07-30T18:00:00-07:00,,58,Clear,"Clear, with a low around 58. Southwest wind 5 to 10 mph becoming light south southwest in the evening."
1,Arcadia,2024-07-31T06:00:00-07:00,89,,Sunny,"Sunny, with a high near 89. Light south southwest wind becoming southwest 5 to 10 mph in the afternoon."
2,Arcadia,2024-07-31T18:00:00-07:00,,61,Clear,"Clear, with a low around 61. Southwest wind 5 to 10 mph becoming light south in the evening."
3,Arcadia,2024-08-01T06:00:00-07:00,88,,Mostly Sunny,"Mostly sunny, with a high near 88. Calm wind becoming southwest around 5 mph in the afternoon."
4,Arcadia,2024-08-01T18:00:00-07:00,,63,Partly Cloudy,"Partly cloudy, with a low around 63. Southwest wind around 5 mph becoming calm in the evening."
...,...,...,...,...,...,...
149,Torrance,2024-08-04T06:00:00-07:00,79,,Patchy Fog then Mostly Sunny,"Patchy fog before 11am. Otherwise, partly sunny, with a high near 79."
150,Torrance,2024-08-04T18:00:00-07:00,,59,Patchy Fog,"Patchy fog. Otherwise, mostly cloudy, with a low around 59."
151,Torrance,2024-08-05T06:00:00-07:00,79,,Patchy Fog then Partly Sunny,"Patchy fog. Otherwise, partly sunny, with a high near 79."
152,Torrance,2024-08-05T18:00:00-07:00,,59,Patchy Fog,"Patchy fog. Otherwise, mostly cloudy, with a low around 59."
