# NWS hourly forecast for the LA area
> This notebook fetches hourly forecast XML from the National Weather Service for eleven LA-area locations, parses it, and stores the result as a pandas DataFrame

---

#### Import Python tools and Jupyter config

In [53]:
import json
import tqdm
import requests
import pandas as pd
import jupyter_black
from xml.etree import ElementTree as ET

In [39]:
# Load the jupyter_black extension so code cells are formatted with Black.
jupyter_black.load()

# Display settings, one call per option. The original set display.max_columns
# twice (200, then None); only the final value (None = show every column) ever
# took effect, so that is the one kept here.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_colwidth", None)

In [76]:

# (name, latitude, longitude) for each forecast point, in the order they are fetched.
_location_points = [
    ("Santa Monica", 34.0195, -118.4912),
    ("Culver City", 34.0219, -118.3965),
    ("Pasadena", 34.1478, -118.1445),
    ("Irvine", 33.6846, -117.8265),
    ("Manhattan Beach", 33.8847, -118.4109),
    ("Downtown Los Angeles", 34.0407, -118.2468),
    ("Arcadia", 34.1397, -118.0353),
    ("Burbank", 34.1808, -118.3089),
    ("Torrance", 33.8358, -118.3406),
    ("Newport Beach", 33.6189, -117.9298),
    ("Malibu", 34.0259, -118.7798),
]

# Place name -> {"latitude": ..., "longitude": ...}.
locations = {
    name: {"latitude": lat, "longitude": lon} for name, lat, lon in _location_points
}

# URL template for the digital DWML forecast; the two placeholders are filled
# with latitude, then longitude.
base_url = "https://forecast.weather.gov/MapClick.php?lat={}&lon={}&FcstType=digitalDWML"


def fetch_weather_data(latitude, longitude, timeout=30):
    """Download the digital DWML forecast XML for one coordinate pair.

    Parameters
    ----------
    latitude, longitude : float
        Coordinates substituted into ``base_url``.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout a
        stalled connection would block the notebook indefinitely.

    Returns
    -------
    bytes or None
        The raw response body on HTTP 200; ``None`` when the request fails
        (network error, timeout, or any other status code). A message is
        printed in the failure case.
    """
    url = base_url.format(latitude, longitude)
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Treat connection problems like a bad status so that one unreachable
        # point does not abort the fetch loop for every other location.
        print(f"Failed to fetch data for {latitude}, {longitude}: {error}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for {latitude}, {longitude}")
        return None
    return response.content


def _read_values(parameters, path, cast=int):
    """Return the ``<value>`` entries matching ``path``, converted with ``cast``.

    Entries with no text (or only whitespace) become ``None``.
    """
    values = []
    for node in parameters.findall(path):
        text = (node.text or "").strip()
        values.append(cast(text) if text else None)
    return values


def _hourly_times(root, parameters, default_key="k-p1h-n1-0"):
    """Return the start times of the time layout the parameters are indexed by.

    ``default_key`` is used when present. Otherwise the first layout referenced
    by a parameter element's ``time-layout`` attribute is used. An empty list
    is returned when no usable layout exists.
    """
    layouts = {}
    for layout in root.findall(".//time-layout"):
        key = layout.findtext("layout-key")
        if key is None:
            continue
        layouts[key.strip()] = [
            start.text for start in layout.findall("start-valid-time")
        ]

    if default_key in layouts:
        return layouts[default_key]
    for element in parameters:
        referenced = element.get("time-layout")
        if referenced in layouts:
            return layouts[referenced]
    return []


def parse_weather_data(xml_data):
    """Convert one DWML forecast document into a list of hourly records.

    Parameters
    ----------
    xml_data : bytes or str
        XML document as returned by ``fetch_weather_data``.

    Returns
    -------
    list of dict
        One dict per start time in the hourly time layout, with the keys
        ``location``, ``time``, ``temperature``, ``humidity``, ``wind_speed``,
        ``wind_direction``, ``cloud_cover``, ``hourly_qpf`` and
        ``probability_of_precipitation``. A value is ``None`` when the document
        has no entry for that hour. Returns an empty list when the document has
        no ``<parameters>`` element or no usable time layout.

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If ``xml_data`` is not well-formed XML.
    """
    root = ET.fromstring(xml_data)

    description = root.find(".//location/description")
    if description is not None and description.text:
        # Tidy the place name taken from the document's description text.
        location_name = (
            description.text.replace("CA, CA", "")
            .replace("East L.A.", "Downtown LA")
            .strip()
        )
    else:
        location_name = "Unknown Location"

    parameters = root.find(".//parameters")
    if parameters is None:
        return []

    # Output column -> parsed series. Insertion order fixes the column order of
    # the resulting records.
    columns = {
        "temperature": _read_values(
            parameters, ".//temperature[@type='hourly']/value"
        ),
        "humidity": _read_values(parameters, ".//humidity[@type='relative']/value"),
        "wind_speed": _read_values(
            parameters, ".//wind-speed[@type='sustained']/value"
        ),
        "wind_direction": _read_values(
            parameters, ".//direction[@type='wind']/value"
        ),
        "cloud_cover": _read_values(
            parameters, ".//cloud-amount[@type='total']/value"
        ),
        "hourly_qpf": _read_values(parameters, ".//hourly-qpf/value", cast=float),
        "probability_of_precipitation": _read_values(
            parameters, ".//probability-of-precipitation/value"
        ),
    }

    rows = []
    for i, timestamp in enumerate(_hourly_times(root, parameters)):
        row = {"location": location_name, "time": timestamp}
        for column, values in columns.items():
            # A series may be shorter than the time layout; pad with None.
            row[column] = values[i] if i < len(values) else None
        rows.append(row)

    return rows


# Accumulates the hourly records of every location that was fetched successfully.
all_data = []

for location, coords in locations.items():
    xml_data = fetch_weather_data(coords["latitude"], coords["longitude"])
    if not xml_data:
        # Nothing came back for this point; move on to the next one.
        continue
    weather_data = parse_weather_data(xml_data)
    all_data += weather_data

# One row per location per forecast hour.
df = pd.DataFrame(all_data)

In [77]:
# Spot-check the forecast rows for a single location.
df[df["location"] == "Culver City"]

Unnamed: 0,location,time,temperature,humidity,wind_speed,wind_direction,cloud_cover,hourly_qpf,probability_of_precipitation
168,Culver City,2024-07-30T11:00:00-07:00,73,57,9,220,11,0.0,1
169,Culver City,2024-07-30T12:00:00-07:00,74,56,9,220,11,0.0,1
170,Culver City,2024-07-30T13:00:00-07:00,74,56,9,220,11,0.0,1
171,Culver City,2024-07-30T14:00:00-07:00,74,56,11,230,11,0.0,1
172,Culver City,2024-07-30T15:00:00-07:00,73,57,11,230,11,0.0,1
...,...,...,...,...,...,...,...,...,...
331,Culver City,2024-08-06T06:00:00-07:00,66,77,2,90,90,,3
332,Culver City,2024-08-06T07:00:00-07:00,67,76,2,90,90,,3
333,Culver City,2024-08-06T08:00:00-07:00,69,74,2,170,90,,3
334,Culver City,2024-08-06T09:00:00-07:00,73,70,2,170,90,,3


In [80]:
# List the distinct location names that ended up in the frame.
df["location"].unique()

array(['Santa Monica', 'Culver City', 'Pasadena', 'Irvine',
       'Manhattan Beach', 'Downtown LA', 'Arcadia', 'Burbank', 'Torrance',
       'Newport Beach', 'Malibu'], dtype=object)

In [81]:
# Reference values noted while developing the parser. Raw XML is not valid
# Python, so the outline is kept in a string to let the cell run cleanly.

# Location names present in `df` after parsing.
location_names = [
    "Santa Monica",
    "Culver City",
    "Pasadena",
    "Irvine",
    "Manhattan Beach",
    "Downtown LA",
    "Arcadia",
    "Burbank",
    "Torrance",
    "Newport Beach",
    "Malibu",
]

# Columns of `df`, in order.
column_names = [
    "location",
    "time",
    "temperature",
    "humidity",
    "wind_speed",
    "wind_direction",
    "cloud_cover",
    "hourly_qpf",
    "probability_of_precipitation",
]

# Outline of the <parameters> element in the DWML response (values elided).
# The outline is truncated at the <weather> element, as in the original notes.
parameters_outline = """
<parameters applicable-location="point1">
<temperature type="hourly" time-layout="k-p1h-n1-0">
...
</temperature>
<temperature type="dew point" time-layout="k-p1h-n1-0">
...
</temperature>
<temperature type="heat index" time-layout="k-p1h-n1-0">
...
</temperature>
<probability-of-precipitation type="floating" units="percent" time-layout="k-p1h-n1-0">
...
</probability-of-precipitation>
<wind-speed type="sustained" time-layout="k-p1h-n1-0">
...
</wind-speed>
<wind-speed type="gust" time-layout="k-p1h-n1-0">
...
</wind-speed>
<direction type="wind" units="degrees true" time-layout="k-p1h-n1-0">
...
</direction>
<cloud-amount type="total" units="percent" time-layout="k-p1h-n1-0">
...
</cloud-amount>
<humidity type="relative" units="percent" time-layout="k-p1h-n1-0">
...
</humidity>
<hourly-qpf type="floating" units="inches" time-layout="k-p1h-n1-0">
...
</hourly-qpf>
<weather time-layout="k-p1h-n1-0">
"""

---

## Process

#### Clean dates, standardize categories, etc. 

---

## Aggregate

#### Groupby state, etc.

---

## Charts

#### Save the chart

In [None]:
# NOTE(review): neither `chart` nor `Image` is defined or imported anywhere in
# the visible notebook, so this cell raises NameError as written. TODO: build
# the chart and import Image (presumably IPython.display.Image; confirm) first.
# Presumably saves the chart to a PNG and then displays that file; confirm once
# `chart` exists.
chart.save("visuals/chart.png")
Image(filename="visuals/chart.png")

#### Make sure the chart is visible on Github

In [None]:
# NOTE(review): `Image` is not imported anywhere in the visible notebook. TODO:
# import it (presumably IPython.display.Image; confirm) before running.
# Per the heading above, this is meant to keep the chart visible on GitHub by
# displaying the saved PNG file.
Image(filename="visuals/chart.png")

---

## Metadata

#### Data provenance, column descriptions, etc.

---

## Exports

#### XyXy subset in CSV format to `processed`

#### JSON, GeoJSON, etc., to `processed`