# LAX min/max temperatures: 1965-2021

### Import Python tools and Jupyter configuration

In [4]:
%load_ext lab_black

In [5]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import datetime as dt

In [6]:
import altair as alt
import altair_latimes as lat
import numpy as np

In [7]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [8]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()
pd.options.display.max_colwidth = None

### Read data from NOAA

In [9]:
# https://www.ncdc.noaa.gov/data-access/land-based-station-data/land-based-datasets/global-historical-climatology-network-ghcn

In [10]:
# https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt

In [30]:
stations = pd.read_fwf("data/raw/ghcnd-stations.txt", names=["station_id", "lat", "lon", "unk", "name", "end"])

In [31]:
stations

Unnamed: 0,ACW00011604,17.1167,-61.7833,10.1,ST JOHNS COOLIDGE FLD,Unnamed: 5,Unnamed: 6,Unnamed: 7
0,ACW00011647,17.1333,-61.7833,19.2,ST JOHNS,,,
1,AE000041196,25.3330,55.5170,34.0,SHARJAH INTER. AIRP,,GSN,41196.0
2,AEM00041194,25.2550,55.3640,10.4,DUBAI INTL,,,41194.0
3,AEM00041217,24.4330,54.6510,26.8,ABU DHABI INTL,,,41217.0
4,AEM00041218,24.2620,55.6090,264.9,AL AIN INTL,,,41218.0
...,...,...,...,...,...,...,...,...
118486,ZI000067969,21.0500,29.3670,861.0,WEST NICHOLSON,,,67969.0
118487,ZI000067975,20.0670,30.8670,1095.0,MASVINGO,,,67975.0
118488,ZI000067977,21.0170,31.5830,430.0,BUFFALO RANGE,,,67977.0
118489,ZI000067983,20.2000,32.6160,1132.0,CHIPINGE,,GSN,67983.0


In [47]:
inventory = pd.read_fwf(
    "https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt",
    names=["station_id", "lat", "lon", "product", "begin", "end"],
)

In [53]:
# inventory["product"].value_counts()

In [61]:
inventory[inventory["station_id"].str.contains("US1CA")]

Unnamed: 0,station_id,lat,lon,product,begin,end
193358,US1CAAL0001,37.8123,122.2160,PRCP,2008,2021
193359,US1CAAL0001,37.8123,122.2160,SNOW,2008,2021
193360,US1CAAL0001,37.8123,122.2160,DAPR,2008,2021
193361,US1CAAL0001,37.8123,122.2160,MDPR,2008,2021
193362,US1CAAL0002,37.7075,122.0687,PRCP,2008,2012
...,...,...,...,...,...,...
198389,US1CAYL0029,38.6550,121.7452,SNOW,2021,2021
198390,US1CAYL0029,38.6550,121.7452,SNWD,2020,2020
198391,US1CAYL0029,38.6550,121.7452,DAPR,2020,2021
198392,US1CAYL0029,38.6550,121.7452,MDPR,2020,2021


In [21]:
src.columns = src.columns.str.lower()

### Dates

In [22]:
src["date"] = pd.to_datetime(src["date"], format="%m/%d/%Y")
src["date"] = pd.to_datetime(src["date"].dt.strftime("%Y-%m-%d"))
src["year"] = src["date"].dt.year
src["quarter"] = src["date"].dt.quarter
src["day"] = src["date"].dt.day
src["month"] = src["date"].dt.month
src["weekday"] = src["date"].dt.day_name()
src["monthname"] = src["date"].dt.month_name()
src["month_year"] = pd.to_datetime(src["date"]).dt.to_period("M")
src["month_year_full"] = src["date"].apply(lambda x: x.strftime("%B-%Y"))
src["month_year_full"] = pd.to_datetime(src["month_year_full"])

In [27]:
src.drop(["acsh", "psun", "tavg", "tsun"], axis=1, inplace=True)

In [33]:
src.head()

Unnamed: 0,station,name,date,tmax,tmin,year,quarter,day,month,weekday,monthname,month_year,month_year_full
0,USW00093134,"LOS ANGELES DOWNTOWN USC, CA US",1964-01-01,80,54,1964,1,1,1,Wednesday,January,1964-01,1964-01-01
1,USW00093134,"LOS ANGELES DOWNTOWN USC, CA US",1964-01-02,72,52,1964,1,2,1,Thursday,January,1964-01,1964-01-01
2,USW00093134,"LOS ANGELES DOWNTOWN USC, CA US",1964-01-03,73,50,1964,1,3,1,Friday,January,1964-01,1964-01-01
3,USW00093134,"LOS ANGELES DOWNTOWN USC, CA US",1964-01-04,72,51,1964,1,4,1,Saturday,January,1964-01,1964-01-01
4,USW00093134,"LOS ANGELES DOWNTOWN USC, CA US",1964-01-05,70,51,1964,1,5,1,Sunday,January,1964-01,1964-01-01
