In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the helper modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [188]:
from functools import partial

import polars as pl
import altair as alt

from weather.helpers.epw_read import read_epw
from weather.helpers.weather_data import PALO_ALTO_20
from weather.helpers.filter import filter_df_by_month
from weather.helpers.figures import get_todays_save_path

In [None]:
# Read the EPW weather file, then bind the frame and the location into a
# single-argument month filter.
df = read_epw(PALO_ALTO_20.path)
month_filter = partial(filter_df_by_month, df, PALO_ALTO_20)

# NOTE(review): this frame is named `june`, but the filter is called with 7 and
# the rows displayed further down are dated 2020-07 -- confirm the intended name.
# Day 30 is dropped because the last day has only 23 values instead of 24.
# NOTE(review): the filter removes day-of-month 30 specifically -- confirm that
# this is the short day.
is_not_day_30 = pl.col("datetime").dt.day() != 30
june = (
    month_filter(7)
    .filter(is_not_day_30)
    .select("datetime", "Dry Bulb Temperature")
)

# Sanity check: every remaining calendar date contributes exactly 24 rows.
rows_per_date = june["datetime"].dt.date().unique_counts()
assert (rows_per_date.unique() == 24).all()

### Examining differences between hours to see when large changes first occur – searching for T\*


In [None]:
# Row-to-row change in dry-bulb temperature. `diff` runs over the whole column,
# so the value at the first hour of a day is measured against the preceding row
# (the last retained row of the previous date). Only the very first row has no
# predecessor; its null is replaced by 0.
# NOTE(review): if dates are missing from `june`, the difference across that
# gap spans more than one hour -- confirm this is acceptable.
hourly_deriv_df = june.with_columns(
    diff=pl.col("Dry Bulb Temperature").diff().fill_null(strategy="zero")
)
# Rich display (instead of print) so the preview renders as a table while the
# chart below remains the cell's final output.
display(hourly_deriv_df.head())

# Scatter of the hourly change against hour of day, all days overlaid.
alt.Chart(hourly_deriv_df).mark_circle().encode(
    alt.X("hours(datetime):T"), alt.Y("diff")
)

shape: (5, 3)
┌─────────────────────┬──────────────────────┬──────┐
│ datetime            ┆ Dry Bulb Temperature ┆ diff │
│ ---                 ┆ ---                  ┆ ---  │
│ datetime[μs]        ┆ f64                  ┆ f64  │
╞═════════════════════╪══════════════════════╪══════╡
│ 2020-07-01 00:00:00 ┆ 17.6                 ┆ 0.0  │
│ 2020-07-01 01:00:00 ┆ 17.1                 ┆ -0.5 │
│ 2020-07-01 02:00:00 ┆ 16.5                 ┆ -0.6 │
│ 2020-07-01 03:00:00 ┆ 16.0                 ┆ -0.5 │
│ 2020-07-01 04:00:00 ┆ 15.4                 ┆ -0.6 │
└─────────────────────┴──────────────────────┴──────┘


### Correlation between morning temperature path and temperature before T\*


In [None]:
# Expressions for the hour of day and the calendar date of each reading.
hour_of_day = pl.col("datetime").dt.hour()
calendar_date = pl.col("datetime").dt.date()

# Keep only the hour-0 and hour-4 readings, spread them into one row per date
# (pivot names the new columns "0" and "4"), and compute the drop between them.
morn_df = (
    june.with_columns(hour=hour_of_day, date=calendar_date)
    .filter((pl.col("hour") == 0) | (pl.col("hour") == 4))
    .pivot(on="hour", index="date", values="Dry Bulb Temperature")
    .with_columns(deltaT=pl.col("0") - pl.col("4"))
)
morn_df.head()

date,0,4,deltaT
date,f64,f64,f64
2020-07-01,17.6,15.4,2.2
2020-07-02,15.5,13.4,2.1
2020-07-03,15.8,14.3,1.5
2020-07-04,16.5,16.0,0.5
2020-07-05,19.2,17.3,1.9


In [None]:
# Hour-0 temperature against the hour-0 minus hour-4 difference; neither axis
# is forced to start at zero.
midnight_temp_axis = alt.X("0:Q").scale(zero=False)
delta_axis = alt.Y("deltaT:Q").scale(zero=False)
alt.Chart(morn_df).mark_circle().encode(midnight_temp_axis, delta_axis)

### Relationship between values around T\* and peak temp


In [127]:
# Hour of day used as the candidate T*.
t_star = 0

# Per date: the hourly change recorded at hour `t_star` and the day's maximum
# temperature. The group key keeps the name "datetime" although it holds dates.
at_t_star = pl.col("datetime").dt.hour() == t_star
deriv_label = f"deriv at {t_star}am"
by_date = hourly_deriv_df.group_by(
    pl.col("datetime").dt.date(), maintain_order=True
)
tstar_peak_df = by_date.agg(
    pl.col("diff").filter(at_t_star).first().alias(deriv_label),
    pl.col("Dry Bulb Temperature").max().alias("max temp"),
)
tstar_peak_df.head()

datetime,deriv at 0am,max temp
date,f64,f64
2020-07-01,0.0,24.0
2020-07-02,-0.5,23.0
2020-07-03,-0.4,21.0
2020-07-04,-0.2,27.0
2020-07-05,-0.5,27.6


In [128]:
# Daily maximum temperature against the hourly change at hour `t_star`.
deriv_axis = alt.X(f"deriv at {t_star}am:Q").scale(zero=False)
peak_axis = alt.Y("max temp:Q").scale(zero=False)
alt.Chart(tstar_peak_df).mark_circle().encode(deriv_axis, peak_axis)

In [130]:
# Same per-date aggregation as for the derivative, but pairing the raw
# temperature at hour `t_star` (instead of its hourly change) with the day's
# maximum.
is_t_star_hour = pl.col("datetime").dt.hour() == t_star
temp_label = f"{t_star}am temp"
dry_bulb = pl.col("Dry Bulb Temperature")
tstar_peak_no_deriv_df = hourly_deriv_df.group_by(
    pl.col("datetime").dt.date(), maintain_order=True
).agg(
    dry_bulb.filter(is_t_star_hour).first().alias(temp_label),
    dry_bulb.max().alias("max temp"),
)
tstar_peak_no_deriv_df.head()

datetime,0am temp,max temp
date,f64,f64
2020-07-01,17.6,24.0
2020-07-02,15.5,23.0
2020-07-03,15.8,21.0
2020-07-04,16.5,27.0
2020-07-05,19.2,27.6


In [131]:
# Daily maximum temperature against the temperature at hour `t_star`.
tstar_temp_axis = alt.X(f"{t_star}am temp:Q").scale(zero=False)
max_temp_axis = alt.Y("max temp:Q").scale(zero=False)
alt.Chart(tstar_peak_no_deriv_df).mark_circle().encode(tstar_temp_axis, max_temp_axis)

### Looking at elevated temperature characteristics


In [114]:
# For each date, count how many readings equal that date's maximum
# temperature, then list the distinct counts that occur.
temp_expr = pl.col("Dry Bulb Temperature")
readings_at_daily_max = temp_expr.filter(temp_expr == temp_expr.max()).count()
dfi = june.group_by(pl.col("datetime").dt.date(), maintain_order=True).agg(
    readings_at_daily_max.alias("count of max temp")
)
dfi["count of max temp"].unique()

count of max temp
u32
1
2
3


In [137]:
# Add a calendar-date column and keep only the dates strictly before 2020-07-09.
cutoff_date = pl.date(2020, 7, 9)
date_df = june.with_columns(date=pl.col("datetime").dt.date()).filter(
    pl.col("date") < cutoff_date
)
date_df.head()

datetime,Dry Bulb Temperature,date
datetime[μs],f64,date
2020-07-01 00:00:00,17.6,2020-07-01
2020-07-01 01:00:00,17.1,2020-07-01
2020-07-01 02:00:00,16.5,2020-07-01
2020-07-01 03:00:00,16.0,2020-07-01
2020-07-01 04:00:00,15.4,2020-07-01


In [199]:
# One histogram row per date: temperatures bucketed into 0.5-wide bins, with
# the number of readings per bin on the y axis.
temp_bins = alt.X("Dry Bulb Temperature:Q").bin(step=0.5)
reading_count = alt.Y("count()", title="count")
(
    alt.Chart(date_df)
    .mark_bar()
    .encode(x=temp_bins, y=reading_count, row="date")
    .properties(width=300, height=50)
)

#### Faceted (by day) analysis of variation

In [None]:
# Per-date variance of the temperature: over the whole day, and over hours
# 10 through 17 only (i.e. hour > 9 and hour < 18).
# The "aftenoon var" column name is kept exactly as spelled because the
# plotting cell refers to it by this name.
dry_bulb_temp = pl.col("Dry Bulb Temperature")
in_afternoon_window = pl.col("hour").is_between(10, 17)
var_df = (
    date_df.with_columns(hour=pl.col("datetime").dt.hour())
    .group_by("date", maintain_order=True)
    .agg(
        dry_bulb_temp.var().alias("global var"),
        dry_bulb_temp.filter(in_afternoon_window).var().alias("aftenoon var"),
    )
)

# Attach the per-date variances to every hourly row of that date.
join_df = date_df.join(var_df, on="date", how="inner")
join_df.head()

date,global var,aftenoon var
date,f64,f64
2020-07-01,7.918678,1.974286
2020-07-02,9.210851,2.025714
2020-07-03,4.458478,0.834286
2020-07-04,13.190417,2.13125
2020-07-05,9.436232,4.925714


In [None]:
# Base layer: temperature against hour of day, drawn as a small line chart.
base = (
    alt.Chart(join_df)
    .mark_line()
    .encode(
        alt.X("hours(datetime):T", title="hours"),
        alt.Y("Dry Bulb Temperature").scale(zero=False),
    )
    .properties(width=100, height=100)
)

# Text layers: both are pinned to pixel position (12, 12) and differ only in
# baseline ("top" vs "bottom") and weight. Each variance column holds a single
# value per day, so the mean aggregate simply recovers that value.
text1 = base.mark_text(baseline="top").encode(
    x=alt.value(12),
    y=alt.value(12),
    text=alt.Text("global var", aggregate="mean", format=".1f", title="glob"),
)

text2 = base.mark_text(baseline="bottom", fontWeight="bold").encode(
    x=alt.value(12),
    y=alt.value(12),
    text=alt.Text("aftenoon var", aggregate="mean", format=".1f"),
)

# Stack the three layers and draw one column per day.
layered = base + text1 + text2
chart = layered.facet(column=alt.Column("date(datetime):O", title="day"))
chart

In [198]:
# Write the faceted chart as a PNG inside the location returned by
# get_todays_save_path().
figure_path = get_todays_save_path() / "faceted_temps_and_var.png"
chart.save(figure_path)