In [1]:
# Enable IPython's autoreload extension in mode 2 so imported modules
# (e.g. the local `weather.helpers` package used below) are re-imported
# automatically before each cell runs, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [9]:
from functools import partial

import polars as pl
import altair as alt

from weather.helpers.epw_read import read_epw
from weather.helpers.weather_data import PALO_ALTO_20
from weather.helpers.filter import filter_df_by_month

In [58]:
# Read the EPW weather file and bind it (plus its metadata object) into a
# one-argument helper that selects a single month.
df = read_epw(PALO_ALTO_20.path)
month_filter = partial(filter_df_by_month, df, PALO_ALTO_20)

# NOTE(review): this frame is named `june`, but month 7 is requested and the
# rows displayed further down start at 2020-07-01 -- confirm the intended month.
# Day 30 is dropped because (per the original note) the last day has only
# 23 values instead of 24.
june = (
    month_filter(7)
    .filter(pl.col("datetime").dt.day() != 30)
    .select(["datetime", "Dry Bulb Temperature"])
)

# Sanity check: every remaining calendar date must contribute exactly 24 rows.
assert (june["datetime"].dt.date().unique_counts() == 24).all()



## Examining differences between consecutive hours to see when large changes first occur (searching for T*)

In [61]:
# Add `diff`: the change in dry bulb temperature relative to the previous row.
# Null differences (such as the very first row, which has no predecessor)
# are replaced with 0.
hourly_deriv_df = june.with_columns(
    pl.col("Dry Bulb Temperature")
    .diff()
    .fill_null(strategy="zero")
    .alias("diff")
)
print(hourly_deriv_df.head())

# Scatter the hourly change against the hour of day.
alt.Chart(hourly_deriv_df).mark_circle().encode(
    x=alt.X("hours(datetime):T"),
    y=alt.Y("diff"),
)

shape: (5, 3)
┌─────────────────────┬──────────────────────┬──────┐
│ datetime            ┆ Dry Bulb Temperature ┆ diff │
│ ---                 ┆ ---                  ┆ ---  │
│ datetime[μs]        ┆ f64                  ┆ f64  │
╞═════════════════════╪══════════════════════╪══════╡
│ 2020-07-01 00:00:00 ┆ 17.6                 ┆ 0.0  │
│ 2020-07-01 01:00:00 ┆ 17.1                 ┆ -0.5 │
│ 2020-07-01 02:00:00 ┆ 16.5                 ┆ -0.6 │
│ 2020-07-01 03:00:00 ┆ 16.0                 ┆ -0.5 │
│ 2020-07-01 04:00:00 ┆ 15.4                 ┆ -0.6 │
└─────────────────────┴──────────────────────┴──────┘


### correlation between morning temperature path and temp before T*

In [59]:
# One row per date with the 00:00 and 04:00 temperatures side by side
# (columns "0" and "4"), plus `deltaT` = temperature at 00:00 minus
# temperature at 04:00.
morn_df = (
    june.with_columns(
        hour=pl.col("datetime").dt.hour(),
        date=pl.col("datetime").dt.date(),
    )
    # Keep only the midnight and 4am readings.
    .filter(pl.col("hour").is_in([0, 4]))
    .pivot(on="hour", index="date", values="Dry Bulb Temperature")
    .with_columns(deltaT=pl.col("0") - pl.col("4"))
)
morn_df.head()

date,0,4,deltaT
date,f64,f64,f64
2020-07-01,17.6,15.4,2.2
2020-07-02,15.5,13.4,2.1
2020-07-03,15.8,14.3,1.5
2020-07-04,16.5,16.0,0.5
2020-07-05,19.2,17.3,1.9


In [60]:
# Midnight temperature (x) against the midnight-to-4am difference (y);
# neither axis is forced to include zero.
alt.Chart(morn_df).mark_circle().encode(
    x=alt.X("0:Q").scale(zero=False),
    y=alt.Y("deltaT:Q").scale(zero=False),
)

### Relationship between values around T* and peak temp

In [85]:
# Candidate hour of day for T*.
t_star = 6

# Per calendar date: the hourly temperature change recorded at `t_star`
# and the day's maximum dry bulb temperature. The group key keeps the
# column name "datetime" but holds dates.
tstar_peak_df = hourly_deriv_df.group_by(
    pl.col("datetime").dt.date(),
    maintain_order=True,
).agg(
    [
        pl.col("diff")
        .filter(pl.col("datetime").dt.hour() == t_star)
        .first()
        .alias(f"deriv at {t_star}am"),
        pl.col("Dry Bulb Temperature").max().alias("max temp"),
    ]
)
tstar_peak_df.head()

datetime,deriv at 6am,max temp
date,f64,f64
2020-07-01,0.2,24.0
2020-07-02,0.8,23.0
2020-07-03,0.1,21.0
2020-07-04,0.2,27.0
2020-07-05,0.3,27.6


In [86]:
# Temperature change at `t_star` (x) against the daily maximum (y);
# neither axis is forced to include zero.
alt.Chart(tstar_peak_df).mark_circle().encode(
    x=alt.X(f"deriv at {t_star}am:Q").scale(zero=False),
    y=alt.Y("max temp:Q").scale(zero=False),
)

In [87]:
# Per calendar date: the dry bulb temperature recorded at `t_star` itself
# (rather than its hourly change) and the day's maximum temperature.
tstar_peak_no_deriv_df = hourly_deriv_df.group_by(
    pl.col("datetime").dt.date(),
    maintain_order=True,
).agg(
    [
        pl.col("Dry Bulb Temperature")
        .filter(pl.col("datetime").dt.hour() == t_star)
        .first()
        .alias(f"{t_star}am temp"),
        pl.col("Dry Bulb Temperature").max().alias("max temp"),
    ]
)
tstar_peak_no_deriv_df.head()



datetime,6am temp,max temp
date,f64,f64
2020-07-01,16.2,24.0
2020-07-02,14.8,23.0
2020-07-03,15.1,21.0
2020-07-04,17.2,27.0
2020-07-05,18.3,27.6


In [88]:
# Temperature at `t_star` (x) against the daily maximum (y);
# neither axis is forced to include zero.
alt.Chart(tstar_peak_no_deriv_df).mark_circle().encode(
    x=alt.X(f"{t_star}am temp:Q").scale(zero=False),
    y=alt.Y("max temp:Q").scale(zero=False),
)

### Looking at elevated temperature characteristics

In [None]:
# Display the per-day frame of temperature at `t_star` and daily maximum.
# NOTE(review): this cell has no execution count, so it looks like a
# not-yet-run starting point for this section -- confirm.
tstar_peak_no_deriv_df