In [4]:
%load_ext autoreload
%autoreload 2

In [19]:
from functools import partial

import polars as pl
import altair as alt
from scipy.stats import linregress
import numpy as np

from weather.helpers.epw_read import read_epw
from weather.helpers.weather_data import PALO_ALTO_20
from weather.helpers.filter import filter_df_by_month
from weather.helpers.figures import get_todays_save_path

In [2]:
# Read the Palo Alto weather file and bind it (plus its metadata object) into a
# month selector so later calls only need to pass the month number.
df = read_epw(PALO_ALTO_20.path)
month_filter = partial(filter_df_by_month, df, PALO_ALTO_20)

# NOTE(review): the result is named `june`, but month 7 is requested and the
# dates shown in later outputs are 2020-07-xx -- confirm the intended month
# before renaming (later cells reference `june`).
# Day 30 is excluded because, per the original author, the last day has only
# 23 values instead of 24.
not_day_30 = pl.col("datetime").dt.day() != 30
june = (
    month_filter(7)
    .filter(not_day_30)
    .select(["datetime", "Dry Bulb Temperature"])
)

# Sanity check: every remaining calendar date must have exactly 24 rows.
rows_per_day = june["datetime"].dt.date().unique_counts()
assert (rows_per_day.unique() == 24).all()

In [36]:
# Reusable expressions derived from the timestamp column.
hour_of_day = pl.col("datetime").dt.hour()
calendar_date = pl.col("datetime").dt.date()

# Keep only the readings taken at hours 0, 4 and 5.
is_wanted_hour = (hour_of_day == 0) | (hour_of_day == 4) | (hour_of_day == 5)

# One row per date, one column per kept hour (columns are named "0", "4", "5"),
# plus deltaT = temperature at hour 0 minus temperature at hour 4.
morn_df = (
    june.filter(is_wanted_hour)
    .with_columns(hour=hour_of_day, date=calendar_date)
    .pivot(on="hour", index="date", values="Dry Bulb Temperature")
    .with_columns(deltaT=pl.col("0") - pl.col("4"))
)
morn_df.head()

date,0,4,5,deltaT
date,f64,f64,f64,f64
2020-07-01,17.6,15.4,16.0,2.2
2020-07-02,15.5,13.4,14.0,2.1
2020-07-03,15.8,14.3,15.0,1.5
2020-07-04,16.5,16.0,17.0,0.5
2020-07-05,19.2,17.3,18.0,1.9


In [38]:
# Least-squares line relating the hour-0 temperature (x) to the hour-4
# temperature (y); the result exposes slope, intercept, rvalue, pvalue, etc.
morn_fit = linregress(x=morn_df["0"], y=morn_df["4"])
morn_fit

LinregressResult(slope=np.float64(0.7582940395855099), intercept=np.float64(2.8352147273908024), rvalue=np.float64(0.7926789266301113), pvalue=np.float64(2.962355444713497e-07), stderr=np.float64(0.11223580570656057), intercept_stderr=np.float64(1.9293372262011512))

In [18]:
def predict_4am(temp_at_midnight, fit=None):
    """Predict the 4 am temperature from the midnight temperature.

    Evaluates the straight line ``y = m * x + b``, i.e.
    ``temp_4am = fit.slope * temp_at_midnight + fit.intercept``.

    NOTE(review): this function is defined again, identically, in a later
    cell of this notebook; only one definition should be kept.

    Args:
        temp_at_midnight: Midnight temperature. A scalar, or any object that
            supports ``*`` and ``+`` with the fit coefficients (for example a
            NumPy array, giving element-wise predictions).
        fit: Object exposing ``slope`` and ``intercept`` attributes. When
            omitted, the notebook-level ``morn_fit`` is looked up at call
            time, which matches the original behaviour.

    Returns:
        The predicted 4 am temperature(s), in the same units as the input.
    """
    if fit is None:
        # Late lookup so re-running the fitting cell is picked up automatically.
        fit = morn_fit
    return (fit.slope * temp_at_midnight) + fit.intercept

In [None]:
# Observed data: hour-0 temperature (column "0") against hour-4 temperature
# (column "4"). Axes are not forced to include zero so the points fill the plot.
data = alt.Chart(morn_df).mark_circle().encode(
    alt.X("0:Q", title="Dry bulb temperature at 00:00").scale(zero=False),
    alt.Y("4:Q", title="Dry bulb temperature at 04:00").scale(zero=False),
)

# Evaluate the fitted line on an evenly spaced grid of midnight temperatures.
# predict_4am is plain arithmetic, so it is applied to the whole NumPy array at
# once rather than row by row through a Python callback.
x = np.arange(start=15.5, step=0.5, stop=20)
source = pl.DataFrame({"x": x, "y": predict_4am(x)})

# Fitted regression line, drawn in black. The axis titles repeat those of the
# scatter layer so the layered chart shows a single title per axis.
base = alt.Chart(source).mark_line().encode(
    x=alt.X("x:Q", title="Dry bulb temperature at 00:00"),
    y=alt.Y("y:Q", title="Dry bulb temperature at 04:00"),
    color=alt.value("black"),
)

# Layer the fitted line over the scatter plot.
data + base



In [11]:
# Regress the overnight drop deltaT (hour-0 minus hour-4 temperature) on the
# hour-0 temperature. Because deltaT = T0 - T4, this line is the complement of
# the T0 -> T4 fit: its slope equals 1 - morn_fit.slope and its intercept equals
# -morn_fit.intercept, as the two printed results confirm.
morn_dt_fit = linregress(x=morn_df["0"], y=morn_df["deltaT"])
morn_dt_fit

LinregressResult(slope=np.float64(0.24170596041449002), intercept=np.float64(-2.835214727390797), rvalue=np.float64(0.3828712927200296), pvalue=np.float64(0.04036541490268517), stderr=np.float64(0.11223580570656054), intercept_stderr=np.float64(1.9293372262011508))

In [15]:
def predict_4am(temp_at_midnight, fit=None):
    """Predict the 4 am temperature from the midnight temperature.

    Evaluates the straight line ``y = m * x + b``, i.e.
    ``temp_4am = fit.slope * temp_at_midnight + fit.intercept``.

    NOTE(review): this duplicates the ``predict_4am`` defined in an earlier
    cell and shadows it; it still uses ``morn_fit`` (not ``morn_dt_fit``) by
    default. Keep only one definition.

    Args:
        temp_at_midnight: Midnight temperature. A scalar, or any object that
            supports ``*`` and ``+`` with the fit coefficients (for example a
            NumPy array, giving element-wise predictions).
        fit: Object exposing ``slope`` and ``intercept`` attributes. When
            omitted, the notebook-level ``morn_fit`` is looked up at call
            time, which matches the original behaviour.

    Returns:
        The predicted 4 am temperature(s), in the same units as the input.
    """
    if fit is None:
        # Late lookup so re-running the fitting cell is picked up automatically.
        fit = morn_fit
    return (fit.slope * temp_at_midnight) + fit.intercept

In [16]:
# Example: predicted 4 am temperature for the first date's midnight reading.
predict_4am(morn_df.item(0, "0"))

np.float64(16.181189824095778)