In [None]:
# Import libraries
import pandas as pd
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
from matplotlib.dates import ConciseDateFormatter, AutoDateLocator
import seaborn as sns

from statsmodels.tsa.deterministic import DeterministicProcess
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

In [None]:
# Load every station's observations, keep only the Fremont Canyon station, and
# re-index it onto a continuous daily calendar (asfreq inserts NaN rows for
# any calendar day that has no observation).
weather = pd.read_csv('./data/weather_data.csv')
fremont = (
    weather.loc[weather.NAME == 'FREMONT CANYON CALIFORNIA, CA US']
    .assign(DATE=lambda station: pd.to_datetime(station['DATE']))
    .set_index('DATE')
    .asfreq('d')
)

# Patch the known empty days by copying the previous day's full row
# (a forward fill). Order matters: 2016-02-21 is filled from 2016-02-20,
# which is itself only populated by the pair before it.
for empty_day, source_day in (
    ('2016-02-20', '2016-02-19'),
    ('2016-02-21', '2016-02-20'),
    ('2021-07-22', '2021-07-21'),
    ('2023-12-31', '2023-12-30'),
):
    fremont.loc[empty_day] = fremont.loc[source_day]

# Chronological split: rows before 2024 are for training, calendar year 2024
# is held out for validation (TAVG is the validation target).
fremont_train = fremont.loc[fremont.index < '2024-01-01']
fremont_valid = fremont.loc[(fremont.index < '2025-01-01') & (fremont.index >= '2024-01-01')]
fremont_valid_y = fremont_valid['TAVG']

# Figure size shared by the plotting cells.
FIGSIZE = (28, 10)

In [None]:
# Show the prepared Fremont frame as the cell output for a quick sanity check.
fremont

In [None]:
# Raw daily average temperature for the Fremont Canyon station.
fig, ax = plt.subplots(figsize=FIGSIZE)
plt.setp(ax.get_xticklabels(), rotation=90)  # vertical x tick labels
ax.set_xlabel("Year")
ax.set_ylabel("Average Temp. (F)")
ax.set_title("Average Daily Temperature (2015 - 2025)")
fig.suptitle("FREMONT CANYON CALIFORNIA, CA US")

# Date-aware ticks: the locator picks the tick positions and the concise
# formatter (built from that same locator) renders the labels.
locator = AutoDateLocator()
formatter = ConciseDateFormatter(locator)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)

# Assigning the result keeps the Axes repr out of the cell output.
G = sns.lineplot(ax=ax, data=fremont, x=fremont.index, y=fremont.TAVG, linewidth=1)

In [None]:
# Centered rolling means of the daily average temperature at three horizons.
# The year and month windows accept partially filled windows (min_periods);
# the week window keeps the default and needs all 7 observations.
daily_tavg = fremont["TAVG"]
year_ma = daily_tavg.rolling(365, min_periods=180, center=True).mean()
month_ma = daily_tavg.rolling(30, min_periods=15, center=True).mean()
week_ma = daily_tavg.rolling(7, center=True).mean()  # computed but not drawn in this cell

fig, ax = plt.subplots(figsize=FIGSIZE)
plt.setp(ax.get_xticklabels(), rotation=90)  # vertical x tick labels
ax.set_xlabel("Year")
ax.set_ylabel("Average Temp. (F)")

# Date-aware tick placement and concise date labels on the x axis.
locator = AutoDateLocator()
formatter = ConciseDateFormatter(locator)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)

# Faint raw series underneath, smoothed series drawn on top of it.
sns.lineplot(ax=ax, data=fremont, x=fremont.index, y=fremont.TAVG, alpha=.2, linewidth=1.5, label="Daily Temperature")
sns.lineplot(ax=ax, x=year_ma.index, y=year_ma, linewidth=3, label="Year MA")
sns.lineplot(ax=ax, x=month_ma.index, y=month_ma, linewidth=3, label="Month MA")

In [None]:
# Linear-trend baseline: fit ordinary least squares on deterministic time
# features of the training period, then forecast the validation period.
linear_model = LinearRegression()
linear_pipeline = Pipeline(steps=[('model', linear_model)])

# Time features derived from the training index: a constant column plus a
# first-order (linear) trend; drop=True removes perfectly collinear terms.
linear_model_dp = DeterministicProcess(index=fremont_train.index, constant=True, order=1, drop=True)
linear_model_X = linear_model_dp.in_sample()
# Forecast exactly as many steps as there are validation rows. The horizon was
# previously hard-coded to 366, which only works when the validation slice
# covers every day of a leap year; any other length makes the metric calls
# below fail on inconsistent sample counts.
linear_model_X_valid = linear_model_dp.out_of_sample(steps=len(fremont_valid_y))
linear_model_y = fremont_train.TAVG
linear_pipeline.fit(linear_model_X, linear_model_y)
linear_model_train_preds = pd.Series(linear_pipeline.predict(linear_model_X), index=linear_model_X.index)
linear_model_valid_preds = pd.Series(linear_pipeline.predict(linear_model_X_valid), index=linear_model_X_valid.index)

# Test model accuracy
# NOTE(review): the metrics pair values by position, not by index label, so
# this assumes fremont_valid_y holds the consecutive days that immediately
# follow the training period - confirm against the data split.
linear_model_mae = mean_absolute_error(fremont_valid_y, linear_model_valid_preds)
linear_model_mape = mean_absolute_percentage_error(fremont_valid_y, linear_model_valid_preds)
# "Accuracy" is defined here as the complement of MAPE.
linear_model_accuracy = 1 - linear_model_mape

plt.figure(figsize=FIGSIZE)
plt.xticks(rotation=90)
plt.xlabel("Year")
plt.ylabel("Average Temp. (F)")
plt.title(f"Linear Model Predictions (MAE: {linear_model_mae:.2f}) - (MAPE: {(linear_model_mape * 100):.2f}%) - (Accuracy: {(linear_model_accuracy * 100):.2f}%)")

# Date-aware tick placement and concise date labels on the x axis.
ax = plt.gca()
locator = AutoDateLocator()
formatter = ConciseDateFormatter(locator)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)

# Faint observed series, with the in-sample fit and the out-of-sample
# forecast drawn on top of it.
sns.lineplot(ax=ax, data=fremont, x=fremont.index, y=fremont.TAVG, alpha=.2, linewidth=1.5, label="Daily Temperature")
sns.lineplot(ax=ax, x=linear_model_train_preds.index, y=linear_model_train_preds, linewidth=3, label="Training Data")
sns.lineplot(ax=ax, x=linear_model_valid_preds.index, y=linear_model_valid_preds, linewidth=3, label="Model Predictions")