In [19]:
library(dplyr)
library(readr)

In [20]:
energy <- "electricity"
hourlyPath  <- paste0("../../curated/", energy, "/gold_hourly_",     energy, ".csv")
dailyPath   <- paste0("../../curated/", energy, "/gold_daily_",      energy, ".csv")
monthlyPath <- paste0("../../curated/", energy, "/gold_monthly_",    energy, ".csv")


In [21]:
# Hourly
energyHourly      <- read_csv(hourlyPath, show_col_types = FALSE)
weatherHourly   <- read_csv("../../curated/weather/gold_hourly_weather.csv", show_col_types = FALSE)

# Daily
energyDaily       <- read_csv(dailyPath, show_col_types = FALSE)
weatherDaily    <- read_csv("../../curated/weather/gold_daily_weather.csv", show_col_types = FALSE)

# Monthly
energyMonthly      <- read_csv(monthlyPath, show_col_types = FALSE) %>%
                    mutate(year_month = factor(paste(year, month, sep = "-")))
weatherMonthly   <- read_csv("../../curated/weather/gold_monthly_weather.csv", show_col_types = FALSE) %>%
                    mutate(year_month = factor(paste(year, month, sep = "-")))

In [22]:
dfHourly <- weatherHourly %>%
    left_join(energyHourly,   by = c("year", "month", "hour"), suffix = c("", "_energy")) %>%
    select(
        year, 
        month,
        hour,
        avg_temp,
        avg_dewpt_temp,
        avg_rel_hum_pct,
        avg_wind_dir,
        avg_wind_spd,
        avg_visib,
        avg_stn_press,
        avg_hmdx,
        avg_wind_chill,
        consumption
    )

dfDaily <- weatherDaily %>%
    left_join(energyDaily,   by = c("year", "month", "day", "date"), suffix = c("", "_energy")) %>%
    select(
        year, 
        month,
        date,
        day,
        avg_temp,
        avg_dewpt_temp,
        avg_rel_hum_pct,
        avg_wind_dir,
        avg_wind_spd,
        avg_visib,
        avg_stn_press,
        avg_hmdx,
        avg_wind_chill,
        consumption
    )

dfMonthly <- weatherMonthly %>%
    left_join(energyMonthly,   by = c("year", "month"), suffix = c("", "_energy")) %>%
    select(
        year, 
        month,
        year_month,
        avg_temp,
        avg_dewpt_temp,
        avg_rel_hum_pct,
        avg_wind_dir,
        avg_wind_spd,
        avg_visib,
        avg_stn_press,
        avg_hmdx,
        avg_wind_chill,
        consumption
    )

In [23]:
mdl_hr <-   lm(consumption ~ avg_temp * avg_dewpt_temp * avg_rel_hum_pct * avg_hmdx, data = dfHourly)
mdl_d <-    lm(consumption ~ avg_temp * avg_dewpt_temp * avg_rel_hum_pct * avg_hmdx, data = dfDaily)
mdl_m <-    lm(consumption ~ avg_temp * avg_dewpt_temp * avg_rel_hum_pct * avg_hmdx, data = dfMonthly)

In [24]:
mdl_stats <- data.frame(
    Models = c("Hourly", "Daily", "Monthly"),
    `R2` = c(
        summary(mdl_hr)$r.squared,
        summary(mdl_d)$r.squared,
        summary(mdl_m)$r.squared
    ),
    `Adj_R2` = c(
        summary(mdl_hr)$adj.r.squared,
        summary(mdl_d)$adj.r.squared,
        summary(mdl_m)$adj.r.squared
    )
)

mdl_stats

Models,R2,Adj_R2
<chr>,<dbl>,<dbl>
Hourly,0.03338753,0.03336032
Daily,0.32677479,0.3126314
Monthly,0.92541361,0.8284513
