In [105]:
# install the required packages first
require(jsonlite)
require(httr)
require(data.table)

get_token <- function(username, password, url_site){
    
    post_body = list(username=username,password=password)
    post_url_string = paste0(url_site,'/token/')
    result = POST(post_url_string, body = post_body)

    # error handling (wrong credentials)
    if(result$status_code==400){
        print('Check your credentials')
        return(0)
    }
    else if (result$status_code==201){
        output = content(result)
        token = output$key
    }

    return(token)
}

get_data <- function(start_date='2020-03-20', token, url_site){
    
    post_body = list(start_date=start_date,username=username,password=password)
    post_url_string = paste0(url_site,'/dataset/')
    
    header = add_headers(c(Authorization=paste('Token',token,sep=' ')))
    result = GET(post_url_string, header, body = post_body)
    output = content(result)
    data = data.table::rbindlist(output)
    data[,event_date:=as.Date(event_date)]
    data = data[order(event_date)]
    return(data)
}


send_submission <- function(predictions, token, url_site, submit_now=F){
    
    format_check=check_format(predictions)
    if(!format_check){
        return(FALSE)
    }
    
    post_string="list("
    for(i in 1:nrow(predictions)){
        if(i<nrow(predictions)){
            post_string=sprintf("%s%s,",post_string,predictions$forecast[i])
        } else {
            post_string=sprintf("%s%s)",post_string,predictions$forecast[i])
        }
    }
    
    submission = eval(parse(text=post_string))
    json_body = jsonlite::toJSON(submission, auto_unbox = TRUE)
    submission=list(submission=json_body)
    
    print(submission)
    # {"31515569":2.4,"32939029":2.4,"4066298":2.4,"6676673":2.4,"7061886":2.4,"85004":2.4} 

    if(!submit_now){
        print("You did not submit.")
        return(FALSE)      
    }
    

    header = add_headers(c(Authorization=paste('Token',token,sep=' ')))
    post_url_string = paste0(url_site,'/submission/')
    result = POST(post_url_string, header, body=submission)
    
    if (result$status_code==201){
        print("Successfully submitted. Below you can see the details of your submission")
    } else {
        print("Could not submit. Please check the error message below, contact the assistant if needed.")
    }
    
    print(content(result))
    
}

check_format <- function(predictions){
    
    if(is.data.frame(predictions) | is.data.frame(predictions)){
        if('forecast' %in% names(predictions)){
            if(nrow(predictions)==24){
                if(all(is.numeric(predictions$forecast))){
                    print("Format OK")
                    return(TRUE)
                } else {
                    print("forecast information is not numeric")
                    return(FALSE)                
                }
            } else {
                print("Forecasts for 24 hours should be provided, current number of rows:")
                print(nrow(predictions))
                return(FALSE)     
            }
        } 
    } else {
        print("Wrong format. Please provide data.frame or data.table object")
        return(FALSE)
    }
    
}

# this part is main code
subm_url = 'http://46.101.124.77'

u_name = "Group10"
p_word = "sSQe4kg1ne5XiB7U"
submit_now = FALSE

username = u_name
password = p_word

token = get_token(username=u_name, password=p_word, url=subm_url)
data = get_data(token=token,url=subm_url)


In [106]:
library(data.table)
library(lubridate, quietly=TRUE)
library(zoo, quietly = TRUE)
library(dplyr, quietly = TRUE)
library(glmnet)
library(MLmetrics)
library("mvtnorm") 
library(tidyr)
library(ggplot2)

In [107]:
dt=fread("bulk_consumption_with_temp.csv")

In [108]:
str(dt)

Classes 'data.table' and 'data.frame':	35232 obs. of  10 variables:
 $ Date       : chr  "2017-01-01" "2017-01-01" "2017-01-01" "2017-01-01" ...
 $ Hour       : int  0 1 2 3 4 5 6 7 8 9 ...
 $ Consumption: num  25409 24166 22652 21578 21018 ...
 $ T_1        : num  -15.9 -15.9 -15.9 -15.9 -15.9 ...
 $ T_2        : num  4.18 4.18 4.18 4.18 4.18 4.18 4.18 4.33 4.48 4.63 ...
 $ T_3        : num  0.89 0.89 0.89 0.89 0.89 0.89 0.89 0.92 0.95 0.98 ...
 $ T_4        : num  -19 -19 -19 -19 -19 ...
 $ T_5        : num  -14.8 -14.8 -14.8 -14.8 -14.8 ...
 $ T_6        : num  -10.7 -10.7 -10.7 -10.7 -10.7 ...
 $ T_7        : num  2.16 2.16 2.16 2.16 2.16 2.16 2.16 1.82 1.48 1.14 ...
 - attr(*, ".internal.selfref")=<externalptr> 


In [109]:
str(data)

Classes 'data.table' and 'data.frame':	888 obs. of  10 variables:
 $ event_date : Date, format: "2021-01-08" "2021-01-08" ...
 $ event_hour : int  14 12 11 10 9 8 7 6 5 2 ...
 $ consumption: num  40986 40496 42005 41592 40459 ...
 $ t_1        : num  9.16 8.75 8.08 7.08 3.74 1.15 1.29 1.31 1.69 2.56 ...
 $ t_2        : num  18.6 17 15.7 14 11.7 ...
 $ t_3        : num  18.3 16.4 14.6 12.5 11.1 ...
 $ t_4        : num  12.26 10.03 8.62 6.98 4.32 ...
 $ t_5        : num  14.4 13 11.1 8.7 7.5 ...
 $ t_6        : num  10.79 9.55 8.63 7.32 5.44 ...
 $ t_7        : num  15.4 14.4 13.9 13.2 12.8 ...
 - attr(*, ".internal.selfref")=<externalptr> 


In [110]:
setnames(data, "event_date", "Date")
setnames(data, "event_hour", "Hour")
setnames(data, "consumption", "Consumption")
setnames(data, "t_1", "T_1")
setnames(data, "t_2", "T_2")
setnames(data, "t_3", "T_3")
setnames(data, "t_4", "T_4")
setnames(data, "t_5", "T_5")
setnames(data, "t_6", "T_6")
setnames(data, "t_7", "T_7")

In [111]:
data$Date=as.Date(data$Date)
dt$Date=as.Date(dt$Date)

In [112]:
data=rbind(dt,data)

### Performance Function

In [113]:
perf_dt=function(type,actual,forecast){
    name=type
    n=length(actual)
    error=actual-forecast
    mean=mean(actual)
    sd=sd(actual)
    FBias=sum(error)/sum(actual)
    MPE=sum(error/actual)/n
    MAPE=sum(abs(error/actual))/n
    RMSE=sqrt(sum(error^2))/n
    MAD=sum(abs(error))/n
    WMAPE=MAD/mean
    l=data.frame(name,n,mean,sd,FBias,MAPE,RMSE,MAD,WMAPE)
    return(l)
}

# Lagged Base Model

In [114]:
dt_lagged=copy(data)
dt_lagged[, Lag_48:=shift(Consumption,type="lag",n=48)]
dt_lagged[, Lag_168:=shift(Consumption,type="lag",n=168)]

In [115]:
train=filter(dt_lagged,Date<"2021-01-08")

In [116]:
test=filter(dt_lagged,Date>"2021-01-08" & Date<="2021-01-22")

## Hourly Performance

In [117]:
head(train)

Date,Hour,Consumption,T_1,T_2,T_3,T_4,T_5,T_6,T_7,Lag_48,Lag_168
2017-01-01,0,25409.31,-15.88,4.18,0.89,-18.96,-14.77,-10.68,2.16,,
2017-01-01,1,24166.14,-15.88,4.18,0.89,-18.96,-14.77,-10.68,2.16,,
2017-01-01,2,22652.02,-15.88,4.18,0.89,-18.96,-14.77,-10.68,2.16,,
2017-01-01,3,21578.05,-15.88,4.18,0.89,-18.96,-14.77,-10.68,2.16,,
2017-01-01,4,21018.2,-15.88,4.18,0.89,-18.96,-14.77,-10.68,2.16,,
2017-01-01,5,20953.17,-15.88,4.18,0.89,-18.96,-14.77,-10.68,2.16,,


In [118]:
perf_dt("Lag 48 for hourly prediction", test$Consumption, test$Lag_48)
perf_dt("Lag 168 for hourly prediction", test$Consumption, test$Lag_168)

name,n,mean,sd,FBias,MAPE,RMSE,MAD,WMAPE
Lag 48 for hourly prediction,336,37045.79,5444.545,0.008716181,0.1713854,403.6515,6137.928,0.1656849


name,n,mean,sd,FBias,MAPE,RMSE,MAD,WMAPE
Lag 168 for hourly prediction,336,37045.79,5444.545,0.04073002,0.1580881,394.6597,5804.765,0.1566916


## Daily Performance

In [101]:
test %<>% 
  group_by(Date) %>% 
  summarise(Consumption = sum(Consumption),
            Lag_2 = sum(Lag_48),
            Lag_7= sum(Lag_168)
           )

`summarise()` ungrouping output (override with `.groups` argument)


In [103]:
perf_dt("Lag 2 for daily prediction", test$Consumption, test$Lag_2)
perf_dt("Lag 7 for daily prediction", test$Consumption, test$Lag_7)

name,n,mean,sd,FBias,MAPE,RMSE,MAD,WMAPE
Lag 2 for daily prediction,14,889099,61616.14,0.008716181,0.07486176,21854.45,64694.99,0.07276466


name,n,mean,sd,FBias,MAPE,RMSE,MAD,WMAPE
Lag 7 for daily prediction,14,889099,61616.14,0.04073002,0.04087209,12136.03,36213.03,0.04073002
