# Consumption Data Generation:

### 0) Script Initialization
    A) Workspace and Package Initialization
    B) Function Definition
	
### 1) Secondly Data Processing
    A) Loading Secondly Data
    B) Aggregating to 30s, 5min, 30min
    C) Calculating Perfect Forecast Error (30min->30s)
	
### 2) 5min Data Processing:
    A) Loading Data (5min samples)
    B) Aggregating Data (5min -> 30min)
    C) Fitting the ARIMA model
	
### 3) Forecast Generation
    A) Generating 24h ahead forecast for each 30min interval

### 4) Scenario Generation
    A) Initializing the scenario matrices
    B) Generating the scenarios for each T_EMS
    C) Saving the data
__________________________________________________________________________

### 0) Script Initialization
    A) Workspace and Package Initialization
    B) Function Definition

In [18]:
# 0.A) Workspace initialization

# Start from an empty workspace: every object in the current environment is dropped
rm(list = ls())
# set.seed(1)

# Number of scenarios generated for each EMS period
n_scen <- 5

# Time bases, first in minutes ...
t.ems <- 15   # EMS period in [min]
t.int <- 1.0  # Intra period in [min]
t.for <- 30   # Forecast period in [min]

# ... then the same periods in seconds
t.ems.seg <- 60 * t.ems
t.int.seg <- 60 * t.int
t.for.seg <- 60 * t.for

# Derived counts. The numeric suffixes in the names are historical; with the
# periods above they evaluate to 2, 96, 1440 and 48 respectively.
my.resolution <- t.for / t.ems               # EMS periods per forecast period
my.288 <- 24 * 60 / t.ems                    # EMS periods per day
my.2880 <- 24 * 60 / t.int                   # intra periods per day
my.41 <- (24 * 60 / t.ems) / my.resolution   # forecast periods per day


# Load packages:
# library() stops immediately when a package is not installed, whereas
# require() only returns FALSE with a warning and lets the script fail later
# at the first call into the missing package.
library(R.matlab)  # writeMat()
library(xlsx)      # read.xlsx2()
library(ggplot2)
library(reshape2)
library(forecast)  # Arima()


In [19]:
# 0.B) - Function definition:

# enlarge: stretch a vector by repeating each of its components in place
# small       <- original vector
# large.val   <- number of consecutive copies made of every component
# returns     <- vector of length(small) * large.val with the same type as
#                `small`, e.g. enlarge(c(1, 2), 2) gives 1 1 2 2.
#                An empty `small` (or large.val = 0) gives an empty vector.
enlarge <- function(small, large.val)
{
  # rep(each = ) performs the whole expansion in one vectorised call. It also
  # copes with empty input, which an index loop over 1:length(small) does not
  # (1:0 counts down and would index past the end of the vector).
  rep(small, each = large.val)
}

# chunk: split a vector into n consecutive pieces
# x       <- original vector
# n       <- number of subvectors generated (a single number >= 1)
# returns <- list of subvectors named "1", "2", ...; the pieces are obtained
#            by cutting the index range of x into n equal-width intervals.
#            An empty x gives an empty list.
chunk <- function(x,n) 
{
  stopifnot(length(n) == 1, n >= 1)

  # cut() needs at least two intervals and a non-empty index vector, so the
  # single-chunk and empty cases are answered directly.
  if (length(x) == 0 || n < 2) {
    return(split(x, rep(1L, length(x))))
  }

  split(x, cut(seq_along(x), n, labels = FALSE))
}

### 1) Secondly Data Processing
    A) Loading Secondly Data
    B) Aggregating to 30s, 5min, 30min
    C) Calculating Perfect Forecast Error (30min->30s)

In [20]:
### 1.A) Load secondly data:

# Secondly data: one workbook per hour, stored as
# "../../Data/Original Data/DIA <day>/<hour>_<hour+1>.xlsx"
days <- 1
h.first <- 0
h.last <- 23

# Every hour is resampled to exactly this many values
len_final <- 3600

# One numeric vector per loaded hour; they are concatenated once at the end
# instead of growing cons_seg inside the loop.
hourly_cons <- list()

for (d in seq_len(days)) {
  for (i in h.first:h.last) {
    # Define interval
    ini <- i
    end <- i + 1

    # Load data
    path <- paste0("../../Data/Original Data/DIA ", d, "/", ini, "_", end, ".xlsx")
    X.data <- read.xlsx2(path, sheetName = "Datos", header = TRUE, dec = ",")

    # Rename categories
    names(X.data) <- c("fecha", "p.cons", "p.act.pv", "p.act.bat", "p.act.total", "p.disp.pv")

    # Consumption: replace the decimal comma and parse as numeric.
    # as.character() makes this work whether the column was read as a factor
    # (R < 4.0 default) or as character (R >= 4.0 default); indexing levels()
    # only works for factors and yields all NA otherwise.
    origin <- as.numeric(gsub(",", ".", as.character(X.data$p.cons)))

    # Correct missing values: drop them, then interpolate the remaining
    # samples onto an evenly spaced grid of len_final points over [0, 1].
    origin <- origin[!is.na(origin)]
    if (length(origin) < 2) {
      stop("Fewer than two valid consumption samples in ", path, call. = FALSE)
    }
    out <- approx(0:(length(origin) - 1) / (length(origin) - 1), origin,
                  xout = 0:(len_final - 1) / (len_final - 1))

    hourly_cons[[length(hourly_cons) + 1]] <- out$y
  }
}

# Merged consumption vector (all hours of all days, in loading order)
cons_seg <- unlist(hourly_cons)

rm(days, h.first, h.last, hourly_cons, X.data, path)
# plot.ts(ts(cons_seg,frequency = 24*3600))

In [21]:
### 1.B) Aggregating Secondly Data:

# Each result is the mean of cons_seg over consecutive blocks; the block
# length in seconds is the divisor used to derive the number of chunks.

# mean consumption per EMS period (t.ems.seg seconds per block)
cons_seg_5min <- vapply(chunk(cons_seg, length(cons_seg) / t.ems.seg), mean, numeric(1))
# mean consumption per intra period (t.int.seg seconds per block)
cons_seg_30seg <- vapply(chunk(cons_seg, length(cons_seg) / t.int.seg), mean, numeric(1))
# mean consumption per forecast period (t.for.seg seconds per block)
cons_seg_for <- vapply(chunk(cons_seg, length(cons_seg) / t.for.seg), mean, numeric(1))


In [22]:
### 1.C) Calculating Perfect Forecast Error (forecast resolution -> intra resolution):

# Spline-interpolate the forecast-period means up to the intra-period
# resolution; this is what a "perfect" forecast looks like at that resolution.
cons_perf_for_30seg <- spline(
  seq_along(cons_seg_for),
  cons_seg_for,
  n = length(cons_seg_30seg)
)$y

# Residual between the measured intra-period means and the interpolated curve
err_cons_30seg_perfect <- cons_seg_30seg - cons_perf_for_30seg
mean_err_cons_perf <- mean(err_cons_30seg_perfect)
sd_err_cons_perf <- sd(err_cons_30seg_perfect)

# Only the standard deviation is kept for later use
rm(cons_perf_for_30seg, err_cons_30seg_perfect, mean_err_cons_perf)

### 2) 5min Data Processing:
    A) Loading Data (5min samples)
    B) Aggregating Data (5min -> 30min)
    C) Fitting the ARIMA model

In [23]:
### A) Load 5min data: 

# Data given with 5min resolution (only weekdays, no weekends)

# Days to read, one CSV file per day named "<month><day>.csv"
day <- c(3:7, 10:14, 17:21, 24:25)
month <- rep("Agost", length(day))

## Read in the data from CSV files

range.interest <- 1:288 # Data is given with 5min timestep ((60/5=12)*24)=288

# One consumption vector per day, concatenated once after the loop
cons_by_day <- vector("list", length(month))
for (n in seq_along(month)) {
  path <- paste0("../../Data/Original Data/TS ANALYSIS/", month[n], day[n], ".csv")
  X <- read.table(path, header = TRUE, fill = TRUE, sep = ";")

  # Just keep columns 6 and 7 (required data) and name them cons and pv.gen
  X <- X[, 6:7]
  names(X) <- c("cons", "pv.gen")

  # Replace , with . (decimal representation) and parse as numeric.
  # as.character() makes this independent of whether read.table() produced a
  # factor (R < 4.0 default) or a character column (R >= 4.0 default);
  # indexing levels() only works for factors and yields all NA otherwise.
  X$cons <- as.numeric(gsub(",", ".", as.character(X$cons)))

  cons_by_day[[n]] <- X$cons[range.interest]
}

cons.ts <- unlist(cons_by_day)

rm(cons_by_day)

In [24]:
### A2) Load 5min data for the 26th of Aug
month <- "Agost"
day <- 26
cons.ts.26 <- c()
for (n in seq_along(month)) {
  # The folder name is built from the loop index n, the file name from month/day
  path <- paste0("../../Data/Original Data/DIA ", n, " - 5min/", month[n], day[n], ".csv")
  X <- read.table(path, header = TRUE, fill = TRUE, sep = ";")

  # Keep columns 6 and 7 and name them cons and pv.gen
  X <- X[, 6:7]
  names(X) <- c("cons", "pv.gen")

  # Decimal comma -> decimal point, then parse as numeric. as.character()
  # handles both factor and character columns; indexing levels() only works
  # for factors (the pre-R-4.0 default) and yields all NA otherwise.
  X$cons <- as.numeric(gsub(",", ".", as.character(X$cons)))

  cons.ts.26 <- c(cons.ts.26, X$cons)
}

# Drop the loading temporaries; only names that actually exist are removed so
# no "object not found" warning is raised.
rm(list = intersect(c("month", "day", "X", "str", "path"), ls()))

In [25]:
### B) Redefine resolution and convert to time series:

# Sampling period of the loaded CSV data in [min] (288 samples per day)
t.data <- 5

# Number of raw samples per forecast period. This must be derived from the
# data's own sampling period: using t.for/t.ems is only correct when the EMS
# period happens to equal the 5min sampling period.
stopifnot(t.for %% t.data == 0)
step.for <- t.for / t.data

## Keep one sample per forecast period (for 30min: elements 1 7 13 ...)
cons.ts <- cons.ts[seq(from = 1, to = length(cons.ts), by = step.for)]
cons.ts.26 <- cons.ts.26[seq(from = 1, to = length(cons.ts.26), by = step.for)]

## Create time series whose frequency is the number of forecast periods per day
cons.ts <- ts(cons.ts, frequency = floor(my.41))
cons.ts.26 <- ts(cons.ts.26, frequency = floor(my.41))
# plot.ts(cons.ts)


In [26]:
# C) Model definition (from 2-TS analysis)

# model estimation
# Candidate models and their AIC as noted by the author (lowest AIC = 3):
# 1) ARMA(0,1) MA(1)48 - AIC = 11016.9
# 2) ARMA(1,0) MA(1)48 - AIC = 11016.61
# 3) ARMA(1,1) MA(1)48 - AIC = 10989.16

# Fit candidate 3 on the historical series: non-seasonal order (p,d,q) = (1,1,1)
# and seasonal order (P,D,Q) = (0,1,1) with a seasonal period of my.41 samples.
# The fitted object is reused later as the template model for the refits.
cons.ts.arima3 <- arima(cons.ts,order=c(1,1,1),
                        seasonal=list(order=c(0,1,1),period=(my.41)))
# cons.ts.arima3

# Disabled: one-off prediction of the next my.41 samples from the fitted model
# pred2 <- predict(cons.ts.arima3,n.ahead=my.41, level=c(0.3,.7))
# mean.cons=pred2$pred;
# sd.cons=pred2$se
# # plot(pred2$se)
# # plot(pred2$se)

### 3) Forecast Generation
    A) Generating 24h ahead forecast for each 30min interval

In [27]:
### A) Generate 24h ahead forecast for each 30min interval of Aug 26th

# History (cons.ts - just till 25th) followed by the values of the 26th
total_real_values <- c(cons.ts, cons.ts.26)

# Forecast matrices: one row per forecast block, one column per predicted step.
# Every row is overwritten in the loop below.
my.42 <- my.41 + 1 # we need 1 more point to have 24h prediction for each T_EMS
for_mean_mat <- matrix(0, nrow = my.41, ncol = my.42)
for_sd_mat <- matrix(0, nrow = my.41, ncol = my.42)

n_hist <- length(cons.ts)
for (bloc in seq_len(my.41)) {
  # Data available at the time we're doing the prediction:
  # the full history plus the first (bloc - 1) values of the forecast day
  known_data <- total_real_values[seq_len(n_hist + bloc - 1)]

  # Apply the already fitted model to this data and predict my.42 steps ahead
  refit <- Arima(known_data, model = cons.ts.arima3)
  pred <- predict(refit, n.ahead = my.42)

  # Store predicted means and their standard errors for this block
  for_mean_mat[bloc, ] <- pred$pred
  for_sd_mat[bloc, ] <- pred$se
}

rm(refit, bloc, pred, total_real_values, n_hist)

### 4) Scenario Generation
    A) Initializing the scenario matrices
    B) Generating the scenarios for each T_EMS
    C) Saving the data

In [28]:
#### A) Initialize the scenario matrices:

# Working matrix for one EMS period: rows = intra periods, columns = scenarios
scen.cons <- matrix(0, my.2880, n_scen)

# All EMS periods stacked along the third dimension (filled with 1 initially)
scen.cons.all <- array(1, dim = c(my.2880, n_scen, my.288))

In [29]:
### B) Create 24h ahead scenarios for each EMS period:

# For different EMS periods inside the same forecast period
# we just shift the my.2880-row window to match the EMS's 24h ahead

for (p in seq_len(my.41)) {
  # For each EMS period (inside forecast)
  for (j in 0:(t.for / t.ems - 1)) {
    # Rows of the interpolated curve that belong to this EMS period
    shift <- j * (t.ems / t.int)
    rows <- (1 + shift):(my.2880 + shift)
    # Position of this EMS period along the third dimension of scen.cons.all
    slot <- (p - 1) * t.for / t.ems + j + 1

    # For each scenario
    for (i in seq_len(n_scen)) {
      ## Long term randomness:
      # draw one value per forecast step around the ARIMA prediction
      # (looking (24h+T_for) ahead), then interpolate to intra resolution
      f.c <- rnorm(my.42, mean = for_mean_mat[p, ], sd = for_sd_mat[p, ])
      s.c <- spline(seq_along(f.c), f.c, n = my.2880 + t.for / t.int)$y

      ## Short term randomness:
      # zero-mean normal noise with the sd of the perfect forecast error
      noise <- rnorm(nrow(scen.cons), mean = 0, sd = sd_err_cons_perf)

      # shifted window of the scenario plus the short term noise
      scen.cons[, i] <- s.c[rows] + noise
    }
    # str1 = paste("Generated Data/Scenarios/Cons/scen_",((p-1)*t.for/t.ems+j+1),".mat",sep = '')
    # writeMat(load = scen.cons, con=str1)
    scen.cons.all[, , slot] <- scen.cons
  }
}



In [30]:
### C) Save the obtained data:

# Merged secondly consumption, stored under the MAT variable name cons_seg
writeMat(
  con = "../../Data/Generated Data/1 - Secondly/cons_seg.mat",
  cons_seg = cons_seg
)

# Scenario cube for all EMS periods, stored under the MAT variable name cons_scen
writeMat(
  con = "../../Data/Generated Data/2 - Scenarios/cons_scen.mat",
  cons_scen = scen.cons.all
)