# PV Scenarios Generation:

### 0) Script Initialization
    A) Workspace and Package Initialization
    B) Function Definition
	
### 1) Secondly Data Processing
	A) Secondly Data Loading
	B) Secondly Data Aggregation
	C) Perfect Forecast Formation
	D) Cloudy - Clear-sky Intervals Separation
	E) Model Fitting
	
### 2) Scenario Generation
    A) Initialize the scenario matrices
    B) Separate the cloudy and clearsky
    C) Generate 24h ahead scenarios for each EMS period
    D) Save the obtained data
__________________________________________________________________________

### 0) Script Initialization
    A) Workspace and Package Initialization
    B) Function Definition

In [15]:
# 0.A) Workspace Initialization
#
# Resets the global workspace, fixes the random seed so the generated
# scenarios are reproducible, and defines the time resolutions used by
# every later cell.

# Clear the workspace
rm(list=ls()) # remove all variables from workspace
set.seed(12345)

#######################################################
# REDUCE THE GENERATION by 15% !!! (for better match) #
#######################################################
# Multiplier applied to the measured secondly generation after loading.
GEN_REDUCTION <- 0.85

# Parameter initialization:

# Variance threshold above which an interval is classified as cloudy.
var.treshold <- 200000

# Number of scenarios generated per EMS period.
n_scen = 5
t.ems = 15           # EMS period in [min]
t.ems.seg = 60*t.ems # EMS period in [s]
t.int = 1.0          # Intra period in [min]
t.int.seg = 60*t.int # Intra period in [s]
t.for = 30           # Forecast period in [min]
t.for.seg = 60*t.for # Forecast period in [s]

# NOTE: the numeric suffixes of the names below do not match their values.
# With the settings above: my.resolution = 2, my.41 = 48, my.2880 = 1440,
# my.288 = 96.
# EMS periods per forecast period
my.resolution = t.for/t.ems
# forecast periods per day
my.41 = (24*60/t.ems)/my.resolution
# intra periods per day
my.2880 = 24*60/t.int
# EMS periods per day
my.288 = 24*60/t.ems


# Load packages:
require(R.matlab)
require(xlsx)
require(ggplot2)
require(reshape2)
require(forecast)
require(nleqslv)


In [16]:
# 0.B) - Function definition:

# enlarge: stretch a vector by repeating each of its elements in place.
#   small     <- original vector (a matrix is treated as a plain vector,
#                column by column)
#   large.val <- number of consecutive copies of each element
# Returns a vector of length length(small) * large.val with the same type as
# `small` (logical input stays logical), or an empty vector when `small` is
# empty.
enlarge <- function(small, large.val)
{
  # rep(each = ) is the vectorised equivalent of appending
  # rep(small[i], large.val) for every i, without seeding the result with a
  # numeric 0 (which coerced logical input) and without the 1:length() trap
  # on empty input.
  rep(small, each = large.val)
}

# chunk: split a vector into n consecutive pieces of (near-)equal length.
#   x <- original vector
#   n <- number of sub-vectors to generate (a single number >= 1)
# Returns a list of sub-vectors named "1", "2", ... in original order.
chunk <- function(x, n)
{
  stopifnot(length(n) == 1L, !is.na(n), n >= 1)
  if (n < 2) {
    # cut() refuses fewer than two intervals; a single chunk is just all of x.
    return(split(x, rep.int(1L, length(x))))
  }
  # cut() bins the positions 1..length(x) into n equal-width intervals;
  # labels = FALSE yields the integer bin index used as the grouping key.
  split(x, cut(seq_along(x), n, labels = FALSE))
}

### 1) Secondly Data Processing
	A) Secondly Data Loading
	B) Secondly Data Aggregation
	C) Perfect Forecast Formation
	D) Cloudy - Clear-sky Intervals Separation
	E) Model Fitting

In [17]:
### A) Secondly Data Loading:
#
# Reads one XLSX file per hour, converts the "p.disp.pv" column to numeric,
# resamples every hour to exactly 3600 samples and concatenates the hours
# into gen_seg.

# Secondly Data
d = 3        # day index: selects the "DIA <d>" input folder
h.first = 0  # first hourly file to load (interval 0_1)
h.last = 23  # last hourly file to load (interval 23_24)

# Read in the XLSX hourly files:
# each file <ini>_<end>.xlsx is read into a global data frame named
# X.data<ini>_<end>_<d>. The statements are built as text and evaluated so
# that the helper variables str/str2/str3 keep existing for later cleanup.
for(i in h.first:h.last)
{
    # Define interval
    ini = i
    end = i+1

    # Load data
    str = paste('X.data',ini,'_',end,'_',d,' = read.xlsx2("../../Data/Original Data/DIA ',d,'/',ini,'_',end,'.xlsx", sheetName = "Datos", header = TRUE, dec=",")',sep = '')
    eval(parse(text = str))

    # Rename categories
    str2 = paste('names(','X.data',ini,'_',end,'_',d,') <- c("fecha","p.disp.pv","p.cons.pv","p.act.pv")',sep = "")
    eval(parse(text = str2))
}


# Process the PV generation:
gen_seg = c()

for(i in h.first:h.last)
{
    # Define interval
    ini = i
    end = i+1

    # Convert decimal-comma text to numeric. as.character() makes this work
    # both for factor columns and for plain character columns; indexing
    # levels() only worked for factors and produced all-NA otherwise.
    str = paste('X.data',ini,'_',end,'_',d,'$p.disp.pv <- as.numeric(gsub(",",".",as.character(X.data',ini,'_',end,'_',d,'$p.disp.pv)))',sep = '')
    eval(parse(text = str))

    # Correct missing values
    len_final = 3600
    str3 = paste('origin = X.data',ini,'_',end,'_',d,'$p.disp.pv',sep = "")
    eval(parse(text = str3))

    # Drop unparsable/missing samples, then stretch what is left over a
    # normalised [0, 1] axis so every hour ends up with len_final points.
    origin <- origin[!is.na(origin)]
    if (length(origin) < 2)
    {
        stop("Hour ", ini, "_", end, " of day ", d,
             " has fewer than two numeric p.disp.pv samples; cannot interpolate.",
             call. = FALSE)
    }
    out = approx(0:(length(origin)-1)/(length(origin)-1),origin, xout = 0:(len_final-1)/(len_final-1))

    gen_seg = c(gen_seg,out$y)

    # Free the hourly data frame once its samples have been appended
    str3 = paste('rm(X.data',ini,'_',end,'_',d,')',sep = "")
    eval(parse(text = str3))
}

rm(d,h.first,h.last)

# plot(gen_seg,type="l")

In [18]:
# Optional visual check of the series before and after scaling:
# plot(gen_seg,type="l")
# plot(gen_seg*GEN_REDUCTION,type="l")

# Scale the whole secondly generation series by GEN_REDUCTION (in place).
gen_seg <- gen_seg*GEN_REDUCTION

In [19]:
### B) Secondly Data Aggregation:
#
# Block means of the secondly series at the three time resolutions.
# NOTE: the "5min" / "30seg" suffixes in the variable names are historical;
# the actual block lengths come from t.ems.seg, t.int.seg and t.for.seg.

# mean generation per EMS period (blocks of t.ems.seg seconds)
gen_seg_5min <- vapply(chunk(gen_seg, length(gen_seg) / t.ems.seg), mean, numeric(1))
# mean generation per intra period (blocks of t.int.seg seconds)
gen_seg_30seg <- vapply(chunk(gen_seg, length(gen_seg) / t.int.seg), mean, numeric(1))
# mean generation per forecast period (blocks of t.for.seg seconds)
gen_seg_for <- vapply(chunk(gen_seg, length(gen_seg) / t.for.seg), mean, numeric(1))


In [20]:
### C) Perfect Forecast Formation:

# Calculate the "perfect" forecast: spline the forecast-period means
# (gen_seg_for) onto as many points as there are intra-period means
# (gen_seg_30seg). The "30seg" suffix is historical; the resolution is t.int.
gen_perf_for_30seg <- spline(1:length(gen_seg_for), gen_seg_for, n = length(gen_seg_30seg))$y

# Calculate the perfect forecast errors (actual intra-period mean minus forecast):
gen_err_30seg <- gen_seg_30seg -gen_perf_for_30seg
# Group the errors into my.41 consecutive chunks (one per forecast period)
gen_err_30seg_x41 = chunk(gen_err_30seg,my.41)
# gen_err_30seg_x288 = chunk(gen_err_30seg,my.288)


In [21]:
### D) Cloudy - Clear-sky Intervals Separation:

# We need to separate them because we model them differently:

# For each of the my.41 intervals -> we calculate the variance of the secondly
# data and compare it to the threshold
# (cloudy intervals will have a high variance, while clearsky will have a low one)

# var.treshold <- 200000

# We chunk the secondly data into my.41 intervals, calc the var, and compare to threshold:
# mesos.nuvols   = indices of intervals whose variance exceeds var.treshold (cloudy)
# mesos.clearsky = all remaining interval indices
mesos.nuvols   <- which(as.vector(sapply(chunk(gen_seg,my.41),var))>var.treshold) # which - returns index
mesos.clearsky <- setdiff(1:my.41,mesos.nuvols)

# Separate the perfect forecast error into cloudy and clearsky intervals as well:
gen.err.nuvols   <- gen_err_30seg[melt(gen_err_30seg_x41)$L1%in%mesos.nuvols]   # melt() - flattens the chunk list; L1 is the chunk index
gen.err.clearsky <- gen_err_30seg[melt(gen_err_30seg_x41)$L1%in%mesos.clearsky]
# melt() - turns the grouped object into a regular structure where $value = value and $L1 = group (chunk) index

# Optional visual check of both error distributions:
# par(mfrow=c(1,2))
# hist(gen.err.nuvols,breaks = 25, main = "Error for cloudy intervals")
# hist(gen.err.clearsky,breaks = 25, main = "Error for clear sky intervals")
# par(mfrow=c(1,1))

In [22]:
### E) Model Fitting:

# -------------------------------------------------------------#
#  CLOUDY   intervals -> modelled with  BETA  distribution !!! #
#  CLEARSKY intervals -> modelled with NORMAL distribution !!! #
# -------------------------------------------------------------#

## I) Cloudy - Beta Fitting:

# To limit overfitting for the beta distribution -> we remove the top and bottom 10% extremes [outlier effect]
gen.lim <- as.vector(quantile(sort(gen.err.nuvols),probs = c(0.1,0.9))) # the 10% and 90% quantiles of the cloudy error
# gen.lim <- as.vector(quantile(sort(gen.err.nuvols),probs = c(0.05,0.95))) # alternative: the 5% and 95% quantiles
# Keep errors in [10% quantile, 90% quantile): lower bound inclusive, upper bound exclusive
gen.err.nuvols.cent <- gen.err.nuvols[(gen.err.nuvols>= gen.lim[1])&(gen.err.nuvols< gen.lim[2])]

# Convert to PU with min and max (rescale the kept errors to [0, 1]).
# max.gen.err and min.gen.err are reused later to map beta samples back to error units.
max.gen.err <- max(gen.err.nuvols.cent)
min.gen.err <- min(gen.err.nuvols.cent)
gen.err.nuvols.norm <- (gen.err.nuvols.cent-min.gen.err)/(max.gen.err-min.gen.err)

# Define the system of nonlinear equations for alpha and beta fitting
# (x[1] = alpha, x[2] = beta). Both residuals are zero when the beta
# distribution's mean and variance equal the sample mean and variance.
# Reads gen.err.nuvols.norm from the enclosing workspace.
fn2 <- function(x)
{
  # residual of the mean equation:
  y1 <- x[1]/(x[1]+x[2]) - mean(gen.err.nuvols.norm)
  # x[1]/(x[1]+x[2]) = mean(gen.err.nuvols.norm)
  # residual of the variance equation:
  y2 <- x[1]*x[2]/((x[1]+x[2])^2*(x[1]+x[2]+1)) - var(gen.err.nuvols.norm)
  # x[1]*x[2]/((x[1]+x[2])^2*(x[1]+x[2]+1)) = var(gen.err.nuvols.norm)
  y = c(y1,y2)
  return(y)
}

# Solve the system, starting from alpha = beta = 0.2:
# NOTE(review): the solver's termination status in `sol` is not inspected here - confirm convergence.
sol<- nleqslv(c(0.2,0.2),fn2) # Solves it to make fn2 = 0
gen.shape1 <- sol$x[1]
gen.shape2 <- sol$x[2]

## II) Clear-sky - Normal Fitting:

# NOTE(review): the scenario loop later in this file draws the clear-sky noise
# with mean = 0, so mean_err_gen_perf is not used there - confirm this is intended.
mean_err_gen_perf <- mean(gen.err.clearsky)
sd_err_gen_perf <- sd(gen.err.clearsky)

## Result: gen.shape1 / gen.shape2 hold alpha and beta for the cloudy error (beta distribution)
## and sd_err_gen_perf holds the standard deviation of the clear-sky error (normal distribution)

In [23]:
# Drop the fitting intermediates and loop helpers that are no longer needed.
# The fitted parameters, the aggregated series and the time constants are kept.
rm("fn2","gen.err.clearsky","gen.err.nuvols","gen.err.nuvols.cent","gen.err.nuvols.norm","gen.lim","gen_err_30seg","gen_err_30seg_x41","gen_perf_for_30seg","i","ini","len_final","origin","out","sol","str","str2","str3","var.treshold")

### 2) PV Scenarios Generation
    A) Initialize the scenario matrices
    B) Separate the cloudy and clearsky
    C) Generate 24h ahead scenarios for each EMS period
    D) Save the obtained data

In [24]:
### A) Initialize the scenario matrices:

# Working matrix for a single EMS period:
# one row per intra-period step, one column per scenario.
scen.gen <- matrix(0, nrow = my.2880, ncol = n_scen)

# Container for all results, indexed [intra step, scenario, EMS period];
# every cell starts at 1.
scen.gen.all <- array(1, dim = c(my.2880, n_scen, my.288))

In [25]:
### B) Separate the cloudy and clearsky:

# Perfect 24h forecast: forecast-period means splined onto my.2880 points
for.gen.long <- spline(x = seq_along(gen_seg_for), y = gen_seg_for, n = my.2880)$y

# Weather row vector with one entry per forecast interval:
# TRUE where the interval index is listed in mesos.nuvols (cloudy)
h.weather <- matrix(seq_len(my.41) %in% mesos.nuvols, nrow = 1)

# Repeat each entry t.for/t.int times so the flags line up with the
# intra-period steps of for.gen.long
h.weather.large <- enlarge(h.weather, t.for / t.int)

# Intra-period steps per EMS period
d.t <- t.ems / t.int

In [26]:
### C) Create 24h ahead scenarios for each EMS period:
#
# For every EMS period p, n_scen noisy copies of the perfect forecast are
# drawn and rotated so that the period p is the first row; the result is
# stored in scen.gen.all[, , p].

# Loop-invariant masks over the intra-period steps.
# as.logical() makes the cloudy mask valid whether h.weather.large holds 0/1
# numbers or TRUE/FALSE; using a 0/1 numeric vector directly as an index
# would select row 1 repeatedly instead of the cloudy rows.
is.cloudy <- as.logical(h.weather.large)
is.clear  <- !is.cloudy
is.night  <- for.gen.long < 100   # forecast below 100 is treated as night
n.cloudy  <- sum(is.cloudy)
n.clear   <- sum(is.clear)
err.span  <- max.gen.err - min.gen.err

for(p in seq_len(my.288))
{
    # Number of intra-period steps to rotate so that EMS period p comes first
    n <- (p-1)*d.t

    for(i in seq_len(n_scen))
    {
        # Baseline: the perfect forecast splined to the intra-period steps
        scen.gen[,i] <- for.gen.long

        ### Short term (intra-period) randomness:
        # Cloudy Generation - Beta sample mapped back from [0, 1] to
        # [min.gen.err, max.gen.err]
        scen.gen[is.cloudy,i] <- scen.gen[is.cloudy,i] +
            (min.gen.err + err.span*rbeta(n.cloudy, shape1 = gen.shape1, shape2 = gen.shape2))
        # Clear-sky Generation - zero-mean Normal
        scen.gen[is.clear,i] <- scen.gen[is.clear,i] +
            rnorm(n.clear, mean = 0, sd = sd_err_gen_perf)

        # Limit to 0 (not going below 0)
        scen.gen[,i] <- pmax(scen.gen[,i],0)
        # Remove the night hours:
        scen.gen[is.night,i] <- 0

        # Shift to match current T_EMS (rotate left by n steps):
        if(n!=0)
        {
            scen.gen[,i] <- c(tail(scen.gen[,i], -n), head(scen.gen[,i], n))
        }
    }
    scen.gen.all[,,p] <- scen.gen
}


In [27]:
### D) Save the obtained data:

# Export to MATLAB .mat files:
#   gen_seg_org - the scaled secondly generation series (gen_seg)
#   gen_scen    - the scenario array scen.gen.all
# writeMat(gen_seg = gen_seg, con="../../Data/Generated Data/1 - Secondly/gen_seg.mat")
writeMat(gen_seg_org = gen_seg, con="../../Data/Generated Data/1 - Secondly/gen_seg_org.mat")
writeMat(gen_scen = scen.gen.all, con="../../Data/Generated Data/2 - Scenarios/gen_scen.mat")

In [28]:
# Store the secondly generation series under the name gen.all in an .Rdata file
gen.all <- gen_seg
save(list = "gen.all", file = "../../Data/Generated Data/1 - Secondly/gen_seg_org.Rdata")