#### REGULARIZATION

We want to build two prediction models, one for solar energy output and one for wind energy output. To predict these outputs, we want a model that selects the best possible predictors: maximizing feature significance, avoiding collinearity, ensuring sparsity, and considering the best feature transformations. To do so, we first apply regularized models such as Lasso and Ridge, and then apply Holistic Regression to see how much it improves the model. We run all of these models on all predictors of solar and wind energy output.

1. SOLAR ENERGY OUTPUT

In [1]:
using CSV, Tables, LinearAlgebra, Random, Gurobi, JuMP, Statistics, DataFrames
# Read the train and validation data and split each set into features (X) and target (y).
# NOTE(review): the right-hand sides of the assignments below are missing in this file;
# every name must be bound to loaded data before the following cells can run.
# NOTE(review): the "*_energy" names presumably hold the wind-energy data described in
# the introduction — confirm against the data source.
X_train_solar=
y_train_solar=
X_train_energy=
y_train_energy=

# Validation split, mirroring the training names above.
X_valid_solar=
y_valid_solar=
X_valid_energy=
y_valid_energy=

In [None]:
"""
    lasso(X, y, lambda=0.25)

Fit an L1-penalized least-squares regression with Gurobi through JuMP.

The objective is `sum((y - X*beta).^2) + lambda * sum(beta_abs)`, where `beta_abs`
bounds the absolute value of each coefficient from above. No intercept term is added
by this function.

# Arguments
- `X`: feature matrix; its second dimension gives the number of coefficients.
- `y`: response values, combined with `X * beta` in the residual.
- `lambda`: weight of the L1 penalty.

# Returns
The solver values of the coefficients, one per column of `X`.
"""
function lasso(X, y, lambda=0.25)
    # Solver logging is switched off through Gurobi's OutputFlag parameter.
    model = Model(Gurobi.Optimizer)
    set_optimizer_attribute(model, "OutputFlag", 0)

    _, p = size(X)

    @variable(model, beta[1:p])
    # beta_abs[j] is an upper bound on |beta[j]|; with a positive lambda the
    # minimization pushes it down onto |beta[j]|.
    @variable(model, beta_abs[1:p])
    @constraint(model, [j in 1:p], beta_abs[j] >= beta[j])
    @constraint(model, [j in 1:p], beta_abs[j] >= -beta[j])

    # Squared residuals plus the L1 penalty.
    residuals = y - X * beta
    @objective(model, Min, sum(residuals .^ 2) + lambda * sum(beta_abs))

    optimize!(model)

    return value.(beta)
end

In [None]:
"""
    mse(X, y, beta)

Return the mean squared error of the linear prediction `X * beta` against `y`:
the sum of squared residuals divided by `length(y)`.
"""
function mse(X, y, beta)
    residuals = y - X * beta
    squared_error = sum(residuals .^ 2)
    return squared_error / length(y)
end

In [None]:
"""
    correlation(X, p)

Return the index pairs of columns of `X` whose absolute correlation exceeds `p`.

# Arguments
- `X`: data matrix passed to `Statistics.cor`, which correlates its columns.
- `p`: threshold; a pair is reported only when `abs(correlation) > p` (strictly).

# Returns
A `Vector{Tuple{Int,Int}}` of pairs `(i, j)` with `i < j`, ordered by `i` and then
by `j`. The vector is empty when no pair exceeds the threshold.
"""
function correlation(X, p)
    correlation_matrix = Statistics.cor(X)
    n, m = size(correlation_matrix)
    # Only the strict upper triangle is scanned, so each pair appears once and the
    # diagonal (self-correlation) is skipped. The typed comprehension keeps the
    # result concretely typed instead of Vector{Any}.
    return Tuple{Int,Int}[
        (i, j) for i in 1:n for j in (i + 1):m if abs(correlation_matrix[i, j]) > p
    ]
end

In [None]:
"""
    transformation(X)

Expand every column of `X` into four columns and return them as a new `DataFrame`.

For column `i` the output holds, in this order:
1. `X<i>`: the original values,
2. `Sqrt<i>`: the values squared,
3. `Abs<i>`: the square root of the absolute values,
4. `Log<i>`: `log(abs(value) + 1)`.

The result has the same number of rows as `X` and four times as many columns.

NOTE(review): the `Sqrt`/`Abs` labels do not match the quantities stored under them
(square and square-root-of-absolute-value respectively). The names are kept as they
are because callers may index the result by column name.
"""
function transformation(X)
    source = DataFrame(X, :auto)
    expanded = DataFrame()
    shift = 1  # added inside the logarithm so that a zero entry maps to log(1) = 0
    for i in 1:size(source, 2)
        column = source[:, i]
        magnitude = abs.(column)
        expanded[!, "X$i"] = column
        expanded[!, "Sqrt$i"] = column .^ 2
        expanded[!, "Abs$i"] = sqrt.(magnitude)
        expanded[!, "Log$i"] = log.(magnitude .+ shift)
    end
    return expanded
end

In [None]:
"""
    holistic_regression(X, y, lambda=0.25, per=0.5, M=50, k=8)

Fit a sparse, L1-penalized least-squares regression on the transformed features of
`X` as a mixed-integer program solved with Gurobi through JuMP.

`X` is first expanded with `transformation` (four columns per original feature), and
`correlation` is used to find pairs of expanded columns whose absolute correlation
exceeds `per`. A binary indicator `z[j]` marks whether expanded column `j` may carry a
non-zero coefficient.

# Arguments
- `X`: feature matrix, expanded internally by `transformation`.
- `y`: response values.
- `lambda`: weight of the L1 penalty on the coefficients.
- `per`: correlation threshold above which two columns may not both be selected.
- `M`: big-M bound; every coefficient is restricted to `[-M, M]`.
- `k`: maximum number of selected columns.

# Returns
The solver values of the coefficients, one per column of the expanded matrix, in the
column order produced by `transformation`.
"""
function holistic_regression(X, y, lambda=0.25, per=0.5, M=50, k=8)
    # Expanded design matrix and its highly correlated column pairs.
    features = Matrix(transformation(X))
    correlated_pairs = correlation(features, per)
    _, n_features = size(features)

    # Solver logging is switched off through Gurobi's OutputFlag parameter.
    model = Model(Gurobi.Optimizer)
    set_optimizer_attribute(model, "OutputFlag", 0)

    @variable(model, beta[1:n_features])
    @variable(model, beta_abs[1:n_features])
    @variable(model, z[1:n_features], Bin)

    columns = 1:n_features

    # beta_abs[j] is an upper bound on |beta[j]| for the L1 penalty.
    @constraint(model, [j in columns], beta_abs[j] >= beta[j])
    @constraint(model, [j in columns], beta_abs[j] >= -beta[j])

    # Sparsity: beta[j] is forced to zero unless z[j] = 1, and at most k columns
    # (counted over all expanded columns) may be switched on.
    @constraint(model, [j in columns], -M * z[j] <= beta[j])
    @constraint(model, [j in columns], beta[j] <= M * z[j])
    @constraint(model, sum(z) <= k)

    # Transformations: each consecutive group of four columns belongs to one
    # original feature, and at most one of them may be selected.
    for group_start in 1:4:n_features
        @constraint(model, sum(z[group_start:(group_start + 3)]) <= 1)
    end

    # Collinearity: at most one column of every highly correlated pair.
    for (a, b) in correlated_pairs
        @constraint(model, z[a] + z[b] <= 1)
    end

    # Squared residuals plus the L1 penalty.
    residuals = y - features * beta
    @objective(model, Min, sum(residuals .^ 2) + lambda * sum(beta_abs))

    optimize!(model)

    return value.(beta)
end