### LASSO

In [56]:
using CSV, Tables, LinearAlgebra, Random, Gurobi, JuMP, Statistics, DataFrames, Metrics
#read train and validation data and split it into X and y
X_train_solar=Matrix(DataFrame(CSV.File("X_train_solar.csv")))
y_train_solar=Matrix(DataFrame(CSV.File("y_train_solar.csv")))
X_train_wind=Matrix(DataFrame(CSV.File("X_train_wind.csv")))
y_train_wind=Matrix(DataFrame(CSV.File("y_train_wind.csv")))

X_valid_solar=Matrix(DataFrame(CSV.File("X_valid_solar.csv")))
y_valid_solar=Matrix(DataFrame(CSV.File("y_valid_solar.csv")))
X_valid_wind=Matrix(DataFrame(CSV.File("X_valid_wind.csv")))
y_valid_wind=Matrix(DataFrame(CSV.File("y_valid_wind.csv")));

### NORMALIZE DATA
We normalize the data with min max scaling. We apply normalization for the numerical features only and then we use alpha to balance between numerical and binary variables.

In [57]:
function min_max_scaling(X, num_feature_indices)
    for i in num_feature_indices
        X[:, i] = (X[:, i] .- minimum(X[:, i])) ./ (maximum(X[:, i]) .- minimum(X[:, i]))
    end
    return X
end

min_max_scaling (generic function with 1 method)

#### SOLAR

In [58]:
#scale SOLAR TRAIN only numerical
X_train_solar=min_max_scaling(Matrix(X_train_solar), 1:15)

#numerical features
X_num_solar = X_train_solar[:, 1:15]

# Binary features as they are
X_bin_solar = X_train_solar[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_solar = alpha*X_num_solar
X_bin_solar = (1-alpha)*X_bin_solar;

# Append data
X_train_solar_norm = [X_num_solar X_bin_solar];

In [59]:
#scale SOLAR TRAIN only numerical
X_valid_solar=min_max_scaling(Matrix(X_valid_solar), 1:15)

#numerical features
X_num_solar = X_valid_solar[:, 1:15]

# Binary features as they are
X_bin_solar = X_valid_solar[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_solar = alpha*X_num_solar
X_bin_solar = (1-alpha)*X_bin_solar;

# Append data
X_valid_solar_norm = [X_num_solar X_bin_solar];

In [60]:
#scale Y 
y_train_solar_norm = min_max_scaling(Matrix(y_train_solar), 1:1)
y_valid_solar_norm = min_max_scaling(Matrix(y_valid_solar), 1:1);

#### WIND

In [61]:
X_train_wind=min_max_scaling(Matrix(X_train_wind), 1:15)

#numerical features
X_num_wind = X_train_wind[:, 1:15]

# Binary features as they are
X_bin_wind = X_train_wind[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_wind = alpha*X_num_wind
X_bin_wind = (1-alpha)*X_bin_wind;

# Append data
X_train_wind_norm = [X_num_wind X_bin_wind];

In [62]:
X_valid_wind=min_max_scaling(Matrix(X_valid_wind), 1:15)

#numerical features
X_num_wind = X_valid_wind[:, 1:15]

# Binary features as they are
X_bin_wind = X_valid_wind[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_wind = alpha*X_num_wind
X_bin_wind = (1-alpha)*X_bin_wind;

# Append data
X_valid_wind_norm = [X_num_wind X_bin_wind];

In [63]:
#scale Y 
y_train_wind_norm=min_max_scaling(Matrix(y_train_wind), 1:1)
y_valid_wind_norm=min_max_scaling(Matrix(y_valid_wind), 1:1);

## LASSO

In [64]:
function lasso(X,y,lambda)
    
    #Build model
    model = Model(Gurobi.Optimizer)#we have defined the model, pass Gurobi optimizer into the model
    set_optimizer_attribute(model,"OutputFlag",0)
    
    #Insert variables
    n, p = size(X)
    
    @variable(model, beta[j=1:p])
    @variable(model, beta_abs[j=1:p])    
    #Insert constraints
    @constraint(model, beta_abs .>= beta) #put the dot is like doing the loop over all j
    @constraint(model, beta_abs .>= -beta)
    
    #Insert objective
    @objective(model, Min, sum((y-X*beta).^2) + lambda*sum(beta_abs))
    
    # Optimize
    optimize!(model)
    
    # Return estimated betas
    return (value.(beta))
    
end

lasso (generic function with 1 method)

In [65]:
#CROSS VALIDATION: find best lambda 
function find_best_lambda(X,y,X_valid,y_valid)
    lowest_mse=Inf
    best_lambda=0
    for lambda in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
        beta_lasso=lasso(X, y, lambda)
        mse_temp= mse(y_valid, X_valid*beta_lasso)
        if mse_temp<lowest_mse
            lowest_mse=mse_temp
            best_lambda=lambda
        end
    end
    return best_lambda
end

find_best_lambda (generic function with 1 method)

### SOLAR

#### CV

In [66]:
#Not normalized
find_best_lambda(X_train_solar,y_train_solar, X_valid_solar, y_valid_solar)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.1

In [67]:
#X normalized
find_best_lambda(X_train_solar_norm,y_train_solar, X_valid_solar_norm, y_valid_solar)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.1

In [68]:
#Not normalized
find_best_lambda(X_train_solar_norm,y_train_solar_norm, X_valid_solar_norm, y_valid_solar_norm)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.9

#### EVALUATION 

In [69]:
#Not normalized
beta_lasso=lasso(X_train_solar,y_train_solar, 0.9);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [70]:
println("MSE: ", mse(y_valid_solar, X_valid_solar*beta_lasso))
println("MAE: ", mae(y_valid_solar, X_valid_solar*beta_lasso))
println("R2: ", r2_score(y_valid_solar, X_valid_solar*beta_lasso))

MSE: 9666.01234508447
MAE: 80.45320335777758
R2: 0.3756095227452997


In [71]:
#X normalized
beta_lasso=lasso(X_train_solar_norm,y_train_solar, 0.1);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [72]:
println("MSE: ", mse(y_valid_solar, X_valid_solar_norm*beta_lasso))
println("MAE: ", mae(y_valid_solar, X_valid_solar_norm*beta_lasso))
println("R2: ", r2_score(y_valid_solar, X_valid_solar_norm*beta_lasso))

MSE: 9665.723461693353
MAE: 80.45188410471667
R2: 0.3756038303856827


In [73]:
#X,y normalized
beta_lasso=lasso(X_train_solar_norm,y_train_solar_norm, 0.9);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [74]:
println("MSE: ", mse(y_valid_solar_norm, X_valid_solar_norm*beta_lasso))
println("MAE: ", mae(y_valid_solar_norm, X_valid_solar_norm*beta_lasso))
println("R2: ", r2_score(y_valid_solar_norm, X_valid_solar_norm*beta_lasso))

MSE: 0.04890333695519139
MAE: 0.17865608206417155
R2: -40.64612308232979


### WIND

#### CV

In [75]:
#Not normalized
find_best_lambda(X_train_wind,y_train_wind, X_valid_wind, y_valid_wind)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.1

In [76]:
#X normalized
find_best_lambda(X_train_wind_norm,y_train_wind, X_valid_wind_norm, y_valid_wind)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.1

In [77]:
#Not normalized
find_best_lambda(X_train_wind_norm,y_train_wind_norm, X_valid_wind_norm, y_valid_wind_norm)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.1

#### EVALUATION 

In [78]:
#Not normalized
beta_lasso=lasso(X_train_wind,y_train_wind, 0.1);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [79]:
println("MSE: ", mse(y_valid_wind, X_valid_wind*beta_lasso))
println("MAE: ", mae(y_valid_wind, X_valid_wind*beta_lasso))
println("R2: ", r2_score(y_valid_wind, X_valid_wind*beta_lasso))

MSE: 1.3295534863807525e8
MAE: 9143.912869846698
R2: -1.4381433162724808


In [80]:
#X normalized
beta_lasso=lasso(X_train_wind_norm,y_train_wind, 0.1);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [81]:
println("MSE: ", mse(y_valid_wind, X_valid_wind_norm*beta_lasso))
println("MAE: ", mae(y_valid_wind, X_valid_wind_norm*beta_lasso))
println("R2: ", r2_score(y_valid_wind, X_valid_wind_norm*beta_lasso))

MSE: 1.3295538012544891e8
MAE: 9143.91420194243
R2: -1.4381436018722429


In [82]:
#X,y normalized
beta_lasso=lasso(X_train_wind_norm,y_train_wind_norm, 0.1);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [83]:
println("MSE: ", mse(y_valid_wind_norm, X_valid_wind_norm*beta_lasso))
println("MAE: ", mae(y_valid_wind_norm, X_valid_wind_norm*beta_lasso))
println("R2: ", r2_score(y_valid_wind_norm, X_valid_wind_norm*beta_lasso))

MSE: 0.09024387516788934
MAE: 0.22112920964188207
R2: -0.2802860802600007
