# RIDGE

In [1]:
using CSV, Tables, LinearAlgebra, Random, Gurobi, JuMP, Statistics, DataFrames, Metrics
#read train and validation data and split it into X and y
X_train_solar=Matrix(DataFrame(CSV.File("X_train_solar.csv")))
y_train_solar=Matrix(DataFrame(CSV.File("y_train_solar.csv")))
X_train_wind=Matrix(DataFrame(CSV.File("X_train_wind.csv")))
y_train_wind=Matrix(DataFrame(CSV.File("y_train_wind.csv")))

X_valid_solar=Matrix(DataFrame(CSV.File("X_valid_solar.csv")))
y_valid_solar=Matrix(DataFrame(CSV.File("y_valid_solar.csv")))
X_valid_wind=Matrix(DataFrame(CSV.File("X_valid_wind.csv")))
y_valid_wind=Matrix(DataFrame(CSV.File("y_valid_wind.csv")));

### NORMALIZE DATA
We normalize the data with min max scaling. We apply normalization for the numerical features only and then we use alpha to balance between numerical and binary variables.

In [2]:
function min_max_scaling(X, num_feature_indices)
    for i in num_feature_indices
        X[:, i] = (X[:, i] .- minimum(X[:, i])) ./ (maximum(X[:, i]) .- minimum(X[:, i]))
    end
    return X
end

min_max_scaling (generic function with 1 method)

#### SOLAR

In [3]:
#scale SOLAR TRAIN only numerical
X_num_solar=min_max_scaling(Matrix(X_train_solar), 1:15)

# Binary features as they are
X_bin_solar = X_train_solar[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_solar = alpha*X_num_solar
X_bin_solar = (1-alpha)*X_bin_solar;

# Append data
X_train_solar_norm = [X_num_solar X_bin_solar];

In [4]:
#scale SOLAR VALID only numerical
X_num_solar=min_max_scaling(Matrix(X_valid_solar), 1:15)

# Binary features as they are
X_bin_solar = X_valid_solar[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_solar = alpha*X_num_solar
X_bin_solar = (1-alpha)*X_bin_solar;

# Append data
X_valid_solar_norm = [X_num_solar X_bin_solar];

In [5]:
#scale Y 
y_train_solar_norm = min_max_scaling(Matrix(y_train_solar), 1:1)
y_valid_solar_norm = min_max_scaling(Matrix(y_valid_solar), 1:1);

#### WIND

In [6]:
X_num_wind=min_max_scaling(Matrix(X_train_wind), 1:15)

# Binary features as they are
X_bin_wind = X_train_wind[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_wind = alpha*X_num_wind
X_bin_wind = (1-alpha)*X_bin_wind;

# Append data
X_train_wind_norm = [X_num_wind X_bin_wind];

In [7]:
X_num_wind=min_max_scaling(Matrix(X_valid_wind), 1:15)

# Binary features as they are
X_bin_wind = X_valid_wind[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_wind = alpha*X_num_wind
X_bin_wind = (1-alpha)*X_bin_wind;

# Append data
X_valid_wind_norm = [X_num_wind X_bin_wind];

In [8]:
#scale Y 
y_train_wind_norm=min_max_scaling(Matrix(y_train_wind), 1:1)
y_valid_wind_norm=min_max_scaling(Matrix(y_valid_wind), 1:1);

## Ridge

In [9]:
function ridge(X,y,lambda)
    
    #Build model
    model = Model(Gurobi.Optimizer)#we have defined the model, pass Gurobi optimizer into the model
    set_optimizer_attribute(model,"OutputFlag",0)
    
    #Insert variables
    n, p = size(X)
    
    @variable(model, beta[j=1:p])
    @variable(model, beta_abs[j=1:p])    
    #Insert constraints
    @constraint(model, beta_abs .>= beta) #put the dot is like doing the loop over all j
    @constraint(model, beta_abs .>= -beta)
    
    #Insert objective
    @objective(model, Min, sum((y-X*beta).^2) + lambda*sum(beta_abs).^2)
    
    # Optimize
    optimize!(model)
    
    # Return estimated betas
    return (value.(beta))
    
end

ridge (generic function with 1 method)

In [10]:
#CROSS VALIDATION: find best lambda 
function find_best_lambda(X,y,X_valid,y_valid)
    lowest_mse=Inf
    best_lambda=0
    for lambda in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
        beta_ridge=ridge(X, y, lambda)
        mse_temp= mse(y_valid, X_valid*beta_ridge)
        if mse_temp<lowest_mse
            lowest_mse=mse_temp
            best_lambda=lambda
        end
    end
    return best_lambda
end

find_best_lambda (generic function with 1 method)

### SOLAR

#### CV

In [11]:
#Not normalized
find_best_lambda(X_train_solar,y_train_solar, X_valid_solar, y_valid_solar)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.9

In [12]:
#X normalized
find_best_lambda(X_train_solar_norm,y_train_solar, X_valid_solar_norm, y_valid_solar)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.2

In [13]:
#Not normalized
find_best_lambda(X_train_solar_norm,y_train_solar_norm, X_valid_solar_norm, y_valid_solar_norm)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.1

#### EVALUATION 

In [23]:
#Not normalized
beta_lasso=ridge(X_train_solar,y_train_solar, 0.9);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [24]:
println("MSE: ", mse(y_valid_solar, X_valid_solar*beta_lasso))
println("MAE: ", mae(y_valid_solar, X_valid_solar*beta_lasso))
println("R2: ", r2_score(y_valid_solar, X_valid_solar*beta_lasso))

MSE: 9463.110065938812
MAE: 78.91274582834613
R2: 0.3456856472945672


In [25]:
#X normalized
beta_lasso=ridge(X_train_solar_norm,y_train_solar, 0.2);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [26]:
println("MSE: ", mse(y_valid_solar, X_valid_solar_norm*beta_lasso))
println("MAE: ", mae(y_valid_solar, X_valid_solar_norm*beta_lasso))
println("R2: ", r2_score(y_valid_solar, X_valid_solar_norm*beta_lasso))

MSE: 9311.563483600079
MAE: 79.17314992497298
R2: 0.3674740660527871


In [27]:
#X,y normalized
beta_lasso=ridge(X_train_solar_norm,y_train_solar_norm, 0.1);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [28]:
println("MSE: ", mse(y_valid_solar_norm, X_valid_solar_norm*beta_lasso))
println("MAE: ", mae(y_valid_solar_norm, X_valid_solar_norm*beta_lasso))
println("R2: ", r2_score(y_valid_solar_norm, X_valid_solar_norm*beta_lasso))

MSE: 0.050235256420229295
MAE: 0.1817041057153202
R2: -43.75806166400589


### WIND

#### CV

In [29]:
#Not normalized
find_best_lambda(X_train_wind,y_train_wind, X_valid_wind, y_valid_wind)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.6

In [30]:
#X normalized
find_best_lambda(X_train_wind_norm,y_train_wind, X_valid_wind_norm, y_valid_wind)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.9

In [31]:
#Not normalized
find_best_lambda(X_train_wind_norm,y_train_wind_norm, X_valid_wind_norm, y_valid_wind_norm)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.9

#### EVALUATION 

In [32]:
#Not normalized
beta_lasso=ridge(X_train_wind,y_train_wind, 0.6);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [33]:
println("MSE: ", mse(y_valid_wind, X_valid_wind*beta_lasso))
println("MAE: ", mae(y_valid_wind, X_valid_wind*beta_lasso))
println("R2: ", r2_score(y_valid_wind, X_valid_wind*beta_lasso))

MSE: 1.2917186508849436e7
MAE: 2605.6402423288923
R2: -0.38130277282271874


In [34]:
#X normalized
beta_lasso=ridge(X_train_wind_norm,y_train_wind, 0.9);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [35]:
println("MSE: ", mse(y_valid_wind, X_valid_wind_norm*beta_lasso))
println("MAE: ", mae(y_valid_wind, X_valid_wind_norm*beta_lasso))
println("R2: ", r2_score(y_valid_wind, X_valid_wind_norm*beta_lasso))

MSE: 1.3318411082901742e8
MAE: 9273.380899916401
R2: -1.568219509204742


In [36]:
#X,y normalized
beta_lasso=ridge(X_train_wind_norm,y_train_wind_norm, 0.9);

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


In [37]:
println("MSE: ", mse(y_valid_wind_norm, X_valid_wind_norm*beta_lasso))
println("MAE: ", mae(y_valid_wind_norm, X_valid_wind_norm*beta_lasso))
println("R2: ", r2_score(y_valid_wind_norm, X_valid_wind_norm*beta_lasso))

MSE: 0.0863312002625231
MAE: 0.2159258274030461
R2: -0.2947390841997417
