### LASSO

In [3]:
using CSV, Tables, LinearAlgebra, Random, Gurobi, JuMP, Statistics, DataFrames, Metrics
# Load every train/validation split from CSV and convert it to a plain matrix.
# Files prefixed with X_ hold the features, files prefixed with y_ hold the target.
X_train_solar = CSV.File("X_train_solar.csv") |> DataFrame |> Matrix
y_train_solar = CSV.File("y_train_solar.csv") |> DataFrame |> Matrix
X_train_wind = CSV.File("X_train_wind.csv") |> DataFrame |> Matrix
y_train_wind = CSV.File("y_train_wind.csv") |> DataFrame |> Matrix

X_valid_solar = CSV.File("X_valid_solar.csv") |> DataFrame |> Matrix
y_valid_solar = CSV.File("y_valid_solar.csv") |> DataFrame |> Matrix
X_valid_wind = CSV.File("X_valid_wind.csv") |> DataFrame |> Matrix
y_valid_wind = CSV.File("y_valid_wind.csv") |> DataFrame |> Matrix;

### NORMALIZE DATA
We normalize the data with min-max scaling. Only the numerical features (the first 15 columns) are scaled; the binary features are left as they are. We then multiply the numerical features by alpha and the binary features by (1 - alpha) to balance the two groups.

In [None]:
"""
    min_max_scaling(X, num_feature_indices)

Rescale the columns of `X` listed in `num_feature_indices` to the range [0, 1]
using each column's own minimum and maximum, and return `X`.

`X` is modified in place; pass a copy if the original values are still needed.
Columns not listed in `num_feature_indices` are left untouched.

A constant column (maximum equal to minimum) is set to zero instead of being
divided by zero, which would fill it with `NaN`.
"""
function min_max_scaling(X, num_feature_indices)
    for i in num_feature_indices
        # A view avoids copying the column and lets us write the result in place.
        col = view(X, :, i)
        lo, hi = extrema(col)
        span = hi - lo
        if iszero(span)
            # Every entry equals `lo`; (x - lo) / 0 would be NaN, so map to 0.
            col .= zero(span)
        else
            col .= (col .- lo) ./ span
        end
    end
    return X
end

min_max_scaling (generic function with 1 method)

#### SOLAR

In [None]:
# Min-max scale the 15 numerical columns of the solar training features;
# the binary columns (16 onward) are not touched by the scaler.
X_train_solar = min_max_scaling(Matrix(X_train_solar), 1:15)

# Weight given to the numerical block; the binary block gets 1 - alpha.
alpha = 0.75

# Split into the two feature groups and apply the weights.
X_num_solar = X_train_solar[:, 1:15] .* alpha
X_bin_solar = X_train_solar[:, 16:end] .* (1 - alpha)

# Put the weighted blocks back side by side.
X_train_solar_norm = hcat(X_num_solar, X_bin_solar);

In [None]:
# Min-max scale the 15 numerical columns of the solar VALIDATION features;
# the binary columns (16 onward) are not touched by the scaler.
# NOTE(review): the scaler uses the validation set's own min/max, not the
# training set's, so train and validation columns are only on the same scale
# if their ranges coincide -- confirm this is intended.
X_valid_solar = min_max_scaling(Matrix(X_valid_solar), 1:15)

# Weight given to the numerical block; the binary block gets 1 - alpha.
alpha = 0.75

# Split into the two feature groups and apply the weights.
# (X_num_solar / X_bin_solar are reused here and overwrite the training values.)
X_num_solar = X_valid_solar[:, 1:15] .* alpha
X_bin_solar = X_valid_solar[:, 16:end] .* (1 - alpha)

# Put the weighted blocks back side by side.
X_valid_solar_norm = hcat(X_num_solar, X_bin_solar);

In [None]:
# Min-max scale the single target column of each solar split. Each split is
# scaled with its own min/max, and the scaler works on a copy (`Matrix(y)`),
# so y_train_solar / y_valid_solar keep their raw values.
y_train_solar_norm, y_valid_solar_norm =
    map(y -> min_max_scaling(Matrix(y), 1:1), (y_train_solar, y_valid_solar));

#### WIND

In [None]:
# Min-max scale the 15 numerical columns of the wind training features;
# the binary columns (16 onward) are not touched by the scaler.
X_train_wind = min_max_scaling(Matrix(X_train_wind), 1:15)

# Weight given to the numerical block; the binary block gets 1 - alpha.
alpha = 0.75

# Split into the two feature groups and apply the weights.
X_num_wind = X_train_wind[:, 1:15] .* alpha
X_bin_wind = X_train_wind[:, 16:end] .* (1 - alpha)

# Put the weighted blocks back side by side.
X_train_wind_norm = hcat(X_num_wind, X_bin_wind);

In [None]:
# Min-max scale the 15 numerical columns of the wind VALIDATION features;
# the binary columns (16 onward) are not touched by the scaler.
# NOTE(review): the scaler uses the validation set's own min/max, not the
# training set's, so train and validation columns are only on the same scale
# if their ranges coincide -- confirm this is intended.
X_valid_wind = min_max_scaling(Matrix(X_valid_wind), 1:15)

# Weight given to the numerical block; the binary block gets 1 - alpha.
alpha = 0.75

# Split into the two feature groups and apply the weights.
# (X_num_wind / X_bin_wind are reused here and overwrite the training values.)
X_num_wind = X_valid_wind[:, 1:15] .* alpha
X_bin_wind = X_valid_wind[:, 16:end] .* (1 - alpha)

# Put the weighted blocks back side by side.
X_valid_wind_norm = hcat(X_num_wind, X_bin_wind);

In [None]:
# Min-max scale the single target column of each wind split. Each split is
# scaled with its own min/max, and the scaler works on a copy (`Matrix(y)`),
# so y_train_wind / y_valid_wind keep their raw values.
y_train_wind_norm, y_valid_wind_norm =
    map(y -> min_max_scaling(Matrix(y), 1:1), (y_train_wind, y_valid_wind));

## LASSO

In [2]:
"""
    lasso(X, y, lambda)

Fit an L1-penalised least-squares regression with Gurobi and return the
coefficient vector.

The model solved is

    minimise  sum((y - X*beta).^2) + lambda * sum(abs.(beta))

with no intercept term. The absolute values are modelled with auxiliary
variables `beta_abs` constrained by `beta_abs >= beta` and `beta_abs >= -beta`.

# Arguments
- `X`: `n x p` feature matrix.
- `y`: target with `n` entries (a vector or an `n x 1` matrix).
- `lambda`: weight of the L1 penalty.

# Returns
A vector of `p` fitted coefficients.

# Throws
- `DimensionMismatch` if `y` does not have one entry per row of `X`.
- `ErrorException` if the solver finishes without a solution to read.
"""
function lasso(X, y, lambda)
    n, p = size(X)
    length(y) == n || throw(DimensionMismatch(
        "y has $(length(y)) entries but X has $n rows"))

    # Build the model on top of Gurobi and silence the solver log.
    model = Model(Gurobi.Optimizer)
    set_optimizer_attribute(model, "OutputFlag", 0)

    # Coefficients and their absolute-value surrogates.
    @variable(model, beta[1:p])
    @variable(model, beta_abs[1:p])

    # beta_abs[j] >= |beta[j]|; the dot broadcasts the constraint over all j.
    @constraint(model, beta_abs .>= beta)
    @constraint(model, beta_abs .>= -beta)

    # Residual sum of squares plus the L1 penalty.
    residual = vec(y) .- X * beta
    @objective(model, Min, sum(residual .^ 2) + lambda * sum(beta_abs))

    optimize!(model)

    # Fail with the solver status instead of an opaque error from `value`.
    has_values(model) || error(
        "lasso: solver returned no solution (status: $(termination_status(model)))")

    return value.(beta)
end

lasso (generic function with 1 method)

In [4]:
#CROSS VALIDATION: find best lambda 
"""
    find_best_lambda(X, y, X_valid, y_valid; lambdas=[0.1, ..., 0.9], fit=lasso)

Pick the penalty weight with the lowest validation mean squared error.

For every candidate in `lambdas`, coefficients are fitted on `(X, y)` with
`fit(X, y, lambda)` and scored by the mean squared difference between
`y_valid` and `X_valid * beta`.

# Arguments
- `X`, `y`: training features and target.
- `X_valid`, `y_valid`: validation features and target (`y_valid` may be a
  vector or an `n x 1` matrix).

# Keywords
- `lambdas`: candidate penalty weights, tried in order.
- `fit`: function `(X, y, lambda) -> beta` used to fit the coefficients.

# Returns
The first candidate achieving the lowest validation MSE, or `0` if no
candidate produces an MSE below `Inf`.
"""
function find_best_lambda(X, y, X_valid, y_valid;
                          lambdas=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                          fit=lasso)
    lowest_mse = Inf
    best_lambda = 0
    # Flatten once so an n x 1 target lines up with the prediction vector.
    target = vec(y_valid)
    for lambda in lambdas
        beta = fit(X, y, lambda)
        # Score predictions against the validation target.
        mse_temp = mean(abs2, target .- X_valid * beta)
        if mse_temp < lowest_mse
            lowest_mse = mse_temp
            best_lambda = lambda
        end
    end
    return best_lambda
end

find_best_lambda (generic function with 1 method)

### SOLAR

#### CV

In [None]:
# Solar model without alpha weighting or target scaling: uses X_train_solar /
# X_valid_solar as the normalisation cells leave them (numerical columns
# min-max scaled, if those cells have run) and the raw targets.

# Choose the L1 penalty on the validation split; `lambda` must be defined
# before the fit below.
lambda = find_best_lambda(X_train_solar, y_train_solar, X_valid_solar, y_valid_solar)

# Refit on the training data with the selected penalty and report training MSE.
lasso_beta = lasso(X_train_solar, y_train_solar, lambda)
mse(y_train_solar, X_train_solar*lasso_beta)

In [6]:
# Predict the solar validation targets from the fitted LASSO coefficients
# (`lasso_beta`); the trailing semicolon suppresses the cell output.
preds = X_valid_solar*lasso_beta;