#### REGULARIZATION

We want to create two prediction models both for solar energy output and wind energy output. In order to predict these outputs we want to build a model that selects the best possible predictors maximizing the feature significance, avoiding colinearity, ensuring sparisty, considering the best feature transformations... To do so we will apply different models like Lasso, and Ridge and then we can apply Holistic Regression to see how the model is improved by it. We will run all of these models by all predictors of solar and energy output. 

In [1]:
using CSV, Tables, LinearAlgebra, Random, Gurobi, JuMP, Statistics, DataFrames
#read train and validation data and split it into X and y
X_train_solar=Matrix(DataFrame(CSV.File("X_train_solar.csv")))
y_train_solar=Matrix(DataFrame(CSV.File("y_train_solar.csv")))
X_train_wind=Matrix(DataFrame(CSV.File("X_train_wind.csv")))
y_train_wind=Matrix(DataFrame(CSV.File("y_train_wind.csv")))

X_valid_solar=Matrix(DataFrame(CSV.File("X_valid_solar.csv")))
y_valid_solar=Matrix(DataFrame(CSV.File("y_valid_solar.csv")))
X_valid_wind=Matrix(DataFrame(CSV.File("X_valid_wind.csv")))
y_valid_wind=Matrix(DataFrame(CSV.File("y_valid_wind.csv")));

### Functions

In [2]:
function lasso(X,y,lambda=0.25)
    
    #Build model
    model = Model(Gurobi.Optimizer)#we have defined the model, pass Gurobi optimizer into the model
    set_optimizer_attribute(model,"OutputFlag",0)
    
    #Insert variables
    n, p = size(X)
    
    @variable(model, beta[j=1:p])
    @variable(model, beta_abs[j=1:p])
    
    
    #Insert constraints

    @constraint(model, beta_abs .>= beta) #put the dot is like doing the loop over all j
    @constraint(model, beta_abs .>= -beta)
    
    
    #sum((y-X*beta).^2)) = sum((y[i] - sum(X[i,j]*beta[j]) for j=1:p)^2 for i=1:n)
    
    #Insert objective
    @objective(model, Min, sum((y-X*beta).^2) + lambda*sum(beta_abs))
    
    
    # Optimize
    optimize!(model)
    
    # Return estimated betas
    return (value.(beta))
    
end

lasso (generic function with 2 methods)

In [66]:
function ridge(X,y,lambda=0.25)
    
    #Build model
    model = Model(Gurobi.Optimizer)#we have defined the model, pass Gurobi optimizer into the model
    set_optimizer_attribute(model,"OutputFlag",0)
    
    #Insert variables
    n, p = size(X)
    
    @variable(model, beta[j=1:p])
    @variable(model, beta_abs[j=1:p])
    
    
    #Insert constraints

    @constraint(model, beta_abs .>= beta) #put the dot is like doing the loop over all j
    @constraint(model, beta_abs .>= -beta)
    
    
    #sum((y-X*beta).^2)) = sum((y[i] - sum(X[i,j]*beta[j]) for j=1:p)^2 for i=1:n)
    
    #Insert objective
    @objective(model, Min, sum((y-X*beta).^2) + lambda*sum(beta_abs.^2))
    
    
    # Optimize
    optimize!(model)
    
    # Return estimated betas
    return (value.(beta))
    
end

ridge (generic function with 2 methods)

In [3]:
function mse(X,y, beta)
    return sum((y-X*beta).^2)/length(y)
end

mse (generic function with 1 method)

In [4]:
function correlation(X,p)
    correlation_matrix = Statistics.cor(X) #compute corr matrix
    n,m = size(correlation_matrix) #set sizes
    correlated_pairs=[] #empty list to store correlated pairs
    for i=1:n #for all features
        for j=i+1:m 
            if abs(correlation_matrix[i,j])>p #if the abs value of the corr is higher than p
                push!(correlated_pairs, (i,j)) #append pair to list of correlated pairs
            end
        end
    end
    return correlated_pairs
end

correlation (generic function with 1 method)

In [5]:
#Build transformation function
function transformation(X)
    X_old=DataFrame(X, :auto) #define X as a df
    X_new=DataFrame() #new empty df 
    n,p=size(X_old)
    e=1
    for i=1:p #for each feature in X add 4 transformations
        X_new[!, "X$i"]=X_old[:,i] #transformation 1: no transformation
        X_new[!, "Sqrt$i"]=X_old[:,i].^2 #transformation 2: square root
        #X_new[!, "Log$i"]=log.(abs.(X_old[:,i])) #transformation 3: log
        X_new[!, "Abs_Sqrt$i"]=sqrt.(abs.(X_old[:,i])) #transformation 3: absolute squared root
        #square root of absolute value
        
    end
    return(X_new) #we return a new df with all transformations, it will have size nxp*4
end

transformation (generic function with 1 method)

In [6]:
function holistic_regression(X,y,lambda=0.25, per=0.6, M=50, k=10) #add parameter per
    
    #Call functions
    X_new=Matrix(transformation(X)) #call function with all transformations of X
    HC = correlation(X_new, per) #call correlation function to compute hc_pairs
    
    #Set sizes
    n,p_new=size(X_new)
    
    #Build model
    model = Model(Gurobi.Optimizer)
    #model = Model(Gurobi.Optimizer, NonConvex = 2)#we have defined the model, pass Gurobi optimizer into the model
    #model = Model(with_optimizer(Gurobi.Optimizer, NonConvex = 2))
    set_optimizer_attribute(model,"OutputFlag",0)
    
    #Insert variables
    @variable(model, beta[1:p_new])
    @variable(model, beta_abs[1:p_new])
    @variable(model, z[1:p_new], Bin) #we add a binary variable
    

    #Insert constraints
    @constraint(model, beta_abs .>= beta) #put the dot is like doing the loop over all j
    @constraint(model, beta_abs .>= -beta)
    
        
    #sparsity constraint: over all 60 features (including transformations) 
    @constraint(model, -M*z .<= beta)
    @constraint(model, beta .<= M*z)
    @constraint(model, sum(z) <= k) 
    
    #constraint on Transformation: from the 4 transformations per each feature we only select one
    for i=1:3:p_new
        #for j=i:i+3 #for every 4 transformations
        @constraint(model, sum(z[i:i+2])<=1)
        #end
    end #we get a vector with 15 features
    
    #constraint on HC pairs once we have selected 15 features 
    for (i,j) in HC
        @constraint(model, z[i] + z[j] <= 1)
    end #we can only take one of the pairs of correlated pairs
    #@constraint(model, sum(z[i])<=k) #ensure that the model has at most 8 features
    
        
    #Insert objective
    @objective(model, Min, sum((y-X_new*beta).^2) + lambda*sum(beta_abs))
    
    
    # Optimize
    optimize!(model)
    
    # Return estimated betas
    return (value.(beta))
    
end

holistic_regression (generic function with 5 methods)

### NORMALIZE DATA
We normalize the data with min max scaling. We apply normalization for the numerical features only and then we use alpha to balance between numerical and binary variables.

In [7]:
function min_max_scaling(X, num_feature_indices)
    for i in num_feature_indices
        X[:, i] = (X[:, i] .- minimum(X[:, i])) ./ (maximum(X[:, i]) .- minimum(X[:, i]))
    end
    return X
end

min_max_scaling (generic function with 1 method)

#### SOLAR

In [8]:
#scale SOLAR TRAIN only numerical
X_train_solar=min_max_scaling(Matrix(X_train_solar), 1:15)

#numerical features
X_num_solar = X_train_solar[:, 1:15]

# Binary features as they are
X_bin_solar = X_train_solar[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_solar = alpha*X_num_solar
X_bin_solar = (1-alpha)*X_bin_solar;

# Append data
X_train_solar_norm = [X_num_solar X_bin_solar];

In [9]:
#scale SOLAR TRAIN only numerical
X_valid_solar=min_max_scaling(Matrix(X_valid_solar), 1:15)

#numerical features
X_num_solar = X_valid_solar[:, 1:15]

# Binary features as they are
X_bin_solar = X_valid_solar[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_solar = alpha*X_num_solar
X_bin_solar = (1-alpha)*X_bin_solar;

# Append data
X_valid_solar_norm = [X_num_solar X_bin_solar];

In [10]:
#scale Y 
y_train_solar_norm = min_max_scaling(Matrix(y_train_solar), 1:1)
y_valid_solar_norm = min_max_scaling(Matrix(y_valid_solar), 1:1);

#### WIND

In [11]:
X_train_wind=min_max_scaling(Matrix(X_train_wind), 1:15)

#numerical features
X_num_wind = X_train_wind[:, 1:15]

# Binary features as they are
X_bin_wind = X_train_wind[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_wind = alpha*X_num_wind
X_bin_wind = (1-alpha)*X_bin_wind;

# Append data
X_train_wind_norm = [X_num_wind X_bin_wind];

In [12]:
X_valid_wind=min_max_scaling(Matrix(X_valid_wind), 1:15)

#numerical features
X_num_wind = X_valid_wind[:, 1:15]

# Binary features as they are
X_bin_wind = X_valid_wind[:,16:end]

# Numerical vs Categorical features scaling
alpha = 0.75
X_num_wind = alpha*X_num_wind
X_bin_wind = (1-alpha)*X_bin_wind;

# Append data
X_valid_wind_norm = [X_num_wind X_bin_wind];

In [13]:
#scale Y 
y_train_wind_norm=min_max_scaling(Matrix(y_train_wind), 1:1)
y_valid_wind_norm=min_max_scaling(Matrix(y_valid_wind), 1:1);

## SOLAR ENERGY OUTPUT

* Lasso: Most values tending to 0, MSE of 0.00673 
* Holistic Regression: do CV to get optimal value of k, rho, and lambda. (Best lambda: 0.9 , Best rho: 0.6, Best k: 7)
    - Selected features (7): Square root of FEELSLIKE, Square root of HUMIDITY, Squared of SNOWDEPTH, Square root of WINDSPEED, Squared of WINDDIR, Square root of SEALEVELPRESSURE, and Square root of SOLARIRRADIATION
    - MSE: 0.0065948

### 1. LASSO
- CV to find best lambda for 3 cases: not normalized (0.9), X normalized (0.9), X+y normalized (0.1)
- Compute lasso for 3 cases: not normalized, X normalized, X+y normalized
- Report MSE validation and seleclted features

#### CV lasso

In [69]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_lasso=lasso(X_train_solar, y_train_solar, i)
    mse_temp= mse(X_valid_solar, y_valid_solar, beta_lasso)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for train not normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for train not normalized is: 0.9

In [70]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_lasso=lasso(X_train_solar_norm, y_train_solar, i)
    mse_temp= mse(X_valid_solar_norm, y_valid_solar, beta_lasso)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for X normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for X normalized is: 0.9

In [71]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_lasso=lasso(X_train_solar_norm, y_train_solar_norm, i)
    mse_temp= mse(X_valid_solar_norm, y_valid_solar_norm, beta_lasso)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for X and y normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for X and y normalized is: 0.1

#### MSE lasso

In [81]:
beta_lasso=lasso(X_train_solar,y_train_solar,0.9)
mse(X_valid_solar, y_valid_solar, beta_lasso)
mse(X_train_solar, y_train_solar, beta_lasso)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


79746.50162795983

In [76]:
#read dataframe
df = CSV.read("X_train_solar.csv", DataFrame)
names(df)
#merge names with betas and sort by absolute value of beta
sort!(DataFrame(name = names(df), beta = beta_lasso), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,preciptype_no_precipitation,2454.97
2,precipprob,2419.92
3,temp,722.556
4,humidity,332.013
5,solarradiation,330.532
6,snowdepth,222.47
7,sealevelpressure,176.61
8,windspeed,146.237
9,precip,141.381
10,uvindex,135.429


In [77]:
beta_lasso=lasso(X_train_solar_norm,y_train_solar,0.9)
mse(X_valid_solar_norm, y_valid_solar, beta_lasso)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


94996.27515973142

In [78]:
#merge names with betas and sort by absolute value of beta
sort!(DataFrame(name = names(df), beta = beta_lasso), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,preciptype_no_precipitation,9824.44
2,precipprob,3281.89
3,temp,963.264
4,humidity,442.729
5,solarradiation,440.723
6,snowdepth,296.628
7,sealevelpressure,235.5
8,conditions_Snow_rain_overcast,202.604
9,preciptype_rain_and_snow,202.604
10,windspeed,195.053


In [83]:
beta_lasso=lasso(X_train_solar_norm,y_train_solar_norm,0.1)
mse(X_valid_solar_norm, y_valid_solar_norm, beta_lasso)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.006718893421893595

In [80]:
#merge names with betas and sort by absolute value of beta
sort!(DataFrame(name = names(df), beta = beta_lasso), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,preciptype_no_precipitation,0.379091
2,temp,0.158862
3,precipprob,0.131119
4,solarradiation,0.125726
5,snowdepth,0.0814218
6,humidity,0.078102
7,sealevelpressure,0.0665399
8,conditions_Snow_rain_overcast,0.053064
9,preciptype_rain_and_snow,0.053064
10,windspeed,0.0493524


### 2. RIDGE

#### CV ridge

In [85]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_ridge=ridge(X_train_solar, y_train_solar, i)
    mse_temp= mse(X_valid_solar, y_valid_solar, beta_ridge)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for X not normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for X not normalized is: 0.9

In [87]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_ridge=ridge(X_train_solar_norm, y_train_solar, i)
    mse_temp= mse(X_valid_solar_norm, y_valid_solar, beta_ridge)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for X normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for X normalized is: 0.1

In [88]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_ridge=ridge(X_train_solar_norm, y_train_solar_norm, i)
    mse_temp= mse(X_valid_solar_norm, y_valid_solar_norm, beta_ridge)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for X normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for X normalized is: 0.1

#### MSE ridge

In [94]:
beta_ridge=ridge(X_train_solar,y_train_solar,0.9)
mse(X_valid_solar, y_valid_solar, beta_ridge)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


94685.22264048876

In [95]:
sort!(DataFrame(name = names(df), beta = beta_ridge), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,preciptype_no_precipitation,1804.87
2,precipprob,1509.88
3,conditions_Overcast,612.229
4,conditions_Partly_cloudy,610.23
5,preciptype_rain,604.27
6,conditions_Clear,582.413
7,conditions_Snow_rain_overcast,499.274
8,preciptype_rain_and_snow,499.274
9,temp,469.338
10,conditions_Snow_overcast,406.34


In [96]:
beta_ridge=ridge(X_train_solar_norm,y_train_solar,0.1)
mse(X_valid_solar_norm, y_valid_solar, beta_ridge)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


96895.42379218529

In [97]:
sort!(DataFrame(name = names(df), beta = beta_ridge), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,preciptype_no_precipitation,6735.68
2,precipprob,2820.81
3,conditions_Overcast,2284.54
4,conditions_Partly_cloudy,2282.45
5,conditions_Clear,2168.69
6,temp,1404.72
7,humidity,732.733
8,conditions_Snow_rain_overcast,462.002
9,preciptype_rain_and_snow,462.002
10,solarradiation,433.436


In [98]:
beta_ridge=ridge(X_train_solar_norm,y_train_solar_norm,0.1)
mse(X_valid_solar_norm, y_valid_solar_norm, beta_ridge)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.006735605054543574

In [99]:
sort!(DataFrame(name = names(df), beta = beta_ridge), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,preciptype_no_precipitation,0.255248
2,temp,0.216115
3,solarradiation,0.118694
4,humidity,0.11058
5,precipprob,0.109319
6,conditions_Overcast,0.0993505
7,conditions_Partly_cloudy,0.0956782
8,snowdepth,0.0826376
9,sealevelpressure,0.0667131
10,conditions_Clear,0.0602194


### 3. HOLISTIC

#### CV Holistic

In [100]:
#find best lambda, rho, and k
lowest_mse=Inf
best_lambda=0
best_rho=0
best_k=0
for lambda in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    for rho in [0.5, 0.6, 0.7, 0.8, 0.9]
        for k in [6,7,8,9,10,11,12,13]
            beta_opt=holistic_regression(X_train_solar, y_train_solar, lambda, rho, 50, k)
            X_valid_solar_trans=Matrix(transformation(X_valid_solar))
            mse_temp= mse(X_valid_solar_trans, y_valid_solar, beta_opt)
            #for each lambda, find the best rho
            if mse_temp<lowest_mse
                lowest_mse=mse_temp
                best_lambda=lambda
                best_rho=rho
                best_k=k
            end
        end
    end
end
print("best lambda for X+y not normalized is: ", best_lambda)
print("best rho for X+y not normalized is: ", best_rho)
print("best k for X+y not normalized is: ", best_k)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19

In [102]:
#find best lambda, rho, and k
lowest_mse=Inf
best_lambda=0
best_rho=0
best_k=0
for lambda in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    for rho in [0.5, 0.6, 0.7, 0.8, 0.9]
        for k in [6,7,8,9,10,11,12,13]
            beta_opt=holistic_regression(X_train_solar_norm, y_train_solar, lambda, rho, 50, k)
            X_valid_solar_norm_trans=Matrix(transformation(X_valid_solar_norm))
            mse_temp= mse(X_valid_solar_norm_trans, y_valid_solar, beta_opt)
            #for each lambda, find the best rho
            if mse_temp<lowest_mse
                lowest_mse=mse_temp
                best_lambda=lambda
                best_rho=rho
                best_k=k
            end
        end
    end
end
print("best lambda for X normalized is: ", best_lambda)
print("best rho for X normalized is: ", best_rho)
print("best k for X normalized is: ", best_k)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19

In [19]:
#find best lambda, rho, and k
lowest_mse=Inf
best_lambda=0
best_rho=0
best_k=0
for lambda in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    for rho in [0.5, 0.6, 0.7, 0.8, 0.9]
        for k in [6,7,8,9,10,11,12,13]
            beta_opt=holistic_regression(X_train_solar_norm, y_train_solar_norm, lambda, rho, 50, k)
            X_valid_solar_norm_trans=Matrix(transformation(X_valid_solar_norm))
            mse_temp= mse(X_valid_solar_norm_trans, y_valid_solar_norm, beta_opt)
            #for each lambda, find the best rho
            if mse_temp<lowest_mse
                lowest_mse=mse_temp
                best_lambda=lambda
                best_rho=rho
                best_k=k
            end
        end
    end
end
print("best lambda for X+y normalized is: ", best_lambda)
print("best rho for X+y normalized is: ", best_rho)
print("best k for X+y normalized is: ", best_k)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19

#### MSE Holistic

In [106]:
beta_opt=holistic_regression(X_train_solar, y_train_solar,0.1 ,0.9 , 50, 13)
X_valid_solar_trans=Matrix(transformation(X_valid_solar))
mse(X_valid_solar_trans, y_valid_solar, beta_opt)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


6.831339460349636e6

In [107]:
selected_features=findall(x->x>0, beta_opt)
names(transformation(X_train_solar))[selected_features]

13-element Vector{String}:
 "Abs_Sqrt2"
 "Sqrt3"
 "Abs_Sqrt4"
 "Abs_Sqrt8"
 "Abs_Sqrt9"
 "Abs_Sqrt10"
 "Abs_Sqrt11"
 "Abs_Sqrt12"
 "Abs_Sqrt13"
 "Abs_Sqrt14"
 "Abs_Sqrt17"
 "Abs_Sqrt18"
 "Abs_Sqrt23"

In [108]:
beta_opt=holistic_regression(X_train_solar_norm, y_train_solar,0.1 ,0.9 , 50, 13)
X_valid_solar_norm_trans=Matrix(transformation(X_valid_solar_norm))
mse(X_valid_solar_norm_trans, y_valid_solar, beta_opt)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


7.288945656702346e6

In [109]:
selected_features=findall(x->x>0, beta_opt)
names(transformation(X_train_solar))[selected_features]

13-element Vector{String}:
 "Abs_Sqrt2"
 "Sqrt3"
 "Abs_Sqrt4"
 "Abs_Sqrt8"
 "Abs_Sqrt9"
 "Abs_Sqrt10"
 "Abs_Sqrt11"
 "Abs_Sqrt12"
 "Abs_Sqrt13"
 "Abs_Sqrt14"
 "Abs_Sqrt17"
 "Abs_Sqrt18"
 "Abs_Sqrt23"

In [110]:
beta_opt=holistic_regression(X_train_solar_norm, y_train_solar_norm, 0.9, 0.6, 50, 7)
X_valid_solar_norm_trans=Matrix(transformation(X_valid_solar_norm))
mse(X_valid_solar_norm_trans, y_valid_solar_norm, beta_opt)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.006594836542325529

In [111]:
selected_features=findall(x->x>0, beta_opt)
names(transformation(X_train_solar))[selected_features]

7-element Vector{String}:
 "Abs_Sqrt2"
 "Abs_Sqrt4"
 "Sqrt8"
 "Abs_Sqrt9"
 "Sqrt10"
 "Abs_Sqrt11"
 "Abs_Sqrt14"

* Square root of FEELSLIKE
* Square root of HUMIDITY
* Squared of SNOWDEPTH
* Square root of WINDSPEED
* Squared of WINDDIR
* Square root of SEALEVELPRESSURE
* Square root of SOLARIRRADIATION

## WIND ENERGY OUTPUT
* Lasso: Most values tending to 0. MSE of 0.04095
* Holistic Regression: do CV to get optimal value of k, rho, and lambda. (Best lambda: 0.9, Best rho: 0.9, Best k: 10)
    - Selected features (5): Square root of DEW, Square root of PRECIPITATION PROBABILITY, WINDSPEED, Square root of WINDDIRECTION, and Squared of VISIBILITY
    - MSE of 0.038496

### 1. LASSO

#### CV lasso

In [112]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_lasso=lasso(X_train_wind, y_train_wind, i)
    mse_temp= mse(X_valid_wind, y_valid_wind, beta_lasso)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for X not normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for X not normalized is: 0.1

In [113]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_lasso=lasso(X_train_wind_norm, y_train_wind, i)
    mse_temp= mse(X_valid_wind_norm, y_valid_wind, beta_lasso)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for X normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for X normalized is: 0.1

In [114]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_lasso=lasso(X_train_wind_norm, y_train_wind_norm, i)
    mse_temp= mse(X_valid_wind_norm, y_valid_wind_norm, beta_lasso)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for X+y normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for X+y normalized is: 0.1

#### MSE lasso

In [115]:
beta_lasso=lasso(X_train_wind,y_train_wind, 0.1)
mse(X_valid_wind, y_valid_wind, beta_lasso)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


3.4277155620220914e7

In [116]:
sort!(DataFrame(name = names(df), beta = beta_lasso), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,windspeed,42684.6
2,dew,24900.9
3,precipprob,15561.9
4,preciptype_no_precipitation,14985.7
5,precip,2555.38
6,winddir,2104.13
7,visibility,1292.1
8,uvindex,930.612
9,conditions_Rain_overcast,463.243
10,conditions_Clear,322.546


In [117]:
beta_lasso=lasso(X_train_wind_norm,y_train_wind, 0.1)
mse(X_valid_wind_norm, y_valid_wind, beta_lasso)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


3.4277160175948605e7

In [118]:
sort!(DataFrame(name = names(df), beta = beta_lasso), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,preciptype_no_precipitation,59945.7
2,windspeed,56912.8
3,dew,33201.1
4,precipprob,21042.2
5,precip,3407.17
6,winddir,2805.5
7,conditions_Rain_overcast,1852.97
8,visibility,1722.8
9,conditions_Clear,1287.21
10,uvindex,1240.8


In [119]:
beta_lasso=lasso(X_train_wind_norm,y_train_wind_norm, 0.1)
mse(X_valid_wind_norm, y_valid_wind_norm, beta_lasso)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.04026353151171594

In [120]:
sort!(DataFrame(name = names(df), beta = beta_lasso), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,windspeed,2.05291
2,preciptype_no_precipitation,1.91018
3,dew,0.998642
4,precipprob,0.680195
5,precip,0.101719
6,winddir,0.101584
7,visibility,0.0700878
8,conditions_Clear,0.0604674
9,conditions_Rain_overcast,0.0604431
10,conditions_Partly_cloudy,0.051728


### 2. RIDGE

#### CV ridge

In [121]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_ridge=ridge(X_train_wind, y_train_wind, i)
    mse_temp= mse(X_valid_wind, y_valid_wind, beta_ridge)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for X not normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for X not normalized is: 0.9

In [122]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_ridge=ridge(X_train_wind_norm, y_train_wind, i)
    mse_temp= mse(X_valid_wind_norm, y_valid_wind, beta_ridge)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for X normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for X normalized is: 0.9

In [124]:
#find best lambda
lowest_mse=Inf
best_lambda=0
for i in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    beta_ridge=ridge(X_train_wind_norm, y_train_wind_norm, i)
    mse_temp= mse(X_valid_wind_norm, y_valid_wind_norm, beta_ridge)
    if mse_temp<lowest_mse
        lowest_mse=mse_temp
        best_lambda=i
    end
end
print("best lambda for x+y normalized is: ", best_lambda)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
best lambda for x+y normalized is: 0.9

#### MSE ridge

In [126]:
beta_ridge=ridge(X_train_wind,y_train_wind,0.9)
mse(X_valid_wind, y_valid_wind, beta_ridge)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


3.3267507095802195e7

In [127]:
sort!(DataFrame(name = names(df), beta = beta_ridge), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,windspeed,40875.6
2,dew,10452.8
3,preciptype_no_precipitation,7923.88
4,precipprob,6940.48
5,preciptype_rain,2966.36
6,conditions_Clear,2852.63
7,conditions_Partly_cloudy,2772.92
8,conditions_Overcast,2298.33
9,precip,2278.35
10,winddir,2203.37


In [128]:
beta_ridge=ridge(X_train_wind_norm,y_train_wind,0.9)
mse(X_valid_wind_norm, y_valid_wind, beta_ridge)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


3.2475359344228506e7

In [129]:
sort!(DataFrame(name = names(df), beta = beta_ridge), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,windspeed,54865.0
2,preciptype_no_precipitation,19715.0
3,precipprob,9039.42
4,conditions_Clear,7567.37
5,conditions_Partly_cloudy,7092.63
6,conditions_Overcast,5054.96
7,dew,4299.4
8,visibility,3483.1
9,winddir,3165.4
10,precip,3120.97


In [130]:
beta_ridge=ridge(X_train_wind_norm,y_train_wind_norm,0.9)
mse(X_valid_wind_norm, y_valid_wind_norm, beta_ridge)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


0.037950979697446215

In [131]:
sort!(DataFrame(name = names(df), beta = beta_ridge), :beta, rev=true)

Unnamed: 0_level_0,name,beta
Unnamed: 0_level_1,String,Float64
1,windspeed,1.96733
2,preciptype_no_precipitation,0.706934
3,precipprob,0.324133
4,conditions_Clear,0.271349
5,conditions_Partly_cloudy,0.254326
6,conditions_Overcast,0.181259
7,dew,0.154166
8,visibility,0.124896
9,winddir,0.113504
10,precip,0.111911


### 3. HOLISTIC

#### CV holistic

In [132]:
#find best lambda, rho, and k
lowest_mse=Inf
best_lambda=0
best_rho=0
best_k=0
for lambda in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    for rho in [0.5, 0.6, 0.7, 0.8, 0.9]
        for k in [6,7,8,9,10,11,12,13]
            beta_opt=holistic_regression(X_train_wind, y_train_wind, lambda, rho, 50, k)
            X_valid_wind_trans=Matrix(transformation(X_valid_wind))
            mse_temp= mse(X_valid_wind_trans, y_valid_wind, beta_opt)
            #for each lambda, find the best rho
            if mse_temp<lowest_mse
                lowest_mse=mse_temp
                best_lambda=lambda
                best_rho=rho
                best_k=k
            end
        end
    end
end
print("best lambda for X+y not normalized is: ", best_lambda)
print("best rho for X+y not normalized is: ", best_rho)
print("best k for X+y not normalized is: ", best_k)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19

In [133]:
#find best lambda, rho, and k
lowest_mse=Inf
best_lambda=0
best_rho=0
best_k=0
for lambda in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    for rho in [0.5, 0.6, 0.7, 0.8, 0.9]
        for k in [6,7,8,9,10,11,12,13]
            beta_opt=holistic_regression(X_train_wind_norm, y_train_wind, lambda, rho, 50, k)
            X_valid_wind_trans_norm=Matrix(transformation(X_valid_wind_norm))
            mse_temp= mse(X_valid_wind_trans_norm, y_valid_wind, beta_opt)
            #for each lambda, find the best rho
            if mse_temp<lowest_mse
                lowest_mse=mse_temp
                best_lambda=lambda
                best_rho=rho
                best_k=k
            end
        end
    end
end
print("best lambda for X normalized is: ", best_lambda)
print("best rho for X normalized is: ", best_rho)
print("best k for X normalized is: ", best_k)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19

In [29]:
#find best lambda, rho, and k
lowest_mse=Inf
best_lambda=0
best_rho=0
best_k=0
for lambda in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
    for rho in [0.5, 0.6, 0.7, 0.8, 0.9]
        for k in [6,7,8,9,10,11,12,13]
            beta_opt=holistic_regression(X_train_wind_norm, y_train_wind_norm, lambda, rho, 50, k)
            X_valid_wind_norm_trans=Matrix(transformation(X_valid_wind_norm))
            mse_temp= mse(X_valid_wind_norm_trans, y_valid_wind_norm, beta_opt)
            #for each lambda, find the best rho
            if mse_temp<lowest_mse
                lowest_mse=mse_temp
                best_lambda=lambda
                best_rho=rho
                best_k=k
            end
        end
    end
end
print("best lambda for X+y normalized is: ", best_lambda)
print("best rho for X+y normalized is: ", best_rho)
print("best k for X+y normalized is: ", best_k)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19
Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19

#### MSE holistic

In [34]:
#mse
X_valid_wind_norm_trans=Matrix(transformation(X_valid_wind_norm))
mse(X_valid_wind_norm_trans, y_valid_wind_norm, beta_opt)

0.043026241287980466

In [35]:
#holistic regression with optimal hyperparameters
beta_opt=holistic_regression(X_train_wind_norm, y_train_wind_norm, best_lambda, best_rho, 50, best_k)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


78-element Vector{Float64}:
  0.0
  0.0
  0.0
  0.0
 -0.4525511538192362
  0.0
  0.0
  0.0
  0.30649204226385673
  0.0
 -0.3187568928597829
  0.0
  0.0
  ⋮
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0

In [61]:
beta_opt=holistic_regression(X_train_wind, y_train_wind, best_lambda, best_rho, 50, best_k)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-08-19


78-element Vector{Float64}:
  0.0
  0.0
  0.0
  0.0
  0.0
 50.0
  0.0
 50.0
  0.0
  0.0
  0.0
 50.0
  0.0
  ⋮
  0.0
  0.0
 50.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0

In [65]:
#mse
X_valid_wind_norm_trans=Matrix(transformation(X_valid_wind_norm))
mse(X_valid_wind_norm_trans, y_valid_wind, beta_opt) #improve mse

#NOT NORMALIZE Y WE GET SAME MSE IF WE NORM OR NOT

6.2707208478434905e7

In [36]:
#mse
X_valid_wind_norm_trans=Matrix(transformation(X_valid_wind_norm))
mse(X_valid_wind_norm_trans, y_valid_wind_norm, beta_opt) #improve mse

0.03849602906347231

In [37]:
#return indices of selected features
selected_features=findall(x->x>0, beta_opt)

5-element Vector{Int64}:
  9
 18
 25
 30
 38

In [38]:
#what is the name of the features?
names(transformation(X_train_wind))[selected_features]

5-element Vector{String}:
 "Abs_Sqrt3"
 "Abs_Sqrt6"
 "X9"
 "Abs_Sqrt10"
 "Sqrt13"

* Square root of DEW
* Square root of PRECIPITATION PROBABILITY
* WINDSPEED
* Square root of WINDDIRECTION
* Squared of VISIBILITY

In [46]:
import Pkg; Pkg.add("Metrics")

[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m   Installed[22m[39m Metrics ─ v0.1.2
[32m[1m    Updating[22m[39m `~/.julia/environments/v1.8/Project.toml`
 [90m [cb9f3049] [39m[92m+ Metrics v0.1.2[39m
[32m[1m    Updating[22m[39m `~/.julia/environments/v1.8/Manifest.toml`
 [90m [cb9f3049] [39m[92m+ Metrics v0.1.2[39m
[32m[1mPrecompiling[22m[39m project...
[32m  ✓ [39mMetrics
  1 dependency successfully precompiled in 4 seconds. 155 already precompiled.


In [54]:
using Metrics
X_train_wind_norm_trans=Matrix(transformation(X_train_wind_norm))
r2_score(X_train_wind_norm_trans*beta_opt, y_train_wind_norm)

0.5379542459979234

In [None]:
using Statistics
SS_t = sum((y_valid_wind_norm .- mean(y_valid_wind_norm)).^2)

# Regression sum of squares
SS_r = sum(((X_valid_wind_norm*beta_opt) .- mean(y_valid_wind_norm)).^2)

R2_wind = SS_r/ SS_t

In [None]:
#RETURN HOLISITIC REGRESSION FOR BOTH TRAIN+validation
#CREATE TEST TO VALIDATE IT 
#PREDICTION: TREES, NEURAL NETWORKS