# Regresja Elastic Net - Elastic Net Regression

### Biblioteki

In [None]:
import os

from collections import defaultdict
from itertools import combinations

import numpy as np
import pandas as pd

from sklearn.preprocessing import scale 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn import metrics

### Stałe

In [None]:
# Smallest and largest number of predictors a candidate subset may contain;
# the model search loop treats both bounds as inclusive.
MIN_SIZE_PREDICTORS, MAX_SIZE_PREDICTORS = 2, 15

### Odczytanie danych

In [None]:
# Directory in which the data files are stored.
folder_data = "data"

# Each frame is read from its own HDF5 file in that directory, under the
# "data" key.
X, Y = (
    pd.read_hdf(os.path.join(folder_data, file_name), "data")
    for file_name in ("X.h5", "Y.h5")
)

### Evaluation metric - R^2 (a score: higher is better)

In [None]:
def metric(Y, Y_pred):
    """Return the R^2 (coefficient of determination) of the predictions.

    Args:
        Y: Ground-truth target values.
        Y_pred: Predicted target values, aligned with ``Y``.

    Returns:
        The value computed by ``sklearn.metrics.r2_score``; higher is better.
    """
    r_squared = metrics.r2_score(y_true=Y, y_pred=Y_pred)
    return r_squared

### Lista wyselekcjonowanych predyktorów

In [None]:
# Names of the feature columns retained for modelling, in the order they
# were listed by hand.
selected_predictors = [
    'f1', 'f15', 'f30', 'f45', 'f66',
    'f134', 'f198',
    'f202', 'f207', 'f208', 'f209', 'f211', 'f212', 'f213',
    'f221', 'f259', 'f260', 'f267', 'f268', 'f275', 'f276',
    'f280', 'f284', 'f288', 'f289', 'f290', 'f291', 'f292',
]

# Immutable set of the same names (unordered, duplicates collapsed).
SELECTED_PREDICTORS = frozenset(selected_predictors)

### Dane predyktorów

In [None]:
# Restrict the feature matrix to the selected predictors.
# A sorted list is used as the column indexer instead of the frozenset itself:
# a set has no defined iteration order, so indexing with it could yield a
# different column order from one interpreter run to the next.
SELECTED_X_DATA = X[sorted(SELECTED_PREDICTORS)]

# Hold out 30% of the rows as a test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    SELECTED_X_DATA, Y, test_size=0.3, random_state=1
)

### Model

In [None]:
# Exhaustive best-subset search: for every subset size within the configured
# bounds, fit an ElasticNet on each combination of the selected predictors and
# keep the combination with the highest metric value on the test split.
#
# The number of fits grows combinatorially with the predictor pool and the
# size bounds, so this cell can run for a very long time.
#
# NOTE(review): the winning subset is chosen on the same test split that is
# used to report its score, so the reported value is optimistically biased.

# Sort once, outside the loops, so the enumeration order (and therefore the
# tie-breaking between equally good subsets) is the same on every run;
# iterating the frozenset directly has no guaranteed order.
candidate_pool = sorted(SELECTED_PREDICTORS)

bestModels = {}
vector_combinations = {}
for i in range(MIN_SIZE_PREDICTORS, MAX_SIZE_PREDICTORS + 1):
    # Lazy iterator over all i-element subsets. It is consumed by the inner
    # loop, so the object left in `vector_combinations` is exhausted afterwards.
    vector_combinations[i] = combinations(candidate_pool, i)

    # An empty dict means "no candidate evaluated yet" for this size.
    bestModels[i] = {}
    for combo in vector_combinations[i]:
        predictors = list(combo)

        # Define the model.
        # NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and
        # removed in 1.2, where this call raises TypeError. Migrating means
        # scaling the features in a Pipeline and re-tuning `alpha`, which
        # changes the fitted coefficients, so it is deliberately left as-is.
        model = ElasticNet(normalize=True)

        # Fit the model to the training data.
        model.fit(X_train[predictors], y_train)

        # Predict once and reuse the result for both figures below.
        y_pred = model.predict(X_test[predictors])

        # R^2 on the test set, rounded for display. This is the value
        # `model.score(X_test[predictors], y_test)` returns, computed from
        # the existing prediction instead of predicting a second time.
        score = np.round(metrics.r2_score(y_test, y_pred), 2)

        # Unrounded metric used to rank candidates (higher is better).
        loss_value = metric(y_test, y_pred)

        entry = {
            'predictors': predictors,
            'model': model,
            'score': score,
            'loss_value': loss_value,
        }

        # Keep the first candidate unconditionally, then replace it only on a
        # strict improvement, so ties keep the earlier combination. The dict is
        # updated in place so partial results survive an interrupted run.
        incumbent = bestModels[i]
        if not incumbent or incumbent['loss_value'] < loss_value:
            bestModels[i] = entry

In [None]:
# Notebook cell output: show the best entry recorded for each subset size.
bestModels