Import of libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler # To standardize the data
import cvxpy as cp
from sklearn import linear_model

Import of the HMLasso function

In [2]:
### Adapt the path "C:/Users/Kilian/Desktop/ENSAE/STATAPP" to run the cell

import sys
sys.path.insert(1, 'C:/Users/Kilian/Desktop/ENSAE/STATAPP/Projet_Statapp/pretreatment')

import file_04_HMLasso as hml

## Data downloading and separation of the dataset

Dataset containing the types of each column from data_03.csv

In [3]:
columns_types = pd.read_csv("data_03_columns_types.csv", index_col=0)
columns_types.head(3)

Unnamed: 0,Name,Type
0,HHIDPN,Cont
1,HHID,Char
2,PN,Char


Downloading the data with social and genetic variables.

In [4]:
data = pd.read_csv("data_03.csv")

  data = pd.read_csv("data_03.csv")


The column "genetic_Section_A_or_E" have mixed types, so we change its format.

In [5]:
temporary = np.where(data['genetic_Section_A_or_E'] == 'E', 1, np.where(data['genetic_Section_A_or_E'] == 'A', 0, np.nan))

In [6]:
data["genetic_Section_A_or_E"] = temporary

Now we add the health index created by t-SNE

In [7]:
tSNE_GHI = pd.read_csv("data_tSNE_GHI.csv")

We merge the t-SNE health index to the data

In [8]:
data = data.merge(tSNE_GHI, how ='left', on ='HHIDPN')

The final outcome to predict is tSNE_GHI14, so we only keep individuals who were interviewed during the last wave (14th wave)

In [9]:
data_bis = data[data['tSNE_GHI14'].notna()]

Number of individuals present in every waves.

In [10]:
tSNE_GHI[~tSNE_GHI.isnull().any(axis=1)].shape[0]

3396

We select the outcome tSNE_GHI

In [11]:
Y = data_bis[["HHIDPN"]+["tSNE_GHI" + str(i) for i in range (1,15)]]

We drop the previous health index GHIw from the data, which won't be used as outcome.
(list_columns_GHI contains the names of GHIw columns).

We drop the outcome to create the matrix X.

In [12]:
X = data_bis.drop(["GHI" + str(i) for i in range (1,15)], axis = 1)
X.drop(["tSNE_GHI" + str(i) for i in range (1,15)], axis = 1, inplace =True)

Now we split the dataset into training, validation and test sets.

In [13]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=18)
X_test, X_valid, Y_test, Y_valid = train_test_split(X_test, Y_test, test_size=0.5, random_state = 6)

## Machine learning

The objective here is to make a dataset where we observe if each variable exists at each wave

In [49]:
def dataset_temporal_variables (X_train,add_tSNE_GHIw):   
    temporal_variables = {}
    waves_columns = [col for col in X_train.columns if "genetic_" not in col and col[1] in "123456789"]
    for col in waves_columns:
      char = col[0] # R or H
      if col[2] in "01234":
        wave = col[1:3]
        suffix = col[3:]
      else:
        wave = col[1]
        suffix = col[2:]
      variable = char + 'w' + suffix

      if variable not in temporal_variables.keys():
        temporal_variables[variable] = np.zeros((14), dtype=bool)

      temporal_variables[variable][int(wave)-1] = True

    temporal_variables = pd.DataFrame(temporal_variables)

    # We manually add "tSNE_GHIw":
    if add_tSNE_GHIw:
        temporal_variables["tSNE_GHIw"] = np.ones((14), dtype=bool)
        waves_columns += [f"tSNE_GHI{w}" for w in range(1,15)]
        
    return (temporal_variables,waves_columns)

In [50]:
# Timeless data
def timeless_variables(X_train,waves_columns):
    non_waves_columns = [col for col in X_train.columns if col not in waves_columns]
    To_remove = ["HHIDPN","PN","HHID","RAHHIDPN"]+["INW"+str(i+1) for i in range (14)]
    for x in To_remove:
        non_waves_columns.remove(x)
    return non_waves_columns

We put the explaining variables by wave in a list of dataset Intemporal variables are put in each one of them

In [51]:
import random

def list_wave(X_train, reduced):
    (temporal_variables , waves_columns) = dataset_temporal_variables(X_train,True)
    non_waves_columns = timeless_variables(X_train,waves_columns)
    
    #Reduce number of variables to code
    if reduced:
        temporal_variables_2 = temporal_variables.iloc[:,[i for i in range(1,15)]+[-i for i in range(1,5)]]
        non_waves_columns_2 = random.choices(non_waves_columns,k=5)

        liste = [] 
        for i in range(14):
            columns_wave_i = ["HHIDPN"]+[col.replace('w', str(i+1)) for col in temporal_variables_2.T[i].index[temporal_variables_2.T[i]] if col != "tSNE_GHIw"]
            #Add the intemporal variables only to the last wave, to avoid duplicated labels issues
            if i == 13:
                liste.append(X_train.loc[X_train["INW"+str(i+1)] == 1, columns_wave_i + non_waves_columns_2])
            else:
                liste.append(X_train.loc[X_train["INW"+str(i+1)] == 1, columns_wave_i])
                
    #All the variables
    else:
        liste = []    # len = 14 
        for i in range(14):
            columns_wave_i = ["HHIDPN"]+[col.replace('w', str(i+1)) for col in temporal_variables.T[i].index[temporal_variables.T[i]] if col != "tSNE_GHIw"]
            #Intemporal variables only to the last wave, to avoid duplicated labels issues
            if i ==  13:
                liste.append(X_train.loc[X_train["INW"+str(i+1)] == 1, columns_wave_i + non_waves_columns])
            else:
                liste.append(X_train.loc[X_train["INW"+str(i+1)] == 1, columns_wave_i])
                
    return (liste)
    

### Lasso selection

We start to initialize with a first lasso on the first wave.

In [52]:
def initialize_Lasso(liste, Y_train, HMLasso, method, mu, limit,print_wave):
    
    if print_wave:
        print("wave",1)
    
    scaler = StandardScaler()#(with_std=False)
    hml.ERRORS_HANDLING = "ignore"
    
    #Prepare data
    X_train1 = liste[0].drop("HHIDPN",axis=1)
    Y_train1 = Y_train.iloc[:,1]
    Y_train1.dropna(inplace =True)
    Y_train1 = Y_train1.values
    Y_train1 = (Y_train1 - np.mean(Y_train1))/np.std(Y_train1)
  
    #HMLasso
    if HMLasso:
        #Standardize X_train
        X_train1 = scaler.fit_transform(X_train1)
        
        coefficients = apply_HMLasso(X_train1, Y_train1,mu)
        
        #Variables to keep
        var_to_keep = coefficients > 10**(limit)
    
    #Common Lasso
    else:
        X_train1 = Na_imputation(X_train1, method)
        
        #Standardize X_train
        X_train1 = scaler.fit_transform(X_train1)
        
        coefficients = apply_Lasso(X_train1, Y_train1, mu)
        
        #Variables top keep
        var_to_keep = coefficients != 0

    #Selection of variables
    if print_wave:
        print("Variables kept :", list(var_to_keep).count(1))
    var_to_keep = np.insert(var_to_keep,0,True)
    
    entire_data = liste[0]
    selected = entire_data[entire_data.columns[var_to_keep]]
        
    return selected

In [53]:
def apply_HMLasso(X, Y, mu):
    
    lasso = hml.HMLasso(mu)
    lasso.fit(X, Y)
    
    coefficients = np.abs(lasso.beta_opt.copy())
    
    return coefficients

def apply_Lasso(X,Y, mu):
    
    clf = linear_model.Lasso(alpha=mu)
    clf.fit(X, Y)
    
    coefficients = clf.coef_
    
    return coefficients

In [54]:
def Na_imputation(X, method):
    if method == "mean":
        return X.fillna(X.mean())

function to impute missing data created when merging by mean but without touching Na values already there before the merge.

In [55]:
def Na_management(df1, df2, index):
    
    merged = df1.merge(df2, how='outer', on = index)
    
    df1_index = df1.set_index(index)
    df2_index = df2.set_index(index)
    
    merged = merged.fillna(merged.mean())
    merged = merged.set_index(index)
    
    df1_index = df1_index.fillna("NaN")
    merged.update(df1_index)
    
    df2_index = df2_index.fillna("NaN")
    merged.update(df2_index)
    
    merged = merged.replace("NaN",np.nan)
    
    merged = merged.reset_index()
    
    return merged

Function to select variables by HMLasso

In [56]:
def Lasso_selection(X_train, Y_train, HMLasso, method, mu, limit, reduced, print_wave):
    
    #If a columns contains only Nan values, we drop it
    empty_col = [col for col in X_train.columns if X_train[col].isnull().all()]
    if empty_col != []:
        X_train.drop(empty_col, axis=1, inplace=True)
    
    liste = list_wave(X_train, reduced)
    
    if print_wave:
        print("Lasso selection, mu =", mu)
    
    selected = initialize_Lasso(liste, Y_train, HMLasso, method, mu, limit,print_wave)
    
    scaler = StandardScaler()#(with_std=False)
    hml.ERRORS_HANDLING = "ignore"
    
    for i in range (1,14) :
        
        if print_wave:
            print("wave",i+1)

        var_to_select = Na_management(selected, liste[i], "HHIDPN")

        Y_train_i = Y_train.iloc[:,[0,i+1]]
        X_Y_train = var_to_select.merge(Y_train_i, how = 'left', on = "HHIDPN")

        Y_train_i = X_Y_train[f"tSNE_GHI{i+1}"]
        X_train_i = X_Y_train.drop([f"tSNE_GHI{i+1}","HHIDPN"], axis =1)

        Y_train_i = Y_train_i.fillna(Y_train_i.mean())
        Y_train_i = Y_train_i.values
        Y_train_i = (Y_train_i - np.mean(Y_train_i))/np.std(Y_train_i)

        
        #HMLasso
        if HMLasso:
            #Standardize X_train
            X_train_i = scaler.fit_transform(X_train_i)
        
            coefficients = apply_HMLasso(X_train_i, Y_train_i, mu)
        
            #Variables to keep
            var_to_keep = coefficients > 10**(limit)
            
        #Common Lasso
        else:
            X_train_i = Na_imputation(X_train_i, method)
        
            #Standardize X_train
            X_train_i = scaler.fit_transform(X_train_i)

            coefficients = apply_Lasso(X_train_i, Y_train_i, mu)

            #Variables top keep
            var_to_keep = coefficients != 0
            
        #Selection of variables
        if print_wave:
            print("Variables kept :", list(var_to_keep).count(1))
        var_to_keep = np.insert(var_to_keep,0,True)

        entire_data = var_to_select
        selected = entire_data[entire_data.columns[var_to_keep]] 
    
    #Return the data with selected variables, the number of variables selected and the names of columns
    return (selected, selected.columns)

### Within estimator

There two types of missing values, the "one-time" missing values when someone didn't awnser a question during the interview or so and the missing values when someone wasn't interviewed at all during a wave.


For the first type, we impute those missing values with the mean of the column (Nan).
(Possibility to work on another imputation method).

Then for the individuals who weren't interviewed during a wave, we replace the missing value with the temporal mean of the variable over time (NanNan)


Finally, we compute (A faire en latex) X_vague_ti = X_ti - temporal_mean(X_ti)

In [57]:
def creation_data_within(selected):
    
    #For the "one-time" missing values imputation by mean
    X_train_within = selected.fillna(selected.mean())
    
    ###For people who weren't interviewed
    # We start by adding the INWw columns to know if the individual was interviewed during the wave w
    X_train_within = X_train_within.merge(X_train[["HHIDPN"]+["INW"+str(i) for i in range(1,15)]], how ="left", on="HHIDPN")  
    
    #We recover the missing values for people who weren't interviewed during the wave w
    X_train_within = recover_missing(X_train_within)
    
    # Creation of the data set for within regression.
    (X_train_within, temporal_variables_within) = data_set_within(X_train_within)
    
    #Still Nan values in intemporal variables
    X_train_within = X_train_within.fillna(X_train_within.mean())
    
    return (X_train_within, temporal_variables_within)

This function return a dataset containing only variables concerned by the wave.

In [58]:
import re
import pickle

def get_wave(data, wave, non_temporal):
  """
  This function returns a smaller dataset summarizing all data for the given wave.

  Note that it also returns columns that are not relative to any wave (for instance, 'HHIDPN')
  """

  assert wave in range(1, 15)

  regex = re.compile("[0-9]+")
  if non_temporal:
        wave_columns = [col for col in data.columns if (len(regex.findall(col)) == 0 or regex.findall(col)[0] == str(wave))]
  else:
        wave_columns = [col for col in data.columns if (regex.findall(col) != [] and regex.findall(col)[0] == str(wave))]
  wave_data = data[wave_columns]

  return wave_data

Function to recover the missing values for people who weren't interviewed during the wave w

In [59]:
def recover_missing(X_train_within):

    X_train_within_index = X_train_within.set_index("HHIDPN")
    wave_1 = get_wave(X_train_within_index,1, non_temporal =False)
    wave_1.loc[wave_1["INW1"] == 0] = np.nan
    wave_1["INW1"].fillna(0)
    Tempo = wave_1

    for i in range(2,15):
        if i == 14:
            wave_i = get_wave(X_train_within_index, i, non_temporal =True)
        else:
            wave_i = get_wave(X_train_within_index, i, non_temporal =False)
        wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
        wave_i["INWw".replace('w', str(i))].fillna(0)
        Tempo = Tempo.merge(wave_i, how= "left", on = "HHIDPN")

    return Tempo.reset_index()

Get a dataframe to know which variables are in X_train_within

Function to compute (A faire en latex) X_vague_ti = X_ti - temporal_mean(X_ti)

It creates columns containing the temporal mean of a temporal variables and then replaces the Nan values (when people weren't interviewed) by this mean. Finally it creates the dataset  X_vague_ti.

In [60]:
def data_set_within (X_train_within):
    
    temporal_variables_within = dataset_temporal_variables(X_train_within, False)[0]
    
    X_within = X_train_within.copy()
    
    for col in temporal_variables_within.columns:
        index_wave = temporal_variables_within.index[temporal_variables_within[col]==1].tolist()
        names_waves = [col.replace('w', str(i+1)) for i in index_wave]
        # (~X_within[names_waves].isna()).sum(axis=1) = number of non missing values
        X_within[col+"_MEAN"] = X_within[names_waves].sum(axis=1)/(~X_within[names_waves].isna()).sum(axis=1)
        for x in names_waves:
            # Imputing the missing values by the temporal mean
            new_col = X_within[x].fillna(X_within[col+"_MEAN"])
            X_within[x] = new_col            
            #Creating the new data for within regression X_vague
            X_within[x] = X_within[x] - X_within[col+"_MEAN"]
    return (X_within, temporal_variables_within)

Now we do the same thing to Y_train but no need to impute the Nan values since the only outcome is tSNE_GHI14 (no Nan).

In [61]:
def creation_outcome_within(Y_train):

    Y_train_within = Y_train.copy()
    
    tSNE_GHI = [f"tSNE_GHI{w}" for w in range(1,15)]

    Y_train_within["tSNE_GHIw_MEAN"] = Y_train_within[tSNE_GHI].sum(axis=1)/(~Y_train_within[tSNE_GHI].isna()).sum(axis=1)
    Y_train_within["tSNE_GHI14_within"] = Y_train_within["tSNE_GHI14"] - Y_train_within["tSNE_GHIw_MEAN"]
    
    return Y_train_within

We can now proceed to the regression

In [62]:
def get_X_Y_within(X_train_within, Y_train_within, temporal_variables_within):
    
    data_regression = X_train_within.merge(Y_train_within[["HHIDPN","tSNE_GHI14_within"]], on = "HHIDPN")
    Y_regression = data_regression["tSNE_GHI14_within"]
    list_to_drop = ["HHIDPN","tSNE_GHI14_within"]+["INW"+str(i) for i in range(1,15)]+[col+"_MEAN" for col in temporal_variables_within.columns]
    X_regression = data_regression.drop(list_to_drop,axis=1)
    
    return (X_regression, Y_regression)
    

In [63]:
from sklearn.linear_model import LinearRegression

def regression(X_regression,Y_regression):
    
    modeleReg=LinearRegression()

    modeleReg.fit(X_regression,Y_regression) 
    
    return modeleReg

In [64]:
def Within_estimates(X, Y, HMLasso, method, mu, limit, reduced, print_wave):
    
    (selected, names_var) = Lasso_selection(X, Y, HMLasso, method, mu, limit, reduced, print_wave)
        
    (X_within, temporal_variables_within) = creation_data_within(selected)
    Y_within = creation_outcome_within(Y_train)
    
    (X_regression, Y_regression) = get_X_Y_within(X_within, Y_within, temporal_variables_within)
    
    return (regression(X_regression,Y_regression), names_var, X_regression, Y_regression)

### Optimisation of parameters with validation set

In [65]:
def summary_model (X_train, Y_train, X_valid, Y_valid, HMLasso, method, mu, limit, reduced, coef, print_wave):
    
    (model, names_var, X_regression, Y_regression) = Within_estimates(X_train, Y_train, HMLasso, method, mu, limit, reduced, print_wave)
    
    #Selection of columns in the validation set
    selected_valid = X_valid[list(names_var)]
    
    (X_valid_within, temporal_variables_within) = creation_data_within(selected_valid)
    Y_valid_within = creation_outcome_within(Y_valid)
    
    (X_valid_regression, Y_valid_regression) = get_X_Y_within(X_valid_within, Y_valid_within, temporal_variables_within)
    
    R_square_train = model.score(X_regression,Y_regression)
    R_square_valid = model.score(X_valid_regression,Y_valid_regression, sample_weight=None)
    
    intercept = model.intercept_
    coefficients = model.coef_
    
    RMSE_train = RMSE(X_regression, Y_regression, model)
    RMSE_valid = RMSE(X_valid_regression, Y_valid_regression, model)

    if coef:
        summary = {"HMLasso" : HMLasso, "mu" : mu, "R_square_train" : R_square_train, "RMSE_train" : RMSE_train, "R_square_valid" : R_square_valid, "RMSE_valid" : RMSE_valid, "variables kept": len(list(names_var)), "intercept" : intercept, "coefficients" : coefficients}
        return (summary, model, names_var)
    
    else:
        summary = {"HMLasso" : HMLasso, "mu" : mu, "R_square_train" : R_square_train, "RMSE_train" : RMSE_train, "R_square_valid" : R_square_valid, "RMSE_valid" : RMSE_valid, "variables kept": len(list(names_var))}
        return (summary, model, names_var)

In [66]:
from sklearn.metrics import mean_squared_error

def RMSE(X, Y, model):
    Y_predict = model.predict(X)
    
    MSE = mean_squared_error(Y, Y_predict)
    
    return MSE

In [67]:
def multiples_models(X_train, Y_train, X_valid, Y_valid, list_mu, HMLasso, method, limit, reduced, print_wave):
    
    Frame = pd.DataFrame(columns = ["HMLasso", "mu", "R_square_train", "RMSE_train", "R_square_valid", "RMSE_valid", "variables kept"])
    
    for mu in list_mu:
        summary_ML = summary_model(X_train, Y_train, X_valid, Y_valid, HMLasso, method, mu, limit, reduced, coef=False, print_wave=print_wave)[0]
        Frame = Frame.append(summary_ML, ignore_index=True)
        
    return Frame

In [40]:
list_mu = np.linspace(0.005,0.1,20)

In [47]:
Frame = multiples_models(X_train, Y_train, X_valid, Y_valid, list_mu, HMLasso =False, method ="mean", limit= -14, reduced =False, print_wave =True)

Lasso selection, mu = 0.005
wave 1
Variables kept : 303
wave 2
Variables kept : 476
wave 3
Variables kept : 566
wave 4
Variables kept : 635
wave 5
Variables kept : 671
wave 6
Variables kept : 694
wave 7
Variables kept : 714
wave 8
Variables kept : 678
wave 9
Variables kept : 678
wave 10
Variables kept : 665
wave 11
Variables kept : 646
wave 12
Variables kept : 625
wave 13
Variables kept : 596
wave 14
Variables kept : 644


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.01
wave 1
Variables kept : 218
wave 2
Variables kept : 261
wave 3
Variables kept : 307
wave 4
Variables kept : 305
wave 5
Variables kept : 318
wave 6
Variables kept : 330
wave 7
Variables kept : 303
wave 8
Variables kept : 293
wave 9
Variables kept : 291
wave 10
Variables kept : 294
wave 11
Variables kept : 273
wave 12
Variables kept : 260
wave 13
Variables kept : 237
wave 14
Variables kept : 279


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.015
wave 1
Variables kept : 145
wave 2
Variables kept : 172
wave 3
Variables kept : 191
wave 4
Variables kept : 187
wave 5
Variables kept : 205
wave 6
Variables kept : 180
wave 7
Variables kept : 176
wave 8
Variables kept : 173
wave 9
Variables kept : 181
wave 10
Variables kept : 160
wave 11
Variables kept : 160
wave 12
Variables kept : 146
wave 13
Variables kept : 131
wave 14
Variables kept : 154


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.02
wave 1
Variables kept : 104
wave 2
Variables kept : 111
wave 3
Variables kept : 142
wave 4
Variables kept : 138
wave 5
Variables kept : 147
wave 6
Variables kept : 122
wave 7
Variables kept : 109
wave 8
Variables kept : 119
wave 9
Variables kept : 114
wave 10
Variables kept : 110
wave 11
Variables kept : 103
wave 12
Variables kept : 103
wave 13
Variables kept : 96
wave 14
Variables kept : 116


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.025
wave 1
Variables kept : 77
wave 2
Variables kept : 81
wave 3
Variables kept : 105
wave 4
Variables kept : 101
wave 5
Variables kept : 122
wave 6
Variables kept : 97
wave 7
Variables kept : 78
wave 8
Variables kept : 96
wave 9
Variables kept : 86
wave 10
Variables kept : 90
wave 11
Variables kept : 74
wave 12
Variables kept : 83
wave 13
Variables kept : 72
wave 14
Variables kept : 95


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.03
wave 1
Variables kept : 53
wave 2
Variables kept : 61
wave 3
Variables kept : 72
wave 4
Variables kept : 76
wave 5
Variables kept : 73
wave 6
Variables kept : 74
wave 7
Variables kept : 67
wave 8
Variables kept : 74
wave 9
Variables kept : 72
wave 10
Variables kept : 76
wave 11
Variables kept : 61
wave 12
Variables kept : 71
wave 13
Variables kept : 61
wave 14
Variables kept : 75


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.035
wave 1
Variables kept : 43
wave 2
Variables kept : 48
wave 3
Variables kept : 56
wave 4
Variables kept : 60
wave 5
Variables kept : 61
wave 6
Variables kept : 66
wave 7
Variables kept : 56
wave 8
Variables kept : 63
wave 9
Variables kept : 62
wave 10
Variables kept : 62
wave 11
Variables kept : 56
wave 12
Variables kept : 61
wave 13
Variables kept : 57
wave 14
Variables kept : 61


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.04
wave 1
Variables kept : 34
wave 2
Variables kept : 36
wave 3
Variables kept : 50
wave 4
Variables kept : 52
wave 5
Variables kept : 53
wave 6
Variables kept : 54
wave 7
Variables kept : 43
wave 8
Variables kept : 52
wave 9
Variables kept : 57
wave 10
Variables kept : 50
wave 11
Variables kept : 51
wave 12
Variables kept : 50
wave 13
Variables kept : 49
wave 14
Variables kept : 56


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.045
wave 1
Variables kept : 29
wave 2
Variables kept : 30
wave 3
Variables kept : 40
wave 4
Variables kept : 43
wave 5
Variables kept : 43
wave 6
Variables kept : 51
wave 7
Variables kept : 42
wave 8
Variables kept : 45
wave 9
Variables kept : 50
wave 10
Variables kept : 46
wave 11
Variables kept : 44
wave 12
Variables kept : 47
wave 13
Variables kept : 46
wave 14
Variables kept : 52


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.05
wave 1
Variables kept : 26
wave 2
Variables kept : 28
wave 3
Variables kept : 32
wave 4
Variables kept : 38
wave 5
Variables kept : 40
wave 6
Variables kept : 47
wave 7
Variables kept : 38
wave 8
Variables kept : 42
wave 9
Variables kept : 47
wave 10
Variables kept : 41
wave 11
Variables kept : 41
wave 12
Variables kept : 44
wave 13
Variables kept : 42
wave 14
Variables kept : 49


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.055
wave 1
Variables kept : 19
wave 2
Variables kept : 23
wave 3
Variables kept : 34
wave 4
Variables kept : 36
wave 5
Variables kept : 35
wave 6
Variables kept : 41
wave 7
Variables kept : 38
wave 8
Variables kept : 39
wave 9
Variables kept : 39
wave 10
Variables kept : 38
wave 11
Variables kept : 39
wave 12
Variables kept : 44
wave 13
Variables kept : 40
wave 14
Variables kept : 45


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.06
wave 1
Variables kept : 15
wave 2
Variables kept : 21
wave 3
Variables kept : 30
wave 4
Variables kept : 33
wave 5
Variables kept : 33
wave 6
Variables kept : 40
wave 7
Variables kept : 33
wave 8
Variables kept : 35
wave 9
Variables kept : 36
wave 10
Variables kept : 34
wave 11
Variables kept : 37
wave 12
Variables kept : 40
wave 13
Variables kept : 34
wave 14
Variables kept : 41


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.065
wave 1
Variables kept : 14
wave 2
Variables kept : 18
wave 3
Variables kept : 28
wave 4
Variables kept : 30
wave 5
Variables kept : 32
wave 6
Variables kept : 35
wave 7
Variables kept : 29
wave 8
Variables kept : 32
wave 9
Variables kept : 33
wave 10
Variables kept : 33
wave 11
Variables kept : 35
wave 12
Variables kept : 38
wave 13
Variables kept : 32
wave 14
Variables kept : 36


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.07
wave 1
Variables kept : 13
wave 2
Variables kept : 14
wave 3
Variables kept : 25
wave 4
Variables kept : 26
wave 5
Variables kept : 30
wave 6
Variables kept : 33
wave 7
Variables kept : 28
wave 8
Variables kept : 30
wave 9
Variables kept : 32
wave 10
Variables kept : 25
wave 11
Variables kept : 31
wave 12
Variables kept : 37
wave 13
Variables kept : 28
wave 14
Variables kept : 33


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.075
wave 1
Variables kept : 9
wave 2
Variables kept : 13
wave 3
Variables kept : 22
wave 4
Variables kept : 20
wave 5
Variables kept : 27
wave 6
Variables kept : 32
wave 7
Variables kept : 27
wave 8
Variables kept : 30
wave 9
Variables kept : 30
wave 10
Variables kept : 22
wave 11
Variables kept : 28
wave 12
Variables kept : 34
wave 13
Variables kept : 24
wave 14
Variables kept : 30


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

Lasso selection, mu = 0.08
wave 1
Variables kept : 9
wave 2
Variables kept : 12
wave 3
Variables kept : 20
wave 4
Variables kept : 19
wave 5
Variables kept : 26
wave 6
Variables kept : 32
wave 7
Variables kept : 25
wave 8
Variables kept : 27
wave 9
Variables kept : 31
wave 10
Variables kept : 21
wave 11
Variables kept : 28
wave 12
Variables kept : 32
wave 13
Variables kept : 20
wave 14
Variables kept : 26


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

Lasso selection, mu = 0.085
wave 1
Variables kept : 9
wave 2
Variables kept : 12
wave 3
Variables kept : 18
wave 4
Variables kept : 19
wave 5
Variables kept : 24
wave 6
Variables kept : 31
wave 7
Variables kept : 22
wave 8
Variables kept : 27
wave 9
Variables kept : 29
wave 10
Variables kept : 20
wave 11
Variables kept : 27
wave 12
Variables kept : 34
wave 13
Variables kept : 19
wave 14
Variables kept : 25


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

Lasso selection, mu = 0.09
wave 1
Variables kept : 8
wave 2
Variables kept : 10
wave 3
Variables kept : 17
wave 4
Variables kept : 18
wave 5
Variables kept : 20
wave 6
Variables kept : 30
wave 7
Variables kept : 21
wave 8
Variables kept : 25
wave 9
Variables kept : 27
wave 10
Variables kept : 18
wave 11
Variables kept : 25
wave 12
Variables kept : 30
wave 13
Variables kept : 18
wave 14
Variables kept : 22


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

Lasso selection, mu = 0.095
wave 1
Variables kept : 7
wave 2
Variables kept : 10
wave 3
Variables kept : 17
wave 4
Variables kept : 18
wave 5
Variables kept : 21
wave 6
Variables kept : 25
wave 7
Variables kept : 22
wave 8
Variables kept : 24
wave 9
Variables kept : 28
wave 10
Variables kept : 19
wave 11
Variables kept : 25
wave 12
Variables kept : 30
wave 13
Variables kept : 17
wave 14
Variables kept : 21


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

Lasso selection, mu = 0.1
wave 1
Variables kept : 5
wave 2
Variables kept : 10
wave 3
Variables kept : 16
wave 4
Variables kept : 18
wave 5
Variables kept : 19
wave 6
Variables kept : 22
wave 7
Variables kept : 22
wave 8
Variables kept : 25
wave 9
Variables kept : 28
wave 10
Variables kept : 19
wave 11
Variables kept : 23
wave 12
Variables kept : 28
wave 13
Variables kept : 15
wave 14
Variables kept : 22


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

In [48]:
Frame

Unnamed: 0,HMLasso,mu,R_square_train,RMSE_train,R_square_valid,RMSE_valid,variables kept
0,False,0.005,0.085457,1295.950989,0.006307616,1418.772,645
1,False,0.01,0.044984,1353.30318,-38345460000.0,54748800000000.0,280
2,False,0.015,0.041256,1358.585844,-205337600000000.0,2.931764e+17,155
3,False,0.02,0.031849,1371.915973,0.03598798,1376.395,117
4,False,0.025,0.032347,1371.209596,0.03041909,1384.346,96
5,False,0.03,0.025601,1380.769201,0.03392753,1379.337,76
6,False,0.035,0.023249,1384.101789,0.03748414,1374.259,62
7,False,0.04,0.017697,1391.970114,0.03315931,1380.434,57
8,False,0.045,0.017715,1391.943937,0.03076665,1383.85,53
9,False,0.05,0.021941,1385.956378,0.02660358,1389.794,50


In [73]:
list_mu = np.linspace(0.02,0.04,11)

In [74]:
Frame = multiples_models(X_train, Y_train, X_valid, Y_valid, list_mu, HMLasso =False, method ="mean", limit= -14, reduced =False, print_wave =True)

Lasso selection, mu = 0.02
wave 1
Variables kept : 104
wave 2
Variables kept : 111
wave 3
Variables kept : 142
wave 4
Variables kept : 138
wave 5
Variables kept : 147
wave 6
Variables kept : 122
wave 7
Variables kept : 109
wave 8
Variables kept : 119
wave 9
Variables kept : 114
wave 10
Variables kept : 110
wave 11
Variables kept : 103
wave 12
Variables kept : 103
wave 13
Variables kept : 96
wave 14
Variables kept : 116


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.022
wave 1
Variables kept : 91
wave 2
Variables kept : 97
wave 3
Variables kept : 120
wave 4
Variables kept : 123
wave 5
Variables kept : 141
wave 6
Variables kept : 111
wave 7
Variables kept : 95
wave 8
Variables kept : 107
wave 9
Variables kept : 101
wave 10
Variables kept : 100
wave 11
Variables kept : 88
wave 12
Variables kept : 95
wave 13
Variables kept : 83
wave 14
Variables kept : 106


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.024
wave 1
Variables kept : 81
wave 2
Variables kept : 85
wave 3
Variables kept : 109
wave 4
Variables kept : 103
wave 5
Variables kept : 133
wave 6
Variables kept : 102
wave 7
Variables kept : 79
wave 8
Variables kept : 99
wave 9
Variables kept : 91
wave 10
Variables kept : 91
wave 11
Variables kept : 78
wave 12
Variables kept : 87
wave 13
Variables kept : 75
wave 14
Variables kept : 99


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.026000000000000002
wave 1
Variables kept : 73
wave 2
Variables kept : 74
wave 3
Variables kept : 94
wave 4
Variables kept : 99
wave 5
Variables kept : 123
wave 6
Variables kept : 89
wave 7
Variables kept : 75
wave 8
Variables kept : 90
wave 9
Variables kept : 81
wave 10
Variables kept : 89
wave 11
Variables kept : 71
wave 12
Variables kept : 81
wave 13
Variables kept : 73
wave 14
Variables kept : 93


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.028
wave 1
Variables kept : 58
wave 2
Variables kept : 69
wave 3
Variables kept : 83
wave 4
Variables kept : 83
wave 5
Variables kept : 114
wave 6
Variables kept : 81
wave 7
Variables kept : 69
wave 8
Variables kept : 83
wave 9
Variables kept : 75
wave 10
Variables kept : 78
wave 11
Variables kept : 66
wave 12
Variables kept : 74
wave 13
Variables kept : 66
wave 14
Variables kept : 81


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.03
wave 1
Variables kept : 53
wave 2
Variables kept : 61
wave 3
Variables kept : 72
wave 4
Variables kept : 76
wave 5
Variables kept : 73
wave 6
Variables kept : 74
wave 7
Variables kept : 67
wave 8
Variables kept : 74
wave 9
Variables kept : 72
wave 10
Variables kept : 76
wave 11
Variables kept : 61
wave 12
Variables kept : 71
wave 13
Variables kept : 61
wave 14
Variables kept : 75


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.032
wave 1
Variables kept : 49
wave 2
Variables kept : 57
wave 3
Variables kept : 65
wave 4
Variables kept : 71
wave 5
Variables kept : 68
wave 6
Variables kept : 73
wave 7
Variables kept : 62
wave 8
Variables kept : 68
wave 9
Variables kept : 67
wave 10
Variables kept : 69
wave 11
Variables kept : 59
wave 12
Variables kept : 65
wave 13
Variables kept : 59
wave 14
Variables kept : 69


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.034
wave 1
Variables kept : 43
wave 2
Variables kept : 50
wave 3
Variables kept : 58
wave 4
Variables kept : 63
wave 5
Variables kept : 64
wave 6
Variables kept : 66
wave 7
Variables kept : 59
wave 8
Variables kept : 65
wave 9
Variables kept : 62
wave 10
Variables kept : 62
wave 11
Variables kept : 58
wave 12
Variables kept : 62
wave 13
Variables kept : 57
wave 14
Variables kept : 63


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.036000000000000004
wave 1
Variables kept : 41
wave 2
Variables kept : 44
wave 3
Variables kept : 53
wave 4
Variables kept : 58
wave 5
Variables kept : 59
wave 6
Variables kept : 64
wave 7
Variables kept : 53
wave 8
Variables kept : 58
wave 9
Variables kept : 61
wave 10
Variables kept : 59
wave 11
Variables kept : 56
wave 12
Variables kept : 59
wave 13
Variables kept : 57
wave 14
Variables kept : 61


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.038000000000000006
wave 1
Variables kept : 39
wave 2
Variables kept : 41
wave 3
Variables kept : 52
wave 4
Variables kept : 54
wave 5
Variables kept : 57
wave 6
Variables kept : 57
wave 7
Variables kept : 49
wave 8
Variables kept : 55
wave 9
Variables kept : 58
wave 10
Variables kept : 56
wave 11
Variables kept : 55
wave 12
Variables kept : 54
wave 13
Variables kept : 53
wave 14
Variables kept : 59


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


Lasso selection, mu = 0.04
wave 1
Variables kept : 34
wave 2
Variables kept : 36
wave 3
Variables kept : 50
wave 4
Variables kept : 52
wave 5
Variables kept : 53
wave 6
Variables kept : 54
wave 7
Variables kept : 43
wave 8
Variables kept : 52
wave 9
Variables kept : 57
wave 10
Variables kept : 50
wave 11
Variables kept : 51
wave 12
Variables kept : 50
wave 13
Variables kept : 49
wave 14
Variables kept : 56


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_1.loc[wave_1["INW1"] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wave_i.loc[wave_i["INWw".replace('w', str(i))] == 0] = np.nan
A value is trying to be set on a copy of

  Frame = Frame.append(summary_ML, ignore_index=True)


In [75]:
Frame

Unnamed: 0,HMLasso,mu,R_square_train,RMSE_train,R_square_valid,RMSE_valid,variables kept
0,False,0.02,0.031849,1371.915973,0.035988,1376.39491,117
1,False,0.022,0.029293,1375.537383,0.034877,1377.980577,107
2,False,0.024,0.029017,1375.929453,0.034806,1378.081862,100
3,False,0.026,0.032106,1371.552271,0.029237,1386.034397,94
4,False,0.028,0.027424,1378.185831,0.031042,1383.456274,82
5,False,0.03,0.025601,1380.769201,0.033928,1379.336784,76
6,False,0.032,0.022616,1384.999063,0.035074,1377.699608,70
7,False,0.034,0.023437,1383.836602,0.036305,1375.942055,64
8,False,0.036,0.020102,1388.561435,0.031165,1383.280637,62
9,False,0.038,0.018121,1391.368313,0.032078,1381.977652,60


### Application on test set

In [None]:
def test_model (X_train, Y_train, X_test, Y_test, HMLasso, method, mu, limit, reduced, coef, print_wave):
    
    (model, names_var, X_regression, Y_regression) = Within_estimates(X_train, Y_train, HMLasso, method, mu, limit, reduced, print_wave)
    
    #Selection of columns in the validation set
    selected_test = X_test[list(names_var)]
    
    (X_test_within, temporal_variables_within) = creation_data_within(selected_test)
    Y_test_within = creation_outcome_within(Y_test)
    
    (X_test_regression, Y_test_regression) = get_X_Y_within(X_test_within, Y_test_within, temporal_variables_within)
    
    R_square_train = model.score(X_regression,Y_regression)
    R_square_test = model.score(X_test_regression,Y_test_regression, sample_weight=None)
    
    return {"HMLasso" : HMLasso, "mu" : mu, "R_square_train" : R_square_train, "R_square_test" : R_square_test, "variables kept": len(list(names_var))}
