### Parameters

In [1]:
# Notebook-wide knobs; edit these before running.
SEED = 123     # RNG seed; a negative value makes the seeding cell use the clock instead
CLF_SS = 2     # stride used to sub-sample the base model list for a faster run
TARGETS = -1   # index (0-4) of the single target to predict; -1 for all

### Imports

In [2]:
import numpy as np  
import pandas as pd 
import pickle

In [3]:
import multiprocessing
from joblib import Parallel, delayed

In [4]:
from collections import Counter
import datetime as datetime

In [5]:
import gc
import psutil
import sys

In [6]:
import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
rcParams['figure.figsize'] = (15,5.5)

pd.options.display.max_rows = 150

In [7]:
# Wall-clock start of the run.
start = datetime.datetime.now()

# Seed NumPy's global RNG: a negative SEED means "use the current microsecond"
# (non-reproducible), anything else is used as the seed itself.
np.random.seed(datetime.datetime.now().microsecond if SEED < 0 else SEED)

### Data Loading

In [8]:
# Kaggle input directory for the competition data.
path = '/kaggle/input/trends-assessment-prediction/'

# Both feature tables are indexed by subject Id.
loading = pd.read_csv('{}/{}'.format(path, 'loading.csv')).set_index('Id')
fnc = pd.read_csv('{}/{}'.format(path, 'fnc.csv')).set_index('Id')

# The two tables are concatenated column-wise later, so they must have the same row count.
assert len(fnc) == len(loading)


In [9]:
# Training labels, indexed by subject Id.
y_data = pd.read_csv('{}/{}'.format(path, 'train_scores.csv')).set_index('Id')

# Full feature matrix: loading features followed by fnc features.
data = pd.concat([loading, fnc], axis=1)

# Rows without a label form the test set; labelled rows (in label order) form the training set.
test_data = data.loc[~data.index.isin(y_data.index)]
X = data.loc[y_data.index]
y = y_data

# Random fold id in {0..4} for every training row.
groups = np.random.randint(low=0, high=5, size=len(y))

### Model Setup

In [10]:
from sklearn.model_selection import RandomizedSearchCV, RepeatedKFold, KFold, ShuffleSplit
from sklearn.svm import SVR, NuSVR
from sklearn.linear_model import ElasticNet, Ridge, Lasso
from sklearn.model_selection import ParameterSampler
from sklearn.metrics import make_scorer, mean_absolute_error
from sklearn.base import clone
from sklearn.preprocessing import StandardScaler, FunctionTransformer 

In [11]:
# Search space sampled by the randomized search for the NuSVR meta-model.
nusvr_params = dict(
    kernel=['rbf'],
    C=[1, 2, 3, 5, 7, 10, 15, 20, 30, 50, 60, 80, 100, 130, 200],
    gamma=['scale'],
    nu=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1],
)

def trainNuSVR(x, y, groups, cv = 0, n_jobs = -1, **kwargs):
    """Tune a NuSVR over ``nusvr_params`` and return what ``trainModel`` returns.

    All arguments are forwarded unchanged to ``trainModel``; the estimator is
    a fresh ``NuSVR`` with a 1000 MB kernel cache.
    """
    return trainModel(x, y, groups, NuSVR(cache_size=1000), nusvr_params,
                      cv, n_jobs, **kwargs)

In [12]:
# Search space sampled by the randomized search for the ElasticNet meta-model.
enet_params = dict(
    alpha=[1e-5, 2e-5, 5e-5, 1e-4, 2e-4, 5e-4, 1e-3, 2e-3, 5e-3, 1e-2, 3e-2, 0.1, 0.3],
    l1_ratio=[0, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.95, 0.97, 0.98, 0.99, 1],
)

def trainENet(x, y, groups, cv = 0, n_jobs = -1, **kwargs):
    """Tune an ElasticNet over ``enet_params`` and return what ``trainModel`` returns.

    The estimator uses ``normalize=True``, random coordinate selection and at
    most 1000 iterations; all other arguments are forwarded to ``trainModel``.
    """
    estimator = ElasticNet(normalize=True, selection='random', max_iter=1000)
    return trainModel(x, y, groups, estimator, enet_params, cv, n_jobs, **kwargs)

In [13]:
def fnae(y_true, y_pred):
    """Sum of absolute errors divided by the sum of the true values.

    Positions where ``y_true`` is NaN are dropped from both inputs before the
    ratio is computed.
    """
    observed = ~np.isnan(y_true)
    truth = y_true[observed]
    abs_error = np.abs(truth - y_pred[observed])
    return np.sum(abs_error) / np.sum(truth)

# Scorer wrapper around fnae; greater_is_better=False marks it as a loss,
# which is why the search reports negative scores.
fnae_scorer = make_scorer(fnae, greater_is_better=False)

In [14]:
def trainModel(x, y, groups, clf, params, cv = 0, n_jobs = None,
                   verbose=0, splits=None, **kwargs):
    """Run a randomized hyper-parameter search and return the refitted best estimator.

    Parameters
    ----------
    x, y : training features and target handed to ``RandomizedSearchCV.fit``.
    groups : forwarded to ``fit`` as the ``groups`` keyword.
    clf : unfitted estimator to tune.
    params : parameter distributions / lists to sample from.
    n_jobs : parallel workers for the search; ``None`` is treated as ``-1``.
    cv, verbose, splits, **kwargs : accepted for call compatibility but not
        used; the splitter, the iteration count and the search verbosity are
        fixed inside this function.

    Returns
    -------
    The search's ``best_estimator_``.

    Side effects: prints the mean test score of every sampled candidate, the
    best estimator and its (rounded) best score.
    """
    if n_jobs is None:
        n_jobs = -1

    n_iter = 40

    # 7 random 75% / 20% train/validation splits per candidate.
    folds = ShuffleSplit(n_splits=7, train_size=0.75, test_size=0.20)
    search = RandomizedSearchCV(clf, params, cv=folds, n_iter=n_iter,
                                verbose=1, n_jobs=n_jobs, scoring=fnae_scorer)

    # `groups` is passed by keyword: newer scikit-learn releases made it
    # keyword-only, and the keyword form also works on older ones.
    search.fit(x, y, groups=groups)

    print(pd.DataFrame(search.cv_results_['mean_test_score'])); print()
    best = search.best_estimator_
    print(best)
    print("Best Score: {}".format(np.round(search.best_score_, 4)))

    return best

In [15]:
def cleanX(X, target):
    """Return a copy of ``X`` with every column named in the global ``fnc`` frame divided by 500.

    ``target`` is accepted but not used. The input frame is left untouched.
    """
    scaled = X.copy()
    fnc_columns = list(fnc.columns)
    scaled[fnc_columns] = scaled[fnc_columns] / 500
    return scaled

In [16]:
def runBag(n = 3, model_type = trainENet, data = None, **kwargs):
    """Train one model per hold-out fold and return the list of fitted models.

    Parameters
    ----------
    n : accepted for call compatibility; not used.
    model_type : callable ``(x_train, y_train, groups_train, **kwargs)`` that
        returns a fitted estimator exposing ``predict``.
    data : tuple ``(X, y, groups)``. Rows whose ``y`` is null are dropped
        from all three before training.
    **kwargs : forwarded to ``model_type``. If it contains ``'target'``,
        ``X`` is first passed through ``cleanX``.

    Returns
    -------
    list of fitted estimators, one per distinct value of ``groups`` in
    ascending order.

    Side effects: prints the hold-out ``fnae`` of every fold and the total
    elapsed time.
    """
    start_time = datetime.datetime.now()

    X, y, groups = data

    # Keep only rows with a known target.
    valid = ~y.isnull()
    X = X[valid]; y = y[valid]; groups = groups[valid]

    if 'target' in kwargs:
        X = cleanX(X, kwargs['target'])

    group_list = sorted(dict.fromkeys(groups))

    clfs = []
    for group in group_list:
        gc.collect()  # release the previous fold's intermediates before the next fit

        # One mask per fold instead of recomputing the comparison for every slice.
        in_holdout = groups == group
        x_holdout, y_holdout = X[in_holdout], y[in_holdout]
        x_train, y_train = X[~in_holdout], y[~in_holdout]
        groups_train = groups[~in_holdout]

        clf = model_type(x_train, y_train, groups_train, **kwargs)
        clfs.append(clf)

        predicted = clf.predict(x_holdout)
        print("{}: {:.4f}".format(group,
              fnae(y_holdout, predicted)  ) )

    end_time = datetime.datetime.now()
    print("\nModel Bag Time: {}\n".format(str(end_time - start_time).split('.', 2)[0] ))
    return clfs

In [17]:
def trainBaseClfs(clfs, clf_names, data, target = None, **kwargs):
    """Fit every base model on each fold's training rows and collect out-of-fold predictions.

    Parameters
    ----------
    clfs : list of unfitted estimators; each is cloned per fold.
    clf_names : display name for each entry of ``clfs`` (same order).
    data : tuple ``(X, y, groups)``; ``X`` is passed through ``cleanX`` and
        ``y`` must contain a column named ``target``.
    target : name of the target column to fit and score.
    **kwargs : accepted for call compatibility; not used.

    Returns
    -------
    tuple ``(all_base_clfs, base_preds, clf_pred_names, X_ordered, y_ordered,
    groups_ordered)`` where
      * ``all_base_clfs`` holds, per fold, the list of fitted estimators;
      * ``base_preds`` holds, per model, its hold-out predictions concatenated
        in fold order;
      * ``clf_pred_names`` holds the model names matching ``base_preds``;
      * the last three are the cleaned ``X``, ``y`` and ``groups`` re-ordered
        fold by fold so they line up with ``base_preds``.

    Raises
    ------
    ValueError if ``target`` is not in the global ``ALL_TARGETS``.

    Side effects: reseeds NumPy's global RNG with ``SEED`` at the start of
    every fold and prints per-model hold-out scores and timings.
    """
    start_time = datetime.datetime.now()

    X, y, groups = data
    X = cleanX(X, target)

    # Fail fast on an unknown target name (raises ValueError).
    ALL_TARGETS.index(target)

    group_list = sorted(dict.fromkeys(groups))

    def train_model(model, X, y):
        # Fit only on rows whose target is present.
        ss = ~pd.DataFrame(y).isnull().any(axis=1)
        model.fit(X[ss], y[ss])
        return model

    def predict_model(model, X):
        return model.predict(X)

    X_ordered = []; y_ordered = []; groups_ordered = []
    all_base_clfs = []
    base_preds = [[] for _ in clfs]           # one list of per-fold predictions per model
    clf_pred_names = [clf_names[idx] for idx in range(len(clfs))]

    for group in group_list:
        print("Training Fold {} of {}:".format(group, len(group_list)))
        np.random.seed(SEED)  # every fold starts from the same global RNG state

        in_holdout = groups == group
        x_holdout, y_holdout = X[in_holdout], y[in_holdout]
        x_train, y_train = X[~in_holdout], y[~in_holdout]

        X_ordered.append(x_holdout)
        y_ordered.append(y_holdout)
        groups_ordered.append(groups[in_holdout])

        # Fresh, unfitted copies for this fold; keep the fitted estimators
        # that come back from the workers.
        fold_clfs = [clone(clf) for clf in clfs]
        fold_clfs = Parallel(n_jobs=4)(
            delayed(train_model)(model, x_train, y_train[target]) for model in fold_clfs)
        all_base_clfs.append(fold_clfs)

        preds = Parallel(n_jobs=4)(
            delayed(predict_model)(model, x_holdout) for model in fold_clfs)

        for idx, pred in enumerate(preds):
            print("{:.4f} for {}".format(
                      fnae(y_holdout[target], pred), clf_names[idx]  ) )
            base_preds[idx].append(pred)

        print("\nTime Elapsed: {}\n".format(str(datetime.datetime.now() - start_time).split('.', 2)[0] ))

    # Stitch each model's per-fold predictions into one array (fold order).
    base_preds = [np.concatenate(fold_preds) for fold_preds in base_preds]

    print("\nBase Classifier Train Time: {}\n".format(str(datetime.datetime.now() - start_time).split('.', 2)[0] ))
    return (all_base_clfs, base_preds, clf_pred_names,
        pd.concat(X_ordered), pd.concat(y_ordered), np.concatenate(groups_ordered))

In [18]:
def Lassos():
    """Build the Lasso base models and their display names.

    When ``CLF_SS > 1`` every model (the same object) and its name are listed
    twice in a row.

    Returns ``(clfs, clf_names)`` as two parallel lists.
    """
    alphas = [1e-5, 3e-5, 1e-4, 3e-4, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1]
    repeats = 2 if CLF_SS > 1 else 1

    clfs, clf_names = [], []
    for alpha in alphas:
        model = Lasso(alpha=alpha, selection='random', max_iter=1000)
        label = 'Lasso alpha={}'.format(alpha)
        clfs.extend([model] * repeats)
        clf_names.extend([label] * repeats)

    return clfs, clf_names

In [19]:
def Ridges():
    """Build the Ridge base models and their display names.

    When ``CLF_SS > 1`` every model (the same object) and its name are listed
    twice in a row.

    Returns ``(clfs, clf_names)`` as two parallel lists.
    """
    alphas = [1e-4, 3e-4, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]
    repeats = 2 if CLF_SS > 1 else 1

    clfs, clf_names = [], []
    for alpha in alphas:
        model = Ridge(alpha=alpha)
        label = 'Ridge alpha={}'.format(alpha)
        clfs.extend([model] * repeats)
        clf_names.extend([label] * repeats)

    return clfs, clf_names

In [20]:
def SVRs():
    """Build the SVR base models over a C x epsilon grid, with display names.

    Models are ordered by ``C`` first and ``epsilon`` second.
    Returns ``(clfs, clf_names)`` as two parallel lists.
    """
    c_values = [0.2, 1, 7, 30, 50, 60, 100]
    epsilon_values = [1, 3, 7]
    grid = [(c, e) for c in c_values for e in epsilon_values]

    clfs = [SVR(C=c, epsilon=e, cache_size=1000) for c, e in grid]
    clf_names = ['SVR C={}, epsilon={}'.format(c, e) for c, e in grid]

    return clfs, clf_names

In [21]:
def ENets():
    """Build the ElasticNet base models and their display names.

    The full alpha x l1_ratio x max_iter grid is emitted twice: first with
    ``normalize=False`` (names prefixed ``Enet``), then with ``normalize=True``
    (names prefixed ``Enet-n``).

    Returns ``(clfs, clf_names)`` as two parallel lists.
    """
    alphas = [1e-4, 3e-4, 1e-3, 3e-3]
    l1_ratios = [0, 0.1, 0.5, 0.9, 0.95, 1]
    iteration_caps = [50, 100, 500, 1000]

    variants = (
        (False, 'Enet alpha={}, l1_ratio={}, n_iter={}'),
        (True, 'Enet-n alpha={}, l1_ratio={}, n_iter={}'),
    )

    clfs, clf_names = [], []
    for use_normalize, name_template in variants:
        for a in alphas:
            for l in l1_ratios:
                for n in iteration_caps:
                    clfs.append(ElasticNet(alpha=a, l1_ratio=l,
                                           normalize=use_normalize,
                                           selection='random',
                                           max_iter=n))
                    clf_names.append(name_template.format(a, l, n))

    return clfs, clf_names

In [22]:
def getBaseClfs(y_var):
    """Return the sub-sampled base models and their names for target ``y_var``.

    The SVR, ElasticNet, Lasso and Ridge families are concatenated in that
    order and every ``CLF_SS``-th entry is kept. ``y_var`` is only checked for
    membership in ``ALL_TARGETS`` (``ValueError`` if absent); the model list
    itself does not depend on it.
    """
    ALL_TARGETS.index(y_var)  # validation only; the position is not used

    clfs, clf_names = [], []
    for models, names in (SVRs(), ENets(), Lassos(), Ridges()):
        clfs += models
        clf_names += names

    return clfs[::CLF_SS], clf_names[::CLF_SS]


In [23]:
# Resolve the TARGETS setting into a list of target column names:
#   list of ints -> those columns; None or negative -> every column;
#   otherwise -> the single column at that position.
ALL_TARGETS = y.columns.to_list()
if isinstance(TARGETS, list):
    targets = [ALL_TARGETS[i] for i in TARGETS]
elif TARGETS is None or not (TARGETS >= 0):
    targets = ALL_TARGETS
else:
    targets = ALL_TARGETS[TARGETS: TARGETS + 1]

In [24]:
def metaFilter(X):
    """Select the meta-model input columns of ``X``.

    A column is kept when it belongs to the global ``loading`` frame or does
    not appear in the global ``data`` frame at all; columns that are in
    ``data`` but not in ``loading`` are dropped. Column order is preserved.
    """
    kept = []
    for column in X.columns:
        if column in loading.columns or column not in data.columns:
            kept.append(column)
    return X[kept]

### Train Models

In [25]:
# Per-target accumulators: fitted meta-model bags, the unfitted base model lists,
# the per-fold base model results, and the transformers applied to the meta features.
all_clfs = []; all_raw_base_clfs = []; all_base_clfs = []; scalers = []
for idx, y_var in enumerate(targets):
    print('---Training Models for {}---\n'.format(y_var))
       
    
    # train base classifiers
    # Stage 1a: build the sub-sampled list of unfitted base models for this target.
    raw_base_clfs, base_clf_names = getBaseClfs(y_var)
    all_raw_base_clfs.append((raw_base_clfs, base_clf_names))
    
    # Stage 1b: fit the base models fold by fold. Xe / ye / ge are the features,
    # labels and fold ids as returned by trainBaseClfs alongside the predictions.
    base_clfs, base_clf_preds, base_clf_names, Xe, ye, ge = \
                    trainBaseClfs(raw_base_clfs, base_clf_names, 
                                  data = (X, y, groups), 
                                  target=y_var, )
    # Append one prediction column per base model, named after the model.
    # The columns are keyed through a dict, so repeated names would collapse into one.
    Xe = pd.concat( (Xe, pd.DataFrame( dict(zip(base_clf_names, base_clf_preds)), index=Xe.index) ),
                     axis = 'columns')
    
    all_base_clfs.append((base_clfs, base_clf_preds, base_clf_names, Xe, ye, ge ))
    
    
    # train meta model
    # FunctionTransformer() is built without a function, which scikit-learn
    # documents as the identity transform.
    # NOTE(review): presumably a placeholder where a real scaler could be swapped in - confirm.
    s = FunctionTransformer()
    # 'age' is stacked with a tuned NuSVR; every other target with a tuned ElasticNet.
    if y_var == 'age':
        meta_model = trainNuSVR
    else:
        meta_model = trainENet
     
    s.fit(metaFilter(Xe))
    scalers.append(s)
    
    # Stage 2: one tuned meta-model per hold-out fold, trained on the filtered
    # features plus base predictions. `target` is left commented out, so runBag
    # receives no 'target' keyword.
    all_clfs.append( runBag(data = (s.transform(metaFilter(Xe)), ye[y_var], ge), # target=y_var,
                                   model_type = meta_model) )
    # run

---Training Models for age---

Training Fold 0 of 5:
0.1834 for SVR C=0.2, epsilon=1
0.1878 for SVR C=0.2, epsilon=7
0.1638 for SVR C=1, epsilon=3
0.1541 for SVR C=7, epsilon=1
0.1553 for SVR C=7, epsilon=7
0.1491 for SVR C=30, epsilon=3
0.1480 for SVR C=50, epsilon=1
0.1498 for SVR C=50, epsilon=7
0.1480 for SVR C=60, epsilon=3
0.1479 for SVR C=100, epsilon=1
0.1492 for SVR C=100, epsilon=7
0.1921 for Enet alpha=0.0001, l1_ratio=0, n_iter=100
0.1921 for Enet alpha=0.0001, l1_ratio=0, n_iter=1000
0.1903 for Enet alpha=0.0001, l1_ratio=0.1, n_iter=100
0.1903 for Enet alpha=0.0001, l1_ratio=0.1, n_iter=1000
0.1806 for Enet alpha=0.0001, l1_ratio=0.5, n_iter=100
0.1806 for Enet alpha=0.0001, l1_ratio=0.5, n_iter=1000
0.1636 for Enet alpha=0.0001, l1_ratio=0.9, n_iter=100
0.1636 for Enet alpha=0.0001, l1_ratio=0.9, n_iter=1000
0.1602 for Enet alpha=0.0001, l1_ratio=0.95, n_iter=100
0.1602 for Enet alpha=0.0001, l1_ratio=0.95, n_iter=1000
0.1537 for Enet alpha=0.0001, l1_ratio=1, n_iter=100

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   45.6s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:  4.8min finished


           0
0  -0.141215
1  -0.141161
2  -0.140909
3  -0.140861
4  -0.140749
5  -0.140611
6  -0.141053
7  -0.140850
8  -0.140635
9  -0.141138
10 -0.140762
11 -0.141653
12 -0.140950
13 -0.140978
14 -0.140958
15 -0.141068
16 -0.141012
17 -0.140700
18 -0.140730
19 -0.141104
20 -0.141117
21 -0.140748
22 -0.141460
23 -0.141568
24 -0.140831
25 -0.141568
26 -0.140612
27 -0.141117
28 -0.140741
29 -0.140908
30 -0.141480
31 -0.140681
32 -0.141356
33 -0.140900
34 -0.141160
35 -0.141095
36 -0.140952
37 -0.140790
38 -0.141436
39 -0.141498

NuSVR(C=50, cache_size=1000, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
      max_iter=-1, nu=0.6, shrinking=True, tol=0.001, verbose=False)
Best Score: -0.1406
0: 0.1474
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   33.4s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:  4.5min finished


           0
0  -0.141098
1  -0.142100
2  -0.141407
3  -0.141755
4  -0.141522
5  -0.141559
6  -0.140993
7  -0.141327
8  -0.141823
9  -0.141097
10 -0.141186
11 -0.141757
12 -0.140984
13 -0.141101
14 -0.141983
15 -0.141768
16 -0.141913
17 -0.141171
18 -0.141192
19 -0.141075
20 -0.141449
21 -0.141147
22 -0.141476
23 -0.142248
24 -0.141698
25 -0.142022
26 -0.141528
27 -0.141423
28 -0.141474
29 -0.142170
30 -0.141025
31 -0.141111
32 -0.141323
33 -0.141231
34 -0.141093
35 -0.141084
36 -0.141102
37 -0.141395
38 -0.141157
39 -0.141301

NuSVR(C=30, cache_size=1000, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
      max_iter=-1, nu=0.8, shrinking=True, tol=0.001, verbose=False)
Best Score: -0.141
1: 0.1416
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   31.3s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:  4.0min finished


           0
0  -0.141602
1  -0.141623
2  -0.141648
3  -0.141765
4  -0.142359
5  -0.141856
6  -0.141920
7  -0.141624
8  -0.141475
9  -0.142119
10 -0.141779
11 -0.141806
12 -0.141610
13 -0.141559
14 -0.142084
15 -0.141414
16 -0.141385
17 -0.141719
18 -0.141924
19 -0.141484
20 -0.141592
21 -0.141521
22 -0.141878
23 -0.142119
24 -0.141569
25 -0.142027
26 -0.141964
27 -0.141523
28 -0.142252
29 -0.142167
30 -0.141565
31 -0.141737
32 -0.141408
33 -0.141529
34 -0.141488
35 -0.142218
36 -0.141653
37 -0.141703
38 -0.141519
39 -0.141566

NuSVR(C=80, cache_size=1000, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
      max_iter=-1, nu=0.4, shrinking=True, tol=0.001, verbose=False)
Best Score: -0.1414
2: 0.1423
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   45.1s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:  4.5min finished


           0
0  -0.144548
1  -0.144588
2  -0.144603
3  -0.144777
4  -0.144171
5  -0.144599
6  -0.144656
7  -0.144890
8  -0.144256
9  -0.144901
10 -0.144844
11 -0.144694
12 -0.144174
13 -0.144212
14 -0.144794
15 -0.144367
16 -0.145031
17 -0.145118
18 -0.145072
19 -0.144392
20 -0.144718
21 -0.144825
22 -0.144628
23 -0.144570
24 -0.144296
25 -0.145463
26 -0.145301
27 -0.145020
28 -0.145231
29 -0.144447
30 -0.145039
31 -0.144569
32 -0.144770
33 -0.144184
34 -0.144491
35 -0.144510
36 -0.144677
37 -0.144732
38 -0.144813
39 -0.145015

NuSVR(C=100, cache_size=1000, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
      max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=False)
Best Score: -0.1442
3: 0.1385
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   45.5s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:  4.9min finished


           0
0  -0.145139
1  -0.145111
2  -0.145371
3  -0.145295
4  -0.145163
5  -0.146118
6  -0.145079
7  -0.145108
8  -0.144960
9  -0.145477
10 -0.145546
11 -0.145042
12 -0.144999
13 -0.145015
14 -0.145347
15 -0.144969
16 -0.145042
17 -0.145067
18 -0.145273
19 -0.145558
20 -0.145578
21 -0.145081
22 -0.145056
23 -0.145079
24 -0.145106
25 -0.145579
26 -0.145575
27 -0.145309
28 -0.145882
29 -0.145183
30 -0.145642
31 -0.145033
32 -0.145185
33 -0.145180
34 -0.145560
35 -0.145741
36 -0.144973
37 -0.145055
38 -0.144994
39 -0.145191

NuSVR(C=30, cache_size=1000, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
      max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=False)
Best Score: -0.145
4: 0.1402

Model Bag Time: 0:23:10

---Training Models for domain1_var1---

Training Fold 0 of 5:
0.1505 for SVR C=0.2, epsilon=1
0.1508 for SVR C=0.2, epsilon=7
0.1487 for SVR C=1, epsilon=3
0.1482 for SVR C=7, epsilon=1
0.1471 for SVR C=7, epsilon=7
0.1481 for SVR C=30, epsilon=3
0.1497 for SVR C

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   11.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


           0
0  -0.153287
1  -0.153392
2  -0.153201
3  -0.158999
4  -0.155195
5  -0.154553
6  -0.153303
7  -0.160393
8  -0.153071
9  -0.159777
10 -0.159071
11 -0.153257
12 -0.153330
13 -0.153357
14 -0.153195
15 -0.153177
16 -0.153439
17 -0.153412
18 -0.153446
19 -0.153044
20 -0.153055
21 -0.153462
22 -0.154510
23 -0.153188
24 -0.153147
25 -0.153446
26 -0.153446
27 -0.154723
28 -0.154965
29 -0.153400
30 -0.153393
31 -0.153004
32 -0.157500
33 -0.157159
34 -0.153088
35 -0.155122
36 -0.157120
37 -0.160192
38 -0.153244
39 -0.153449

ElasticNet(alpha=0.005, copy_X=True, fit_intercept=True, l1_ratio=0.95,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.153
0: 0.1466
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   15.8s finished
  positive)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


           0
0  -0.150228
1  -0.158473
2  -0.158473
3  -0.150750
4  -0.150789
5  -0.150787
6  -0.150908
7  -0.158473
8  -0.158473
9  -0.150794
10 -0.150774
11 -0.152558
12 -0.150773
13 -0.150771
14 -0.150703
15 -0.150199
16 -0.150220
17 -0.150772
18 -0.150707
19 -0.155521
20 -0.150720
21 -0.151144
22 -0.150873
23 -0.150766
24 -0.150786
25 -0.150750
26 -0.150597
27 -0.150499
28 -0.150707
29 -0.158473
30 -0.150663
31 -0.150579
32 -0.150661
33 -0.150603
34 -0.157103
35 -0.151179
36 -0.150754
37 -0.150894
38 -0.150788
39 -0.152665

ElasticNet(alpha=2e-05, copy_X=True, fit_intercept=True, l1_ratio=0.99,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1502
1: 0.1550
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   23.6s finished
  positive)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


           0
0  -0.156826
1  -0.150316
2  -0.149507
3  -0.150190
4  -0.150301
5  -0.155503
6  -0.150372
7  -0.150278
8  -0.150323
9  -0.150404
10 -0.150163
11 -0.156826
12 -0.150261
13 -0.154607
14 -0.150352
15 -0.150300
16 -0.150166
17 -0.150205
18 -0.150386
19 -0.150286
20 -0.156826
21 -0.150206
22 -0.149537
23 -0.149734
24 -0.150368
25 -0.152049
26 -0.150451
27 -0.150543
28 -0.150543
29 -0.149919
30 -0.150498
31 -0.150318
32 -0.150544
33 -0.150283
34 -0.156826
35 -0.150159
36 -0.150190
37 -0.155159
38 -0.150539
39 -0.150483

ElasticNet(alpha=2e-05, copy_X=True, fit_intercept=True, l1_ratio=1,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1495
2: 0.1502
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Done  70 tasks      | elapsed:    6.4s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   19.2s finished
  positive)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


           0
0  -0.152210
1  -0.152544
2  -0.152362
3  -0.152431
4  -0.152350
5  -0.152426
6  -0.152409
7  -0.152510
8  -0.152199
9  -0.152243
10 -0.151901
11 -0.156929
12 -0.151875
13 -0.152555
14 -0.154021
15 -0.152337
16 -0.152515
17 -0.152531
18 -0.152075
19 -0.152480
20 -0.152307
21 -0.160415
22 -0.152270
23 -0.160415
24 -0.152512
25 -0.152330
26 -0.152297
27 -0.152210
28 -0.152561
29 -0.152282
30 -0.152246
31 -0.152209
32 -0.156889
33 -0.151904
34 -0.152241
35 -0.160415
36 -0.151987
37 -0.160415
38 -0.152599
39 -0.154415

ElasticNet(alpha=5e-05, copy_X=True, fit_intercept=True, l1_ratio=0.99,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1519
3: 0.1493
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   16.9s finished


           0
0  -0.150839
1  -0.150658
2  -0.150831
3  -0.157864
4  -0.151068
5  -0.150947
6  -0.150847
7  -0.151214
8  -0.151191
9  -0.150914
10 -0.150587
11 -0.151083
12 -0.158537
13 -0.151072
14 -0.150944
15 -0.151194
16 -0.150675
17 -0.151209
18 -0.150965
19 -0.150928
20 -0.151077
21 -0.151087
22 -0.150808
23 -0.151137
24 -0.150786
25 -0.151168
26 -0.151208
27 -0.152672
28 -0.158537
29 -0.158537
30 -0.158537
31 -0.151129
32 -0.151176
33 -0.150648
34 -0.150813
35 -0.151111
36 -0.152509
37 -0.151173
38 -0.151162
39 -0.150848

ElasticNet(alpha=0.0002, copy_X=True, fit_intercept=True, l1_ratio=1,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1506
4: 0.1468

Model Bag Time: 0:01:31

---Training Models for domain1_var2---

Training Fold 0 of 5:
0.1495 for SVR C=0.2, epsilon=1
0.1497 for SVR C=0.2, epsilon=7
0.1490 for SVR C=1, epsilon=3
0.1487 for SVR C=7, epsilon=

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   11.4s finished


           0
0  -0.151104
1  -0.150202
2  -0.151015
3  -0.151243
4  -0.151264
5  -0.151181
6  -0.150910
7  -0.151264
8  -0.151101
9  -0.151264
10 -0.151251
11 -0.151132
12 -0.150497
13 -0.150095
14 -0.150921
15 -0.151046
16 -0.150262
17 -0.150663
18 -0.150280
19 -0.151109
20 -0.151101
21 -0.150378
22 -0.151152
23 -0.151032
24 -0.151249
25 -0.150524
26 -0.150517
27 -0.151265
28 -0.151264
29 -0.150115
30 -0.150084
31 -0.151101
32 -0.151264
33 -0.151223
34 -0.151106
35 -0.151264
36 -0.151216
37 -0.151264
38 -0.151147
39 -0.150505

ElasticNet(alpha=0.0005, copy_X=True, fit_intercept=True, l1_ratio=0.99,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1501
0: 0.1480
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 273 out of 280 | elapsed:   14.1s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   14.1s finished
  positive)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


           0
0  -0.151713
1  -0.152132
2  -0.152132
3  -0.151820
4  -0.151833
5  -0.151584
6  -0.151841
7  -0.152132
8  -0.152132
9  -0.151822
10 -0.151299
11 -0.152074
12 -0.151729
13 -0.151745
14 -0.151101
15 -0.151577
16 -0.151003
17 -0.151737
18 -0.151947
19 -0.152132
20 -0.151949
21 -0.151947
22 -0.152079
23 -0.151259
24 -0.151251
25 -0.151835
26 -0.151018
27 -0.151040
28 -0.151107
29 -0.152132
30 -0.150839
31 -0.151016
32 -0.151050
33 -0.151015
34 -0.152113
35 -0.151871
36 -0.151831
37 -0.152068
38 -0.151435
39 -0.152132

ElasticNet(alpha=0.0005, copy_X=True, fit_intercept=True, l1_ratio=1,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1508
1: 0.1468
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   23.0s finished


           0
0  -0.151747
1  -0.151597
2  -0.150893
3  -0.151723
4  -0.151796
5  -0.151734
6  -0.150704
7  -0.151726
8  -0.151736
9  -0.151629
10 -0.151742
11 -0.151747
12 -0.150660
13 -0.151747
14 -0.150694
15 -0.151686
16 -0.151796
17 -0.151750
18 -0.150707
19 -0.151749
20 -0.151747
21 -0.151818
22 -0.150912
23 -0.150763
24 -0.151701
25 -0.151747
26 -0.150768
27 -0.151235
28 -0.151097
29 -0.150679
30 -0.151501
31 -0.150643
32 -0.151219
33 -0.151813
34 -0.151747
35 -0.151779
36 -0.151724
37 -0.151747
38 -0.151035
39 -0.151505

ElasticNet(alpha=0.0001, copy_X=True, fit_intercept=True, l1_ratio=0.95,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1506
2: 0.1500
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  70 tasks      | elapsed:    6.2s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   18.5s finished


           0
0  -0.148162
1  -0.147656
2  -0.147583
3  -0.148074
4  -0.148198
5  -0.148080
6  -0.148030
7  -0.147974
8  -0.148190
9  -0.147606
10 -0.147881
11 -0.148180
12 -0.147882
13 -0.147688
14 -0.148144
15 -0.148107
16 -0.147717
17 -0.147928
18 -0.147679
19 -0.147886
20 -0.147594
21 -0.148210
22 -0.148163
23 -0.148210
24 -0.147970
25 -0.148141
26 -0.148145
27 -0.148162
28 -0.147749
29 -0.148160
30 -0.148168
31 -0.148162
32 -0.148176
33 -0.148025
34 -0.148169
35 -0.148210
36 -0.148199
37 -0.148210
38 -0.148137
39 -0.148210

ElasticNet(alpha=0.0002, copy_X=True, fit_intercept=True, l1_ratio=0.98,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1476
3: 0.1516
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   16.2s finished


           0
0  -0.151212
1  -0.150661
2  -0.151224
3  -0.151493
4  -0.150496
5  -0.151110
6  -0.150447
7  -0.150779
8  -0.150493
9  -0.151425
10 -0.150448
11 -0.151274
12 -0.151493
13 -0.150498
14 -0.151176
15 -0.150921
16 -0.150802
17 -0.150717
18 -0.150485
19 -0.151437
20 -0.150495
21 -0.151095
22 -0.151193
23 -0.150766
24 -0.151263
25 -0.150582
26 -0.150680
27 -0.151492
28 -0.151493
29 -0.151493
30 -0.151493
31 -0.151041
32 -0.150972
33 -0.150775
34 -0.151246
35 -0.151228
36 -0.151379
37 -0.150633
38 -0.150977
39 -0.151193

ElasticNet(alpha=0.0001, copy_X=True, fit_intercept=True, l1_ratio=0.97,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1504
4: 0.1491

Model Bag Time: 0:01:25

---Training Models for domain2_var1---

Training Fold 0 of 5:
0.1792 for SVR C=0.2, epsilon=1
0.1793 for SVR C=0.2, epsilon=7
0.1775 for SVR C=1, epsilon=3
0.1770 for SVR C=7, epsil

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   12.0s finished
  positive)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


           0
0  -0.184434
1  -0.182787
2  -0.183428
3  -0.188425
4  -0.187729
5  -0.185857
6  -0.183462
7  -0.189085
8  -0.183936
9  -0.189026
10 -0.188484
11 -0.184448
12 -0.182228
13 -0.182647
14 -0.183433
15 -0.183424
16 -0.183075
17 -0.183472
18 -0.183126
19 -0.183546
20 -0.183520
21 -0.183316
22 -0.185744
23 -0.183426
24 -0.184285
25 -0.183430
26 -0.183422
27 -0.186315
28 -0.187015
29 -0.182809
30 -0.183075
31 -0.183680
32 -0.187859
33 -0.187519
34 -0.183447
35 -0.187691
36 -0.187479
37 -0.189085
38 -0.184452
39 -0.183416

ElasticNet(alpha=2e-05, copy_X=True, fit_intercept=True, l1_ratio=0.97,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1822
0: 0.1739
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Done  92 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   11.1s finished
  positive)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


           0
0  -0.181829
1  -0.181624
2  -0.181042
3  -0.181422
4  -0.181039
5  -0.180030
6  -0.181424
7  -0.179922
8  -0.186328
9  -0.181112
10 -0.181042
11 -0.181047
12 -0.180561
13 -0.181123
14 -0.181398
15 -0.183223
16 -0.181048
17 -0.180814
18 -0.181418
19 -0.181053
20 -0.181099
21 -0.185089
22 -0.180788
23 -0.186328
24 -0.180307
25 -0.180140
26 -0.181065
27 -0.185592
28 -0.185966
29 -0.184707
30 -0.179458
31 -0.186328
32 -0.186328
33 -0.181087
34 -0.180667
35 -0.181038
36 -0.181128
37 -0.181001
38 -0.180994
39 -0.180517

ElasticNet(alpha=0.0002, copy_X=True, fit_intercept=True, l1_ratio=1,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1795
1: 0.1801
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   18.0s finished


           0
0  -0.181363
1  -0.180611
2  -0.180811
3  -0.180677
4  -0.180612
5  -0.180841
6  -0.185877
7  -0.180919
8  -0.185877
9  -0.180871
10 -0.180259
11 -0.180467
12 -0.183321
13 -0.180811
14 -0.180806
15 -0.185877
16 -0.185877
17 -0.182831
18 -0.183773
19 -0.185231
20 -0.180559
21 -0.180618
22 -0.185877
23 -0.181366
24 -0.182589
25 -0.182611
26 -0.184352
27 -0.180629
28 -0.180749
29 -0.180737
30 -0.180765
31 -0.180352
32 -0.180853
33 -0.180514
34 -0.185877
35 -0.180599
36 -0.180881
37 -0.185656
38 -0.180611
39 -0.185877

ElasticNet(alpha=0.0001, copy_X=True, fit_intercept=True, l1_ratio=0.99,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1803
2: 0.1813
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   17.3s finished


           0
0  -0.185674
1  -0.181985
2  -0.182082
3  -0.182738
4  -0.182378
5  -0.181460
6  -0.187213
7  -0.182279
8  -0.182501
9  -0.181741
10 -0.182395
11 -0.187213
12 -0.182086
13 -0.186907
14 -0.181445
15 -0.181515
16 -0.187213
17 -0.182228
18 -0.181326
19 -0.182639
20 -0.182652
21 -0.181523
22 -0.187213
23 -0.185972
24 -0.181607
25 -0.182383
26 -0.182993
27 -0.181245
28 -0.182336
29 -0.182794
30 -0.182077
31 -0.182566
32 -0.182430
33 -0.182388
34 -0.182480
35 -0.182417
36 -0.185782
37 -0.182446
38 -0.182406
39 -0.187210

ElasticNet(alpha=5e-05, copy_X=True, fit_intercept=True, l1_ratio=0.98,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1812
3: 0.1775
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   20.4s finished


           0
0  -0.179085
1  -0.179290
2  -0.179185
3  -0.179141
4  -0.179439
5  -0.185231
6  -0.179971
7  -0.179497
8  -0.179601
9  -0.179262
10 -0.179198
11 -0.179358
12 -0.179336
13 -0.178961
14 -0.185231
15 -0.179154
16 -0.179400
17 -0.179085
18 -0.179309
19 -0.179054
20 -0.179456
21 -0.179695
22 -0.185231
23 -0.179456
24 -0.179265
25 -0.179342
26 -0.179437
27 -0.179365
28 -0.179181
29 -0.179278
30 -0.179345
31 -0.179512
32 -0.185231
33 -0.179175
34 -0.179150
35 -0.179331
36 -0.179292
37 -0.179315
38 -0.178948
39 -0.178927

ElasticNet(alpha=5e-05, copy_X=True, fit_intercept=True, l1_ratio=0.95,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1789
4: 0.1858

Model Bag Time: 0:01:22

---Training Models for domain2_var2---

Training Fold 0 of 5:
0.1802 for SVR C=0.2, epsilon=1
0.1805 for SVR C=0.2, epsilon=7
0.1790 for SVR C=1, epsilon=3
0.1779 for SVR C=7, epsilo

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   11.8s finished
  positive)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


           0
0  -0.176018
1  -0.174979
2  -0.175502
3  -0.177900
4  -0.177912
5  -0.176791
6  -0.175482
7  -0.178175
8  -0.175757
9  -0.178172
10 -0.177930
11 -0.176061
12 -0.174865
13 -0.174904
14 -0.175580
15 -0.175499
16 -0.175105
17 -0.175412
18 -0.175130
19 -0.175559
20 -0.175554
21 -0.175236
22 -0.176714
23 -0.175501
24 -0.176023
25 -0.175341
26 -0.175331
27 -0.177077
28 -0.177483
29 -0.174970
30 -0.175054
31 -0.175698
32 -0.177702
33 -0.177523
34 -0.175508
35 -0.177902
36 -0.177501
37 -0.178175
38 -0.176074
39 -0.175323

ElasticNet(alpha=2e-05, copy_X=True, fit_intercept=True, l1_ratio=0.97,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1749
0: 0.1756
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Done  92 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done 273 out of 280 | elapsed:   10.7s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   10.9s finished
  positive)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


           0
0  -0.173225
1  -0.173042
2  -0.172724
3  -0.172958
4  -0.172720
5  -0.171982
6  -0.172958
7  -0.171969
8  -0.175891
9  -0.172778
10 -0.172470
11 -0.172696
12 -0.172145
13 -0.172784
14 -0.172950
15 -0.174181
16 -0.172730
17 -0.172260
18 -0.172956
19 -0.172671
20 -0.172603
21 -0.175271
22 -0.172263
23 -0.175891
24 -0.172051
25 -0.171987
26 -0.172496
27 -0.175601
28 -0.175730
29 -0.175019
30 -0.171952
31 -0.175891
32 -0.175891
33 -0.172566
34 -0.172191
35 -0.172705
36 -0.172787
37 -0.172411
38 -0.172405
39 -0.172132

ElasticNet(alpha=0.0002, copy_X=True, fit_intercept=True, l1_ratio=1,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.172
1: 0.1706
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   17.2s finished


           0
0  -0.170055
1  -0.169565
2  -0.169486
3  -0.169629
4  -0.169564
5  -0.169347
6  -0.172657
7  -0.169603
8  -0.172657
9  -0.169287
10 -0.168494
11 -0.168745
12 -0.171353
13 -0.169386
14 -0.169079
15 -0.172657
16 -0.172657
17 -0.170987
18 -0.171693
19 -0.172306
20 -0.168819
21 -0.169560
22 -0.172657
23 -0.170069
24 -0.170803
25 -0.170819
26 -0.171814
27 -0.169516
28 -0.169600
29 -0.169466
30 -0.169007
31 -0.168566
32 -0.169327
33 -0.168784
34 -0.172657
35 -0.168850
36 -0.169314
37 -0.172633
38 -0.169565
39 -0.172657

ElasticNet(alpha=0.0001, copy_X=True, fit_intercept=True, l1_ratio=0.99,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1685
2: 0.1749
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   16.6s finished


           0
0  -0.178668
1  -0.175421
2  -0.175478
3  -0.176611
4  -0.176044
5  -0.175641
6  -0.179553
7  -0.175737
8  -0.176190
9  -0.175304
10 -0.175946
11 -0.179553
12 -0.175481
13 -0.179547
14 -0.175202
15 -0.175221
16 -0.179553
17 -0.175674
18 -0.175279
19 -0.176415
20 -0.176427
21 -0.175223
22 -0.179553
23 -0.178912
24 -0.175254
25 -0.176013
26 -0.176778
27 -0.175258
28 -0.175932
29 -0.176699
30 -0.175475
31 -0.176337
32 -0.176170
33 -0.175961
34 -0.176127
35 -0.176113
36 -0.178756
37 -0.175813
38 -0.175753
39 -0.179553

ElasticNet(alpha=1e-05, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1752
3: 0.1790
Fitting 7 folds for each of 40 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-1)]: Done 268 tasks      | elapsed:   19.7s
[Parallel(n_jobs=-1)]: Done 273 out of 280 | elapsed:   20.5s remaining:    0.5s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:   20.9s finished


           0
0  -0.175326
1  -0.176955
2  -0.175871
3  -0.177087
4  -0.176757
5  -0.179546
6  -0.177331
7  -0.176679
8  -0.175481
9  -0.176944
10 -0.175336
11 -0.176155
12 -0.176903
13 -0.175445
14 -0.179546
15 -0.175826
16 -0.176707
17 -0.175714
18 -0.176870
19 -0.175717
20 -0.176348
21 -0.177338
22 -0.179546
23 -0.176331
24 -0.176903
25 -0.176848
26 -0.176456
27 -0.176169
28 -0.175864
29 -0.176950
30 -0.176132
31 -0.176509
32 -0.179546
33 -0.175855
34 -0.175818
35 -0.176856
36 -0.176894
37 -0.176917
38 -0.175583
39 -0.175445

ElasticNet(alpha=2e-05, copy_X=True, fit_intercept=True, l1_ratio=0.95,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=None, selection='random', tol=0.0001, warm_start=False)
Best Score: -0.1753
4: 0.1701

Model Bag Time: 0:01:21



  positive)


### Prediction Code

In [26]:
def predictBag(X, y, groups, clfs, target = None):
    """Score each fold's model on its own hold-out group and pool the results.

    Parameters
    ----------
    X : feature matrix, row-aligned with ``y``; it is indexed with boolean masks.
    y : pandas Series of target values; rows where it is null are dropped
        (from ``X`` and ``groups`` as well) before anything else happens.
    groups : array of fold labels, row-aligned with ``y``.
    clfs : sequence of fitted models; ``clfs[i]`` predicts the rows whose
        label is the i-th distinct value of ``groups`` in sorted order.
    target : optional; when given, ``X`` is first passed through
        ``cleanX(X, target)``.

    Returns
    -------
    (y_pred, y_true) : two 1-D arrays, concatenated fold by fold in sorted
        group order.
    """
    start_time = datetime.datetime.now()

    valid = ~y.isnull()
    X = X[valid]; y = y[valid]; groups = groups[valid]

    if target is not None:
        X = cleanX(X, target)

    # Distinct fold labels in ascending order; position i pairs with clfs[i].
    group_list = sorted(set(groups))

    preds = []; ys = []
    for idx, group in enumerate(group_list):
        gc.collect()
        in_fold = groups == group   # computed once, used for both X and y
        x_holdout = X[in_fold]
        y_holdout = y[in_fold]

        y_pred = clfs[idx].predict(x_holdout)
        preds.append(y_pred)
        ys.append(y_holdout)

        print("{}: {:.4f}".format(group,
              fnae(y_holdout, y_pred) ) )

    y_pred = np.concatenate(preds)
    y_true = np.concatenate(ys)

    # The original literal began with backslash-B, which is not a valid escape
    # and printed a stray backslash; a leading newline was intended.
    print("\nBag Prediction Time: {}\n".format(str(datetime.datetime.now() - start_time).split('.', 2)[0] ))
    return y_pred, y_true

In [27]:
def predictAll(X_test, all_base_clfs, all_clfs):
    """Predict every entry of the global ``targets`` for ``X_test`` with the stacked ensemble.

    For each target (position ``tidx``):
      1. ``cleanX(X_test, target)`` produces the base feature frame ``Xi``.
      2. Every model in every group of ``all_base_clfs[tidx][0]`` predicts on
         ``Xi``; predictions of the same model position are averaged across
         groups (2-D predictions are averaged column by column).
      3. The averaged predictions are appended to ``Xi`` as columns named by
         ``all_base_clfs[tidx][2]``, giving ``Xf``.
      4. ``metaFilter(Xf)`` is scaled with ``scalers[tidx]`` and fed to every
         model in ``all_clfs[tidx]``; their mean is the final prediction.

    Parameters
    ----------
    X_test : DataFrame of rows to predict; its index becomes the result index.
    all_base_clfs : per-target containers; items ``[0]`` (groups of fitted base
        models) and ``[2]`` (names for the base-prediction columns) are read.
    all_clfs : per-target sequences of fitted stacker models.

    Returns
    -------
    (all_preds, Xf) : ``all_preds`` is a DataFrame with one column per target;
        ``Xf`` is the stacked feature frame of the LAST target processed, or
        ``None`` when ``targets`` is empty.
    """
    start_time = datetime.datetime.now()

    def predict_model(model, X):
        return model.predict(X)

    def elapsed():
        # Wall time since the call started, with the fractional seconds cut off.
        return str(datetime.datetime.now() - start_time).split('.', 2)[0]

    all_preds = pd.DataFrame(columns = targets, index=X_test.index)
    Xf = None   # keeps the return statement valid when `targets` is empty
    for tidx, y_var in enumerate(targets): # loop over targets
        print(y_var)
        Xi = cleanX(X_test, y_var)
        base_clfs = all_base_clfs[tidx][0]

        # preds[g][c] = prediction of base model c trained on group g
        preds = []
        for g_idx, g_clfs in enumerate(base_clfs): # loop over groups
            print(g_idx)
            preds.append(Parallel(n_jobs=4)(delayed(predict_model)(model, Xi) for model in g_clfs))
        # The original literal began with an invalid backslash-B escape.
        print("\nBase Classifier Prediction Time: {}\n".format(elapsed()))

        # Average each base model's prediction across groups. sub_preds is a
        # reusable (n_groups, n_rows) buffer; np.mean returns a fresh array
        # each time, so reusing the buffer is safe.
        c_preds = []; sub_preds = np.zeros((len(preds), len(Xi)))
        for c_idx in range(0, len(preds[0])):
            if len(preds[0][c_idx].shape) > 1:
                # multi-output model: one averaged column per output
                for t_idx in range(0, preds[0][c_idx].shape[1]):
                    for g_idx, this_pred_group in enumerate(preds):
                        sub_preds[g_idx, :] = this_pred_group[c_idx][:, t_idx]
                    c_preds.append(np.mean( sub_preds, axis = 0))
            else:
                for g_idx, this_pred_group in enumerate(preds):
                    sub_preds[g_idx, :] = this_pred_group[c_idx]
                c_preds.append(np.mean( sub_preds, axis = 0))

        Xf = pd.concat( (Xi, pd.DataFrame( dict(zip(all_base_clfs[tidx][2], c_preds)), index=Xi.index) ),
                     axis = 'columns')
        print("\nTime Elapsed: {}\n".format(elapsed()))

        s = scalers[tidx]
        print('\nrunning stacker')
        # Same input for every stacker model, so build it once instead of
        # once per model.
        meta_X = s.transform(metaFilter(Xf))
        pred = Parallel(n_jobs=4)(delayed(predict_model)(model, meta_X)
                                                       for model in all_clfs[tidx])
        stack_preds = np.zeros((len(all_clfs[tidx]), len(Xi)))
        for m_idx, model_pred in enumerate(pred):
            stack_preds[m_idx, :] = model_pred
        all_preds[y_var] = np.mean(stack_preds, axis = 0)

    # The original literal began with an invalid backslash-P escape.
    print("\nPrediction Time: {}\n".format(elapsed()))
    return all_preds, Xf

### Show Scores by Fold

In [28]:
# Out-of-fold score of the stacker for every target: each fold's stacker model
# is scored on its own hold-out group via predictBag, then pooled.
y_preds = pd.DataFrame(index = X.index)
y_trues = y_preds.copy()
# dtype=float matters: without it the column is object-typed and the
# scores.round(4) at the end of this cell leaves the values unrounded.
scores = pd.DataFrame(index = targets, columns = ['score'], dtype = float)
for idx, y_var in enumerate(targets):
    print(y_var)
    s = scalers[idx]
    # Items 3, 4 and 5 of the per-target container are passed to predictBag
    # as its X (after metaFilter + scaling), y and groups arguments.
    y_pred, y_true =  predictBag(s.transform(metaFilter(all_base_clfs[idx][3])),
                                 all_base_clfs[idx][4][y_var], all_base_clfs[idx][5], all_clfs[idx] )
    score = fnae(y_true, y_pred)
    print('{}: {:.4f}\n\n'.format(y_var, score))
    scores.loc[y_var] = score

scores.round(4) # MSCORE

age
0: 0.1474
1: 0.1416
2: 0.1423
3: 0.1385
4: 0.1402
\Bag Prediction Time: 0:00:04

age: 0.1420


domain1_var1
0: 0.1466
1: 0.1550
2: 0.1502
3: 0.1493
4: 0.1468
\Bag Prediction Time: 0:00:00

domain1_var1: 0.1496


domain1_var2
0: 0.1480
1: 0.1468
2: 0.1500
3: 0.1516
4: 0.1491
\Bag Prediction Time: 0:00:00

domain1_var2: 0.1491


domain2_var1
0: 0.1739
1: 0.1801
2: 0.1813
3: 0.1775
4: 0.1858
\Bag Prediction Time: 0:00:00

domain2_var1: 0.1796


domain2_var2
0: 0.1756
1: 0.1706
2: 0.1749
3: 0.1790
4: 0.1701
\Bag Prediction Time: 0:00:00

domain2_var2: 0.1741




Unnamed: 0,score
age,0.141986
domain1_var1,0.149588
domain1_var2,0.149135
domain2_var1,0.179633
domain2_var2,0.1741


### Show Overall Score

In [29]:
# Weighted overall score: the first target is weighted .3 and the other four
# .175 each. This is a best-effort summary: it cannot be computed when
# `scores` does not have one row per weight, so a failure is reported and the
# notebook carries on instead of stopping.
try:
    weights = pd.DataFrame( index = ALL_TARGETS, data = [.3, .175, .175, .175, .175] )
    # Work on the single 'score' column as a float Series so the reductions
    # below return plain scalars.
    score_col = scores['score'].astype(float)
    overall_score = (score_col * weights.values.ravel()).sum()
    age_score = score_col.iloc[:1].mean()
    other_scores = score_col.iloc[1:].mean()

    print(np.round(scores,4))
    print("\nOverall Score: {:.4f}".format(overall_score))

    print("   {:.4f}:  {:.4f} / {:.4f}   {}".format(overall_score, age_score, other_scores,
                          [ float(np.round(v, 4)) for v in score_col ] ))

except Exception as err:
    # Previously a bare `except: pass`, which hid every error (and also
    # swallowed KeyboardInterrupt). Say why the summary is missing.
    print("Overall score not computed: {!r}".format(err))

                 score
age           0.141986
domain1_var1  0.149588
domain1_var2  0.149135
domain2_var1  0.179633
domain2_var2    0.1741

Overall Score: 0.1568
   0.1568:  0.1420 / 0.1631   [0.142, 0.1496, 0.1491, 0.1796, 0.1741]


### Build Submission

In [30]:
# Predict all targets for the test rows, then reshape the wide frame
# (one column per target) into one row per "<Id>_<target>" key.
y_oos, Xf = predictAll(test_data, all_base_clfs, all_clfs)

y_oos = (
    y_oos.reset_index()
         .melt(id_vars = 'Id', value_name = 'Predicted')
         .assign(Id = lambda long_df: long_df['Id'].astype(str) + '_' + long_df['variable'])
         .drop(columns = 'variable')
)

y_oos.to_csv('submission.csv', index=False)

age
0
1
2
3
4
\Base Classifier Prediction Time: 0:13:30


Time Elapsed: 0:13:30


running stacker
domain1_var1
0
1
2
3
4
\Base Classifier Prediction Time: 0:26:27


Time Elapsed: 0:26:27


running stacker
domain1_var2
0
1
2
3
4
\Base Classifier Prediction Time: 0:39:42


Time Elapsed: 0:39:42


running stacker
domain2_var1
0
1
2
3
4
\Base Classifier Prediction Time: 0:53:52


Time Elapsed: 0:53:52


running stacker
domain2_var2
0
1
2
3
4
\Base Classifier Prediction Time: 1:08:14


Time Elapsed: 1:08:14


running stacker
\Prediction Time: 1:08:14



### Show Final Submission

In [31]:
# Display the long-format submission frame (columns Id and Predicted) that was written to submission.csv.
y_oos

Unnamed: 0,Id,Predicted
0,10003_age,57.066938
1,10006_age,64.099035
2,10010_age,36.602631
3,10011_age,50.129355
4,10012_age,52.084979
...,...,...
29380,21745_domain2_var2,52.554427
29381,21748_domain2_var2,50.862736
29382,21749_domain2_var2,52.194498
29383,21751_domain2_var2,56.396514
