In [1]:
#imports
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from hyperopt import hp
from hyperopt.pyll import scope
from catboost import CatBoostRegressor
import lightgbm as lgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline, FeatureUnion

import os
import sys
# Put the parent directory on sys.path (only once) so that the project-local
# imports that follow can be resolved -- presumably the `models` package lives there.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from models.hyperopt_model import HyperoptModel
from models.utils import CustomDataFrameMapper
from models.utils import get_general_features_quora, _svr_features_transformations
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import pearsonr,  spearmanr,  kendalltau

  from pandas.core import datetools


In [4]:
# Load the pickled dataset from disk.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
quora_pickle_path = './data/quora.big_quantitative.pd'
preprocessed_data = pd.read_pickle(quora_pickle_path)
# Wrap the loaded object in a DataFrame and list the columns that are available.
df = pd.DataFrame(data=preprocessed_data)
print(df.columns)

Index(['user_id', 'user_answers_num', 'user_questions_num', 'user_blogs_num',
       'user_posts_num', 'user_edits_num', 'user_followers_num',
       'user_followings_num', 'user_has_picture', 'user_topics_num',
       'user_bio', 'answer_id', 'answer_is_downvoted', 'answer_comments_num',
       'answer_content', 'answer_user_credential', 'score', 'answer_views_num',
       'answer_timestamp', 'answer_rank', 'question_id', 'question_title',
       'question_fetched_answers_num', 'question_expected_answers_num',
       'question_followers_num', 'question_comments_num',
       'user_fetched_answers_num', 'days_since_epoch', 'days_rescaled',
       'user_z_score', 'user_ff_ratio', 'max_answer_rank', 'answer_rank_ratio',
       'user_top_score_ratio', 'question_subjectivity', 'answer_smog_index',
       'answer_smog_min_age', 'user_obj_presense', 'user_subj_presense',
       'Unnamed: 0', 'lda_1', 'lda_2', 'lda_3', 'lda_4', 'lda_5', 'lda_6',
       'lda_7', 'lda_8', 'lda_9', 'lda_10', 'lda

In [3]:
# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split repeatable between runs.
train, test = train_test_split(
    df,
    test_size=0.2,
    random_state=0,
)

In [4]:
# Report how many rows ended up in each of the two splits.
n_train_rows, n_test_rows = len(train), len(test)
print('Number of observations in the training data:', n_train_rows)
print('Number of observations in the test data:', n_test_rows)

Number of observations in the training data: 114411
Number of observations in the test data: 28603


In [5]:
# Fetch the per-group feature transformation specs (answer, question, user);
# each one is later handed to a CustomDataFrameMapper when the pipeline is built.
(answer_features_transformations,
 question_features_transformations,
 user_features_transformations) = get_general_features_quora()

In [6]:
    # Train and evaluate a CatBoost regressor through the project's HyperoptModel wrapper.
    #
    # NOTE(review): `time_features_transformations` is used below but is not assigned by
    # any cell visible in this notebook (the get_general_features_quora() cell unpacks only
    # the answer/question/user transformations). Confirm where it is defined; otherwise this
    # cell raises NameError on a fresh kernel (Restart & Run All).

    # Switches for this cell.
    DO_LOWESS = False  # forwarded as `do_lowess` to cat.run() and to the diagnostic plots
    hyperopt = False   # True: search over `cat.space` via cat.run(); False: one fit with the fixed parameters

    # Fixed CatBoost parameters used when `hyperopt` is False.
    # NOTE(review): the non-round values look like the result of an earlier tuning run -- confirm.
    # `verbose: True` logs every one of the 5000 iterations to the cell output.
    cat_fixed_params = {
        'bagging_temperature': 0.29793733267072053,
        'iterations': 5000,
        'l2_leaf_reg': 1.5511980979084095,
        'learning_rate': 0.08683045812519587,
        'loss_function': 'RMSE',
        'random_seed': 0,
        'random_strength': 20,
        'verbose': True,
        'train_dir': 'outputs/cat',
    }

    # Run CatBoostRegressor with hyperopt optimization
    cat = HyperoptModel(train.copy(), test.copy(), 'cat', cv=3)
    cat.raw_features = []
    cat.pipeline = Pipeline([
        # Each mapper builds one group of features; FeatureUnion concatenates them.
        ('prepare_features', FeatureUnion([
            ('user_features', CustomDataFrameMapper(user_features_transformations)),
            ('time_features', CustomDataFrameMapper(time_features_transformations)),
            ('answer_features', CustomDataFrameMapper(answer_features_transformations)),
            ('question_features', CustomDataFrameMapper(question_features_transformations))
        ])),
        ('estimate', CatBoostRegressor(**cat_fixed_params))
    ])

    # Collect the raw input column names referenced by every mapper. A feature spec's first
    # element is either a column name (str) or a sequence, in which case only its first
    # entry is recorded.
    for step_name, mapper in cat.pipeline.named_steps['prepare_features'].transformer_list:
        cat.raw_features += [spec[0] if isinstance(spec[0], str) else spec[0][0] for spec in mapper.features]

    # Hyperopt search space, keyed by pipeline parameter name (`<step>__<param>`).
    # The single-option hp.choice entries pin a parameter to one value.
    # NOTE(review): the search uses 50 iterations while the fixed parameters above use 5000 -- confirm this is intended.
    cat.space = {
        'estimate__iterations': hp.choice('estimate__iterations', [50]),
        'estimate__loss_function': hp.choice('estimate__loss_function', ['RMSE']),
        'estimate__train_dir': hp.choice('estimate__train_dir', ['outputs/cat']),
        'estimate__thread_count': hp.choice('estimate__thread_count', [4]),
        'estimate__used_ram_limit': hp.choice('estimate__used_ram_limit', [1024 * 1024 * 1024 * 4]),  # 4gb
        'estimate__random_seed': hp.choice('estimate__random_seed', [0]),

        'estimate__learning_rate': hp.loguniform('estimate__learning_rate', -5, 0),
        'estimate__random_strength': hp.choice('estimate__random_strength', [1, 20]),
        'estimate__l2_leaf_reg': hp.loguniform('estimate__l2_leaf_reg', 0, np.log(10)),
        'estimate__bagging_temperature': hp.uniform('estimate__bagging_temperature', 0, 1),
    }

    # Recipe (kept for reference, not executed): find the best number of trees, following
    # https://tech.yandex.com/catboost/doc/dg/concepts/parameter-tuning_trees-number-docpage/#parameter-tuning_trees-number
    # Need to also substitute the CatBoostRegressor parameters with:
    #     ('estimate', CatBoostRegressor(iterations=10000, loss_function='RMSE', auto_stop_pval=1e-4, use_best_model=True, train_dir='outputs/cat_trees', verbose=True))
    #
    # num_trees_train, num_trees_eval = train_test_split(train, test_size=0.2, random_state=0)
    # X = num_trees_train[list(filter(lambda column: column in cat.raw_features, cat.train.columns))]
    # y = num_trees_train['score']
    #
    # eval_X = num_trees_eval[list(filter(lambda column: column in cat.raw_features, cat.train.columns))]
    # eval_X = cat.pipeline.named_steps['prepare_features'].fit_transform(eval_X)
    # eval_set = (eval_X, num_trees_eval['score'])
    #
    # cat.pipeline.fit(X=X, y=y, estimate__eval_set=eval_set)
    # cat.model = cat.pipeline
    # print(cat.model.named_steps['estimate'].get_params())
    # cat.model.named_steps['estimate'].save_model('tmp/cat.model'.encode('utf-8'))

    if hyperopt:
        cat.run(do_lowess=DO_LOWESS)
    else:
        # Single fit with the fixed parameters configured on the pipeline above.
        cat.pipeline.fit(X=cat.X_train, y=cat.y_train)
        cat.model = cat.pipeline
        cat.stats()

        # Optional persistence of the fitted wrapper (requires `joblib`, which this notebook does not import):
        # joblib.dump(cat, './outputs/models/%s.pckl' % cat.output_prefix)
        # cat = joblib.load('./outputs/models/%s.pckl' % cat.output_prefix)

        # Diagnostic plots honour the DO_LOWESS switch, matching the hyperopt branch.
        cat.plot_predicted_vs_actual(do_lowess=DO_LOWESS)
        cat.plot_residuals(r_type='raw', do_lowess=DO_LOWESS)
        cat.plot_feature_importance()

Borders generated
0:	learn 0.5034394424passed: 0.229 sec	total: 1.11s	remaining: 1h 32m 20s
1:	learn 0.4617420782passed: 0.177 sec	total: 1.29s	remaining: 53m 33s
2:	learn 0.4238395137passed: 0.176 sec	total: 1.46s	remaining: 40m 35s
3:	learn 0.389409431passed: 0.189 sec	total: 1.65s	remaining: 34m 22s
4:	learn 0.3581937181passed: 0.176 sec	total: 1.83s	remaining: 30m 26s
5:	learn 0.3298876144passed: 0.176 sec	total: 2s	remaining: 27m 47s
6:	learn 0.3042402245passed: 0.179 sec	total: 2.18s	remaining: 25m 57s
7:	learn 0.2809361687passed: 0.178 sec	total: 2.36s	remaining: 24m 33s
8:	learn 0.2600170142passed: 0.183 sec	total: 2.54s	remaining: 23m 31s
9:	learn 0.2412321805passed: 0.18 sec	total: 2.72s	remaining: 22m 39s
10:	learn 0.224281573passed: 0.175 sec	total: 2.9s	remaining: 21m 55s
11:	learn 0.2090629892passed: 0.183 sec	total: 3.08s	remaining: 21m 21s
12:	learn 0.1950296232passed: 0.181 sec	total: 3.26s	remaining: 20m 52s
13:	learn 0.1823680924passed: 0.18 sec	total: 3.44s	remainin

115:	learn 0.0890890894passed: 0.183 sec	total: 22.6s	remaining: 15m 52s
116:	learn 0.08905564535passed: 0.179 sec	total: 22.8s	remaining: 15m 52s
117:	learn 0.08904444157passed: 0.182 sec	total: 23s	remaining: 15m 51s
118:	learn 0.08902965717passed: 0.195 sec	total: 23.2s	remaining: 15m 51s
119:	learn 0.08901177389passed: 0.208 sec	total: 23.4s	remaining: 15m 51s
120:	learn 0.08900196475passed: 0.193 sec	total: 23.6s	remaining: 15m 51s
121:	learn 0.08896856141passed: 0.207 sec	total: 23.8s	remaining: 15m 51s
122:	learn 0.08893914663passed: 0.199 sec	total: 24s	remaining: 15m 51s
123:	learn 0.08890025147passed: 0.187 sec	total: 24.2s	remaining: 15m 50s
124:	learn 0.0888822724passed: 0.188 sec	total: 24.4s	remaining: 15m 50s
125:	learn 0.08885806098passed: 0.225 sec	total: 24.6s	remaining: 15m 51s
126:	learn 0.08882952567passed: 0.187 sec	total: 24.8s	remaining: 15m 51s
127:	learn 0.08882010561passed: 0.208 sec	total: 25s	remaining: 15m 51s
128:	learn 0.08881165227passed: 0.198 sec	tota

227:	learn 0.08524464355passed: 0.219 sec	total: 45.1s	remaining: 15m 43s
228:	learn 0.08520889135passed: 0.218 sec	total: 45.3s	remaining: 15m 43s
229:	learn 0.08518643409passed: 0.22 sec	total: 45.5s	remaining: 15m 43s
230:	learn 0.08515667053passed: 0.207 sec	total: 45.7s	remaining: 15m 43s
231:	learn 0.08513701367passed: 0.193 sec	total: 45.9s	remaining: 15m 43s
232:	learn 0.08511426909passed: 0.194 sec	total: 46.1s	remaining: 15m 43s
233:	learn 0.08509685516passed: 0.186 sec	total: 46.3s	remaining: 15m 42s
234:	learn 0.08508141921passed: 0.198 sec	total: 46.5s	remaining: 15m 42s
235:	learn 0.08505535304passed: 0.213 sec	total: 46.7s	remaining: 15m 42s
236:	learn 0.08503564414passed: 0.191 sec	total: 46.9s	remaining: 15m 42s
237:	learn 0.08501321559passed: 0.193 sec	total: 47.1s	remaining: 15m 42s
238:	learn 0.08497339749passed: 0.202 sec	total: 47.3s	remaining: 15m 42s
239:	learn 0.08494656436passed: 0.186 sec	total: 47.5s	remaining: 15m 41s
240:	learn 0.08492068439passed: 0.194 s

339:	learn 0.08311609955passed: 0.253 sec	total: 1m 11s	remaining: 16m 20s
340:	learn 0.08310510473passed: 0.257 sec	total: 1m 11s	remaining: 16m 20s
341:	learn 0.08309759113passed: 0.244 sec	total: 1m 12s	remaining: 16m 20s
342:	learn 0.08309136551passed: 0.206 sec	total: 1m 12s	remaining: 16m 20s
343:	learn 0.08308146489passed: 0.226 sec	total: 1m 12s	remaining: 16m 20s
344:	learn 0.08307086643passed: 0.193 sec	total: 1m 12s	remaining: 16m 20s
345:	learn 0.08305977966passed: 0.223 sec	total: 1m 12s	remaining: 16m 20s
346:	learn 0.08304813669passed: 0.219 sec	total: 1m 13s	remaining: 16m 19s
347:	learn 0.08303669633passed: 0.193 sec	total: 1m 13s	remaining: 16m 19s
348:	learn 0.08301812136passed: 0.197 sec	total: 1m 13s	remaining: 16m 19s
349:	learn 0.08300261145passed: 0.196 sec	total: 1m 13s	remaining: 16m 18s
350:	learn 0.08298756863passed: 0.28 sec	total: 1m 13s	remaining: 16m 19s
351:	learn 0.08297637453passed: 0.449 sec	total: 1m 14s	remaining: 16m 22s
352:	learn 0.0829619943pas

449:	learn 0.08176510296passed: 0.192 sec	total: 1m 37s	remaining: 16m 29s
450:	learn 0.0817461747passed: 0.19 sec	total: 1m 38s	remaining: 16m 29s
451:	learn 0.08173691636passed: 0.197 sec	total: 1m 38s	remaining: 16m 28s
452:	learn 0.08172845575passed: 0.188 sec	total: 1m 38s	remaining: 16m 28s
453:	learn 0.08171652989passed: 0.225 sec	total: 1m 38s	remaining: 16m 27s
454:	learn 0.0817003569passed: 0.22 sec	total: 1m 38s	remaining: 16m 27s
455:	learn 0.08168693952passed: 0.197 sec	total: 1m 39s	remaining: 16m 27s
456:	learn 0.08167887652passed: 0.187 sec	total: 1m 39s	remaining: 16m 26s
457:	learn 0.08166771958passed: 0.185 sec	total: 1m 39s	remaining: 16m 26s
458:	learn 0.08165199422passed: 0.215 sec	total: 1m 39s	remaining: 16m 26s
459:	learn 0.08163935285passed: 0.231 sec	total: 1m 39s	remaining: 16m 26s
460:	learn 0.08162624411passed: 0.186 sec	total: 1m 40s	remaining: 16m 25s
461:	learn 0.08161786841passed: 0.235 sec	total: 1m 40s	remaining: 16m 25s
462:	learn 0.08160949429passe

560:	learn 0.0806938066passed: 0.192 sec	total: 2m 2s	remaining: 16m 7s
561:	learn 0.0806820369passed: 0.181 sec	total: 2m 2s	remaining: 16m 7s
562:	learn 0.08067272361passed: 0.212 sec	total: 2m 2s	remaining: 16m 6s
563:	learn 0.08066251745passed: 0.288 sec	total: 2m 2s	remaining: 16m 7s
564:	learn 0.0806549915passed: 0.26 sec	total: 2m 3s	remaining: 16m 7s
565:	learn 0.08064509326passed: 0.447 sec	total: 2m 3s	remaining: 16m 8s
566:	learn 0.0806381152passed: 0.19 sec	total: 2m 3s	remaining: 16m 8s
567:	learn 0.08063282616passed: 0.189 sec	total: 2m 4s	remaining: 16m 8s
568:	learn 0.08062198709passed: 0.218 sec	total: 2m 4s	remaining: 16m 7s
569:	learn 0.08061424358passed: 0.202 sec	total: 2m 4s	remaining: 16m 7s
570:	learn 0.08060990578passed: 0.234 sec	total: 2m 4s	remaining: 16m 7s
571:	learn 0.08059090733passed: 0.241 sec	total: 2m 4s	remaining: 16m 7s
572:	learn 0.08058141627passed: 0.299 sec	total: 2m 5s	remaining: 16m 7s
573:	learn 0.08057226877passed: 0.281 sec	total: 2m 5s	re

671:	learn 0.07975520246passed: 0.21 sec	total: 2m 30s	remaining: 16m 10s
672:	learn 0.0797495953passed: 0.199 sec	total: 2m 30s	remaining: 16m 9s
673:	learn 0.07974130573passed: 0.196 sec	total: 2m 31s	remaining: 16m 9s
674:	learn 0.07972957487passed: 0.181 sec	total: 2m 31s	remaining: 16m 8s
675:	learn 0.07972312733passed: 0.19 sec	total: 2m 31s	remaining: 16m 8s
676:	learn 0.07970684753passed: 0.209 sec	total: 2m 31s	remaining: 16m 8s
677:	learn 0.07969884642passed: 0.214 sec	total: 2m 31s	remaining: 16m 7s
678:	learn 0.07969402795passed: 0.212 sec	total: 2m 32s	remaining: 16m 7s
679:	learn 0.0796900569passed: 0.181 sec	total: 2m 32s	remaining: 16m 6s
680:	learn 0.07968301592passed: 0.19 sec	total: 2m 32s	remaining: 16m 6s
681:	learn 0.07967781012passed: 0.186 sec	total: 2m 32s	remaining: 16m 6s
682:	learn 0.07966603443passed: 0.2 sec	total: 2m 32s	remaining: 16m 5s
683:	learn 0.07965459272passed: 0.214 sec	total: 2m 33s	remaining: 16m 5s
684:	learn 0.07964633399passed: 0.213 sec	to

782:	learn 0.07897418481passed: 0.242 sec	total: 2m 55s	remaining: 15m 42s
783:	learn 0.0789666573passed: 0.238 sec	total: 2m 55s	remaining: 15m 42s
784:	learn 0.0789621687passed: 0.248 sec	total: 2m 55s	remaining: 15m 42s
785:	learn 0.07895354881passed: 0.248 sec	total: 2m 55s	remaining: 15m 42s
786:	learn 0.07894151843passed: 0.285 sec	total: 2m 56s	remaining: 15m 42s
787:	learn 0.07893586366passed: 0.342 sec	total: 2m 56s	remaining: 15m 42s
788:	learn 0.0789269482passed: 0.213 sec	total: 2m 56s	remaining: 15m 42s
789:	learn 0.07891868361passed: 0.279 sec	total: 2m 56s	remaining: 15m 42s
790:	learn 0.07891328558passed: 0.185 sec	total: 2m 57s	remaining: 15m 42s
791:	learn 0.07890694763passed: 0.201 sec	total: 2m 57s	remaining: 15m 41s
792:	learn 0.07889767398passed: 0.222 sec	total: 2m 57s	remaining: 15m 41s
793:	learn 0.07889063994passed: 0.214 sec	total: 2m 57s	remaining: 15m 41s
794:	learn 0.07888094903passed: 0.214 sec	total: 2m 57s	remaining: 15m 41s
795:	learn 0.07887487912pass

893:	learn 0.0782075177passed: 0.188 sec	total: 3m 18s	remaining: 15m 13s
894:	learn 0.07820405987passed: 0.203 sec	total: 3m 19s	remaining: 15m 12s
895:	learn 0.07819841038passed: 0.24 sec	total: 3m 19s	remaining: 15m 12s
896:	learn 0.07819260851passed: 0.208 sec	total: 3m 19s	remaining: 15m 12s
897:	learn 0.07818720958passed: 0.187 sec	total: 3m 19s	remaining: 15m 12s
898:	learn 0.07818078544passed: 0.206 sec	total: 3m 19s	remaining: 15m 11s
899:	learn 0.07817397257passed: 0.209 sec	total: 3m 20s	remaining: 15m 11s
900:	learn 0.07816570655passed: 0.199 sec	total: 3m 20s	remaining: 15m 11s
901:	learn 0.07815584674passed: 0.189 sec	total: 3m 20s	remaining: 15m 10s
902:	learn 0.07814769479passed: 0.215 sec	total: 3m 20s	remaining: 15m 10s
903:	learn 0.07813220713passed: 0.201 sec	total: 3m 20s	remaining: 15m 10s
904:	learn 0.07812550128passed: 0.195 sec	total: 3m 21s	remaining: 15m 9s
905:	learn 0.07811954225passed: 0.213 sec	total: 3m 21s	remaining: 15m 9s
906:	learn 0.07811093456passe

1005:	learn 0.07755159082passed: 0.179 sec	total: 3m 44s	remaining: 14m 50s
1006:	learn 0.07754671214passed: 0.197 sec	total: 3m 44s	remaining: 14m 50s
1007:	learn 0.0775355906passed: 0.204 sec	total: 3m 44s	remaining: 14m 49s
1008:	learn 0.07752983762passed: 0.217 sec	total: 3m 44s	remaining: 14m 49s
1009:	learn 0.0775247661passed: 0.186 sec	total: 3m 45s	remaining: 14m 49s
1010:	learn 0.07752018048passed: 0.211 sec	total: 3m 45s	remaining: 14m 49s
1011:	learn 0.07751469802passed: 0.206 sec	total: 3m 45s	remaining: 14m 48s
1012:	learn 0.07750745424passed: 0.218 sec	total: 3m 45s	remaining: 14m 48s
1013:	learn 0.07749937322passed: 0.216 sec	total: 3m 45s	remaining: 14m 48s
1014:	learn 0.07749370628passed: 0.182 sec	total: 3m 46s	remaining: 14m 47s
1015:	learn 0.07748748695passed: 0.194 sec	total: 3m 46s	remaining: 14m 47s
1016:	learn 0.07748453024passed: 0.199 sec	total: 3m 46s	remaining: 14m 47s
1017:	learn 0.07747838395passed: 0.202 sec	total: 3m 46s	remaining: 14m 46s
1018:	learn 0.

1115:	learn 0.07694021058passed: 0.19 sec	total: 4m 6s	remaining: 14m 18s
1116:	learn 0.07693437476passed: 0.234 sec	total: 4m 6s	remaining: 14m 18s
1117:	learn 0.07692872582passed: 0.213 sec	total: 4m 7s	remaining: 14m 18s
1118:	learn 0.07692274898passed: 0.204 sec	total: 4m 7s	remaining: 14m 17s
1119:	learn 0.07691638942passed: 0.207 sec	total: 4m 7s	remaining: 14m 17s
1120:	learn 0.07691163795passed: 0.206 sec	total: 4m 7s	remaining: 14m 17s
1121:	learn 0.07690242291passed: 0.208 sec	total: 4m 7s	remaining: 14m 17s
1122:	learn 0.07689818111passed: 0.246 sec	total: 4m 8s	remaining: 14m 17s
1123:	learn 0.07689365248passed: 0.233 sec	total: 4m 8s	remaining: 14m 16s
1124:	learn 0.07688608394passed: 0.244 sec	total: 4m 8s	remaining: 14m 16s
1125:	learn 0.07687871921passed: 0.278 sec	total: 4m 9s	remaining: 14m 16s
1126:	learn 0.0768744663passed: 0.233 sec	total: 4m 9s	remaining: 14m 16s
1127:	learn 0.07687037031passed: 0.221 sec	total: 4m 9s	remaining: 14m 16s
1128:	learn 0.07686561032pa

1224:	learn 0.0763469188passed: 0.216 sec	total: 4m 33s	remaining: 14m 3s
1225:	learn 0.07634212262passed: 0.19 sec	total: 4m 34s	remaining: 14m 3s
1226:	learn 0.07633837602passed: 0.205 sec	total: 4m 34s	remaining: 14m 3s
1227:	learn 0.07633262505passed: 0.186 sec	total: 4m 34s	remaining: 14m 2s
1228:	learn 0.07632793056passed: 0.204 sec	total: 4m 34s	remaining: 14m 2s
1229:	learn 0.07632338639passed: 0.198 sec	total: 4m 34s	remaining: 14m 2s
1230:	learn 0.07631624181passed: 0.231 sec	total: 4m 35s	remaining: 14m 2s
1231:	learn 0.07631108098passed: 0.197 sec	total: 4m 35s	remaining: 14m 1s
1232:	learn 0.07630719328passed: 0.218 sec	total: 4m 35s	remaining: 14m 1s
1233:	learn 0.0763015491passed: 0.209 sec	total: 4m 35s	remaining: 14m 1s
1234:	learn 0.07629868577passed: 0.263 sec	total: 4m 35s	remaining: 14m 1s
1235:	learn 0.07629215344passed: 0.3 sec	total: 4m 36s	remaining: 14m 1s
1236:	learn 0.07628714801passed: 0.243 sec	total: 4m 36s	remaining: 14m 1s
1237:	learn 0.07628004193passe

1334:	learn 0.07576364599passed: 0.194 sec	total: 4m 58s	remaining: 13m 39s
1335:	learn 0.07576034607passed: 0.184 sec	total: 4m 58s	remaining: 13m 38s
1336:	learn 0.07575577082passed: 0.192 sec	total: 4m 58s	remaining: 13m 38s
1337:	learn 0.07575149077passed: 0.221 sec	total: 4m 58s	remaining: 13m 38s
1338:	learn 0.07574654315passed: 0.185 sec	total: 4m 59s	remaining: 13m 37s
1339:	learn 0.07574305921passed: 0.185 sec	total: 4m 59s	remaining: 13m 37s
1340:	learn 0.07573925397passed: 0.188 sec	total: 4m 59s	remaining: 13m 37s
1341:	learn 0.07573366607passed: 0.196 sec	total: 4m 59s	remaining: 13m 37s
1342:	learn 0.07572864535passed: 0.2 sec	total: 4m 59s	remaining: 13m 36s
1343:	learn 0.07572423968passed: 0.211 sec	total: 5m	remaining: 13m 36s
1344:	learn 0.07571754861passed: 0.2 sec	total: 5m	remaining: 13m 36s
1345:	learn 0.075712827passed: 0.204 sec	total: 5m	remaining: 13m 35s
1346:	learn 0.07570901339passed: 0.208 sec	total: 5m	remaining: 13m 35s
1347:	learn 0.07570481248passed: 0

1443:	learn 0.07522986583passed: 0.232 sec	total: 5m 20s	remaining: 13m 9s
1444:	learn 0.07522530339passed: 0.204 sec	total: 5m 20s	remaining: 13m 9s
1445:	learn 0.07522124087passed: 0.226 sec	total: 5m 21s	remaining: 13m 9s
1446:	learn 0.07521485714passed: 0.22 sec	total: 5m 21s	remaining: 13m 8s
1447:	learn 0.0752108752passed: 0.189 sec	total: 5m 21s	remaining: 13m 8s
1448:	learn 0.07520788036passed: 0.191 sec	total: 5m 21s	remaining: 13m 8s
1449:	learn 0.07520033945passed: 0.21 sec	total: 5m 21s	remaining: 13m 8s
1450:	learn 0.07519641555passed: 0.21 sec	total: 5m 22s	remaining: 13m 7s
1451:	learn 0.0751920353passed: 0.201 sec	total: 5m 22s	remaining: 13m 7s
1452:	learn 0.07518820352passed: 0.235 sec	total: 5m 22s	remaining: 13m 7s
1453:	learn 0.07518384109passed: 0.204 sec	total: 5m 22s	remaining: 13m 7s
1454:	learn 0.07517908848passed: 0.214 sec	total: 5m 22s	remaining: 13m 6s
1455:	learn 0.07517023423passed: 0.206 sec	total: 5m 23s	remaining: 13m 6s
1456:	learn 0.07516715258passe

1552:	learn 0.07473917974passed: 0.223 sec	total: 5m 43s	remaining: 12m 41s
1553:	learn 0.07473696275passed: 0.189 sec	total: 5m 43s	remaining: 12m 41s
1554:	learn 0.07473128742passed: 0.195 sec	total: 5m 43s	remaining: 12m 41s
1555:	learn 0.0747262483passed: 0.202 sec	total: 5m 43s	remaining: 12m 40s
1556:	learn 0.07472287454passed: 0.193 sec	total: 5m 43s	remaining: 12m 40s
1557:	learn 0.07471791591passed: 0.215 sec	total: 5m 44s	remaining: 12m 40s
1558:	learn 0.07471354412passed: 0.208 sec	total: 5m 44s	remaining: 12m 40s
1559:	learn 0.0747107832passed: 0.207 sec	total: 5m 44s	remaining: 12m 39s
1560:	learn 0.07470851389passed: 0.207 sec	total: 5m 44s	remaining: 12m 39s
1561:	learn 0.07470437022passed: 0.228 sec	total: 5m 44s	remaining: 12m 39s
1562:	learn 0.07469987243passed: 0.198 sec	total: 5m 45s	remaining: 12m 39s
1563:	learn 0.07469486326passed: 0.222 sec	total: 5m 45s	remaining: 12m 38s
1564:	learn 0.07469073265passed: 0.22 sec	total: 5m 45s	remaining: 12m 38s
1565:	learn 0.0

1661:	learn 0.07427247165passed: 0.212 sec	total: 6m 5s	remaining: 12m 14s
1662:	learn 0.07426641734passed: 0.24 sec	total: 6m 6s	remaining: 12m 14s
1663:	learn 0.07426228383passed: 0.226 sec	total: 6m 6s	remaining: 12m 14s
1664:	learn 0.07425877705passed: 0.206 sec	total: 6m 6s	remaining: 12m 14s
1665:	learn 0.07425501631passed: 0.19 sec	total: 6m 6s	remaining: 12m 13s
1666:	learn 0.07425050067passed: 0.198 sec	total: 6m 6s	remaining: 12m 13s
1667:	learn 0.07424654185passed: 0.221 sec	total: 6m 7s	remaining: 12m 13s
1668:	learn 0.07424277163passed: 0.221 sec	total: 6m 7s	remaining: 12m 13s
1669:	learn 0.07423842873passed: 0.196 sec	total: 6m 7s	remaining: 12m 12s
1670:	learn 0.07423435382passed: 0.198 sec	total: 6m 7s	remaining: 12m 12s
1671:	learn 0.07423055626passed: 0.196 sec	total: 6m 7s	remaining: 12m 12s
1672:	learn 0.07422521407passed: 0.195 sec	total: 6m 8s	remaining: 12m 12s
1673:	learn 0.07422166245passed: 0.216 sec	total: 6m 8s	remaining: 12m 11s
1674:	learn 0.07421212419pa

1771:	learn 0.07382568548passed: 0.261 sec	total: 6m 28s	remaining: 11m 48s
1772:	learn 0.07382200511passed: 0.224 sec	total: 6m 29s	remaining: 11m 48s
1773:	learn 0.07381812827passed: 0.19 sec	total: 6m 29s	remaining: 11m 47s
1774:	learn 0.07381596802passed: 0.215 sec	total: 6m 29s	remaining: 11m 47s
1775:	learn 0.07381369621passed: 0.231 sec	total: 6m 29s	remaining: 11m 47s
1776:	learn 0.07381105935passed: 0.213 sec	total: 6m 29s	remaining: 11m 47s
1777:	learn 0.07380750456passed: 0.209 sec	total: 6m 30s	remaining: 11m 46s
1778:	learn 0.0738052104passed: 0.243 sec	total: 6m 30s	remaining: 11m 46s
1779:	learn 0.07380176053passed: 0.232 sec	total: 6m 30s	remaining: 11m 46s
1780:	learn 0.07379857266passed: 0.214 sec	total: 6m 30s	remaining: 11m 46s
1781:	learn 0.07379543049passed: 0.201 sec	total: 6m 30s	remaining: 11m 46s
1782:	learn 0.07379078739passed: 0.188 sec	total: 6m 31s	remaining: 11m 45s
1783:	learn 0.07378371697passed: 0.218 sec	total: 6m 31s	remaining: 11m 45s
1784:	learn 0.

1988:	learn 0.07292843858passed: 0.22 sec	total: 7m 26s	remaining: 11m 15s
1989:	learn 0.07292421087passed: 0.255 sec	total: 7m 26s	remaining: 11m 15s
1990:	learn 0.07291834428passed: 0.245 sec	total: 7m 26s	remaining: 11m 15s
1991:	learn 0.0729142609passed: 0.292 sec	total: 7m 26s	remaining: 11m 14s
1992:	learn 0.07291256874passed: 0.282 sec	total: 7m 27s	remaining: 11m 14s
1993:	learn 0.07290823238passed: 0.338 sec	total: 7m 27s	remaining: 11m 14s
1994:	learn 0.07290507103passed: 0.312 sec	total: 7m 27s	remaining: 11m 14s
1995:	learn 0.07290140426passed: 0.45 sec	total: 7m 28s	remaining: 11m 14s
1996:	learn 0.07289780555passed: 0.453 sec	total: 7m 28s	remaining: 11m 14s
1997:	learn 0.07289219972passed: 0.433 sec	total: 7m 29s	remaining: 11m 15s
1998:	learn 0.07288696874passed: 0.292 sec	total: 7m 29s	remaining: 11m 14s
1999:	learn 0.07288380633passed: 0.41 sec	total: 7m 29s	remaining: 11m 14s
2000:	learn 0.07288134701passed: 0.405 sec	total: 7m 30s	remaining: 11m 14s
2001:	learn 0.07

2097:	learn 0.07249851152passed: 0.398 sec	total: 8m 1s	remaining: 11m 5s
2098:	learn 0.0724950293passed: 0.302 sec	total: 8m 1s	remaining: 11m 5s
2099:	learn 0.07249059831passed: 0.296 sec	total: 8m 1s	remaining: 11m 5s
2100:	learn 0.07248510221passed: 0.306 sec	total: 8m 2s	remaining: 11m 5s
2101:	learn 0.07247968722passed: 0.317 sec	total: 8m 2s	remaining: 11m 5s
2102:	learn 0.07247596253passed: 0.301 sec	total: 8m 2s	remaining: 11m 5s
2103:	learn 0.07247245964passed: 0.319 sec	total: 8m 3s	remaining: 11m 4s
2104:	learn 0.07247008498passed: 0.271 sec	total: 8m 3s	remaining: 11m 4s
2105:	learn 0.07246720889passed: 0.335 sec	total: 8m 3s	remaining: 11m 4s
2106:	learn 0.072463202passed: 0.307 sec	total: 8m 4s	remaining: 11m 4s
2107:	learn 0.0724608004passed: 0.312 sec	total: 8m 4s	remaining: 11m 4s
2108:	learn 0.07245659857passed: 0.32 sec	total: 8m 4s	remaining: 11m 4s
2109:	learn 0.07245202332passed: 0.321 sec	total: 8m 4s	remaining: 11m 4s
2110:	learn 0.07244640096passed: 0.357 sec	

2207:	learn 0.07205871517passed: 0.336 sec	total: 8m 36s	remaining: 10m 52s
2208:	learn 0.07205578199passed: 0.312 sec	total: 8m 36s	remaining: 10m 52s
2209:	learn 0.07205131586passed: 0.293 sec	total: 8m 36s	remaining: 10m 52s
2210:	learn 0.07204636975passed: 0.343 sec	total: 8m 37s	remaining: 10m 52s
2211:	learn 0.07204071833passed: 0.325 sec	total: 8m 37s	remaining: 10m 52s
2212:	learn 0.07203610285passed: 0.292 sec	total: 8m 37s	remaining: 10m 51s
2213:	learn 0.07203280332passed: 0.312 sec	total: 8m 38s	remaining: 10m 51s
2214:	learn 0.0720287883passed: 0.301 sec	total: 8m 38s	remaining: 10m 51s
2215:	learn 0.07202617108passed: 0.343 sec	total: 8m 38s	remaining: 10m 51s
2216:	learn 0.07202145296passed: 0.344 sec	total: 8m 39s	remaining: 10m 51s
2217:	learn 0.07201661919passed: 0.309 sec	total: 8m 39s	remaining: 10m 51s
2218:	learn 0.0720138478passed: 0.345 sec	total: 8m 39s	remaining: 10m 51s
2219:	learn 0.07200906129passed: 0.387 sec	total: 8m 40s	remaining: 10m 51s
2220:	learn 0.

2316:	learn 0.07165399434passed: 0.397 sec	total: 9m 11s	remaining: 10m 39s
2317:	learn 0.07164995197passed: 0.302 sec	total: 9m 12s	remaining: 10m 38s
2318:	learn 0.07164671113passed: 0.299 sec	total: 9m 12s	remaining: 10m 38s
2319:	learn 0.07164184224passed: 0.409 sec	total: 9m 12s	remaining: 10m 38s
2320:	learn 0.07163704486passed: 0.329 sec	total: 9m 13s	remaining: 10m 38s
2321:	learn 0.07163418118passed: 0.329 sec	total: 9m 13s	remaining: 10m 38s
2322:	learn 0.07163147208passed: 0.294 sec	total: 9m 13s	remaining: 10m 38s
2323:	learn 0.07162881745passed: 0.317 sec	total: 9m 14s	remaining: 10m 38s
2324:	learn 0.07162293701passed: 0.333 sec	total: 9m 14s	remaining: 10m 37s
2325:	learn 0.07161875243passed: 0.332 sec	total: 9m 14s	remaining: 10m 37s
2326:	learn 0.07161519052passed: 0.316 sec	total: 9m 15s	remaining: 10m 37s
2327:	learn 0.07161171387passed: 0.329 sec	total: 9m 15s	remaining: 10m 37s
2328:	learn 0.0716096587passed: 0.346 sec	total: 9m 15s	remaining: 10m 37s
2329:	learn 0

2425:	learn 0.07125680309passed: 0.307 sec	total: 9m 47s	remaining: 10m 23s
2426:	learn 0.0712507346passed: 0.289 sec	total: 9m 47s	remaining: 10m 23s
2427:	learn 0.07124644366passed: 0.355 sec	total: 9m 48s	remaining: 10m 23s
2428:	learn 0.07124346662passed: 0.34 sec	total: 9m 48s	remaining: 10m 23s
2429:	learn 0.07123974292passed: 0.348 sec	total: 9m 49s	remaining: 10m 22s
2430:	learn 0.0712365334passed: 0.302 sec	total: 9m 49s	remaining: 10m 22s
2431:	learn 0.07123234709passed: 0.331 sec	total: 9m 49s	remaining: 10m 22s
2432:	learn 0.07122861078passed: 0.334 sec	total: 9m 49s	remaining: 10m 22s
2433:	learn 0.07122692626passed: 0.33 sec	total: 9m 50s	remaining: 10m 22s
2434:	learn 0.07122426455passed: 0.317 sec	total: 9m 50s	remaining: 10m 22s
2435:	learn 0.07121886756passed: 0.352 sec	total: 9m 50s	remaining: 10m 22s
2436:	learn 0.0712168666passed: 0.339 sec	total: 9m 51s	remaining: 10m 21s
2437:	learn 0.07121217596passed: 0.337 sec	total: 9m 51s	remaining: 10m 21s
2438:	learn 0.071

2534:	learn 0.07088505659passed: 0.215 sec	total: 10m 21s	remaining: 10m 4s
2535:	learn 0.0708824813passed: 0.201 sec	total: 10m 21s	remaining: 10m 3s
2536:	learn 0.07087879537passed: 0.207 sec	total: 10m 21s	remaining: 10m 3s
2537:	learn 0.07087269039passed: 0.191 sec	total: 10m 21s	remaining: 10m 3s
2538:	learn 0.07087056902passed: 0.229 sec	total: 10m 22s	remaining: 10m 2s
2539:	learn 0.07086630024passed: 0.182 sec	total: 10m 22s	remaining: 10m 2s
2540:	learn 0.07086444658passed: 0.201 sec	total: 10m 22s	remaining: 10m 2s
2541:	learn 0.07086095859passed: 0.235 sec	total: 10m 22s	remaining: 10m 2s
2542:	learn 0.07085693355passed: 0.198 sec	total: 10m 22s	remaining: 10m 1s
2543:	learn 0.07085545249passed: 0.215 sec	total: 10m 23s	remaining: 10m 1s
2544:	learn 0.07085242271passed: 0.202 sec	total: 10m 23s	remaining: 10m 1s
2545:	learn 0.07084833959passed: 0.186 sec	total: 10m 23s	remaining: 10m
2546:	learn 0.07084476773passed: 0.198 sec	total: 10m 23s	remaining: 10m
2547:	learn 0.07083

2643:	learn 0.07049253772passed: 0.24 sec	total: 10m 51s	remaining: 9m 40s
2644:	learn 0.07048439617passed: 0.365 sec	total: 10m 52s	remaining: 9m 40s
2645:	learn 0.07048223664passed: 0.231 sec	total: 10m 52s	remaining: 9m 40s
2646:	learn 0.07047836525passed: 0.243 sec	total: 10m 52s	remaining: 9m 40s
2647:	learn 0.0704742641passed: 0.242 sec	total: 10m 52s	remaining: 9m 39s
2648:	learn 0.07047055387passed: 0.223 sec	total: 10m 53s	remaining: 9m 39s
2649:	learn 0.0704673964passed: 0.225 sec	total: 10m 53s	remaining: 9m 39s
2650:	learn 0.0704653285passed: 0.207 sec	total: 10m 53s	remaining: 9m 39s
2651:	learn 0.07046131695passed: 0.261 sec	total: 10m 53s	remaining: 9m 38s
2652:	learn 0.07045752903passed: 0.251 sec	total: 10m 54s	remaining: 9m 38s
2653:	learn 0.07045416127passed: 0.211 sec	total: 10m 54s	remaining: 9m 38s
2654:	learn 0.070450852passed: 0.231 sec	total: 10m 54s	remaining: 9m 38s
2655:	learn 0.07044716221passed: 0.248 sec	total: 10m 54s	remaining: 9m 37s
2656:	learn 0.0704

2752:	learn 0.07013184047passed: 0.22 sec	total: 11m 18s	remaining: 9m 13s
2753:	learn 0.07012817036passed: 0.198 sec	total: 11m 18s	remaining: 9m 13s
2754:	learn 0.07012389612passed: 0.22 sec	total: 11m 18s	remaining: 9m 12s
2755:	learn 0.070120747passed: 0.434 sec	total: 11m 19s	remaining: 9m 12s
2756:	learn 0.07011706204passed: 0.45 sec	total: 11m 19s	remaining: 9m 12s
2757:	learn 0.07011242248passed: 0.207 sec	total: 11m 19s	remaining: 9m 12s
2758:	learn 0.07010908437passed: 0.187 sec	total: 11m 19s	remaining: 9m 12s
2759:	learn 0.07010502261passed: 0.201 sec	total: 11m 20s	remaining: 9m 11s
2760:	learn 0.07010236719passed: 0.199 sec	total: 11m 20s	remaining: 9m 11s
2761:	learn 0.07009978934passed: 0.189 sec	total: 11m 20s	remaining: 9m 11s
2762:	learn 0.07009405149passed: 0.18 sec	total: 11m 20s	remaining: 9m 11s
2763:	learn 0.07009115098passed: 0.196 sec	total: 11m 20s	remaining: 9m 10s
2764:	learn 0.0700876816passed: 0.231 sec	total: 11m 21s	remaining: 9m 10s
2765:	learn 0.07008

2970:	learn 0.06941048944passed: 0.241 sec	total: 12m 13s	remaining: 8m 21s
2971:	learn 0.06940628917passed: 0.25 sec	total: 12m 14s	remaining: 8m 20s
2972:	learn 0.06940246908passed: 0.262 sec	total: 12m 14s	remaining: 8m 20s
2973:	learn 0.0693992699passed: 0.255 sec	total: 12m 14s	remaining: 8m 20s
2974:	learn 0.0693959637passed: 0.25 sec	total: 12m 14s	remaining: 8m 20s
2975:	learn 0.06939344657passed: 0.246 sec	total: 12m 15s	remaining: 8m 20s
2976:	learn 0.06938805798passed: 0.248 sec	total: 12m 15s	remaining: 8m 19s
2977:	learn 0.06938630281passed: 0.24 sec	total: 12m 15s	remaining: 8m 19s
2978:	learn 0.0693844351passed: 0.25 sec	total: 12m 15s	remaining: 8m 19s
2979:	learn 0.06938078645passed: 0.274 sec	total: 12m 16s	remaining: 8m 19s
2980:	learn 0.06937760209passed: 0.241 sec	total: 12m 16s	remaining: 8m 18s
2981:	learn 0.06937428125passed: 0.219 sec	total: 12m 16s	remaining: 8m 18s
2982:	learn 0.06937042612passed: 0.233 sec	total: 12m 16s	remaining: 8m 18s
2983:	learn 0.06936

3079:	learn 0.06906897228passed: 0.254 sec	total: 12m 40s	remaining: 7m 54s
3080:	learn 0.06906687565passed: 0.228 sec	total: 12m 41s	remaining: 7m 54s
3081:	learn 0.06906376581passed: 0.227 sec	total: 12m 41s	remaining: 7m 53s
3082:	learn 0.0690599149passed: 0.243 sec	total: 12m 41s	remaining: 7m 53s
3083:	learn 0.0690566259passed: 0.257 sec	total: 12m 41s	remaining: 7m 53s
3084:	learn 0.06905302701passed: 0.246 sec	total: 12m 42s	remaining: 7m 53s
3085:	learn 0.06904988475passed: 0.249 sec	total: 12m 42s	remaining: 7m 52s
3086:	learn 0.06904690929passed: 0.24 sec	total: 12m 42s	remaining: 7m 52s
3087:	learn 0.06904333236passed: 0.254 sec	total: 12m 42s	remaining: 7m 52s
3088:	learn 0.06903954379passed: 0.244 sec	total: 12m 43s	remaining: 7m 52s
3089:	learn 0.06903678408passed: 0.265 sec	total: 12m 43s	remaining: 7m 51s
3090:	learn 0.06903433929passed: 0.253 sec	total: 12m 43s	remaining: 7m 51s
3091:	learn 0.06903217507passed: 0.237 sec	total: 12m 43s	remaining: 7m 51s
3092:	learn 0.0

3188:	learn 0.06871889687passed: 0.267 sec	total: 13m 7s	remaining: 7m 27s
3189:	learn 0.06871681549passed: 0.261 sec	total: 13m 7s	remaining: 7m 27s
3190:	learn 0.06871465883passed: 0.26 sec	total: 13m 8s	remaining: 7m 26s
3191:	learn 0.06871171446passed: 0.238 sec	total: 13m 8s	remaining: 7m 26s
3192:	learn 0.06870872286passed: 0.248 sec	total: 13m 8s	remaining: 7m 26s
3193:	learn 0.06870601457passed: 0.247 sec	total: 13m 8s	remaining: 7m 26s
3194:	learn 0.06870303935passed: 0.267 sec	total: 13m 9s	remaining: 7m 25s
3195:	learn 0.06869962536passed: 0.251 sec	total: 13m 9s	remaining: 7m 25s
3196:	learn 0.06869726434passed: 0.234 sec	total: 13m 9s	remaining: 7m 25s
3197:	learn 0.06869460361passed: 0.214 sec	total: 13m 9s	remaining: 7m 25s
3198:	learn 0.06869076717passed: 0.263 sec	total: 13m 10s	remaining: 7m 24s
3199:	learn 0.0686877238passed: 0.261 sec	total: 13m 10s	remaining: 7m 24s
3200:	learn 0.06868540682passed: 0.225 sec	total: 13m 10s	remaining: 7m 24s
3201:	learn 0.0686830205

3297:	learn 0.06839879026passed: 0.246 sec	total: 13m 35s	remaining: 7m
3298:	learn 0.0683937597passed: 0.256 sec	total: 13m 35s	remaining: 7m
3299:	learn 0.06839053923passed: 0.354 sec	total: 13m 36s	remaining: 7m
3300:	learn 0.06838778437passed: 0.354 sec	total: 13m 36s	remaining: 7m
3301:	learn 0.06838364819passed: 0.351 sec	total: 13m 36s	remaining: 7m
3302:	learn 0.06838162045passed: 0.197 sec	total: 13m 36s	remaining: 6m 59s
3303:	learn 0.06838161814passed: 0.0739 sec	total: 13m 37s	remaining: 6m 59s
3304:	learn 0.0683778687passed: 0.247 sec	total: 13m 37s	remaining: 6m 59s
3305:	learn 0.06837435793passed: 0.261 sec	total: 13m 37s	remaining: 6m 58s
3306:	learn 0.06837435645passed: 0.0679 sec	total: 13m 37s	remaining: 6m 58s
3307:	learn 0.06837332123passed: 0.223 sec	total: 13m 37s	remaining: 6m 58s
3308:	learn 0.06836927266passed: 0.203 sec	total: 13m 38s	remaining: 6m 58s
3309:	learn 0.06836608799passed: 0.368 sec	total: 13m 38s	remaining: 6m 57s
3310:	learn 0.06836243251passed:

3406:	learn 0.06808730876passed: 0.208 sec	total: 14m 3s	remaining: 6m 34s
3407:	learn 0.06808591099passed: 0.206 sec	total: 14m 3s	remaining: 6m 34s
3408:	learn 0.0680837012passed: 0.214 sec	total: 14m 3s	remaining: 6m 33s
3409:	learn 0.06808174034passed: 0.215 sec	total: 14m 3s	remaining: 6m 33s
3410:	learn 0.06807943626passed: 0.209 sec	total: 14m 4s	remaining: 6m 33s
3411:	learn 0.06807621924passed: 0.193 sec	total: 14m 4s	remaining: 6m 32s
3412:	learn 0.06807306966passed: 0.186 sec	total: 14m 4s	remaining: 6m 32s
3413:	learn 0.06806847123passed: 0.209 sec	total: 14m 4s	remaining: 6m 32s
3414:	learn 0.06806649401passed: 0.224 sec	total: 14m 4s	remaining: 6m 32s
3415:	learn 0.06806415144passed: 0.195 sec	total: 14m 5s	remaining: 6m 31s
3416:	learn 0.06806049972passed: 0.207 sec	total: 14m 5s	remaining: 6m 31s
3417:	learn 0.06805894608passed: 0.196 sec	total: 14m 5s	remaining: 6m 31s
3418:	learn 0.06805639725passed: 0.211 sec	total: 14m 5s	remaining: 6m 31s
3419:	learn 0.06805444649p

3515:	learn 0.06777769341passed: 0.592 sec	total: 14m 27s	remaining: 6m 6s
3516:	learn 0.06777571201passed: 0.665 sec	total: 14m 28s	remaining: 6m 6s
3517:	learn 0.06777388984passed: 0.617 sec	total: 14m 29s	remaining: 6m 6s
3518:	learn 0.06776945245passed: 0.508 sec	total: 14m 29s	remaining: 6m 5s
3519:	learn 0.06776731274passed: 0.624 sec	total: 14m 30s	remaining: 6m 5s
3520:	learn 0.06776657926passed: 0.667 sec	total: 14m 30s	remaining: 6m 5s
3521:	learn 0.0677632807passed: 0.606 sec	total: 14m 31s	remaining: 6m 5s
3522:	learn 0.06776003122passed: 0.731 sec	total: 14m 32s	remaining: 6m 5s
3523:	learn 0.0677581557passed: 0.296 sec	total: 14m 32s	remaining: 6m 5s
3524:	learn 0.06775486562passed: 0.205 sec	total: 14m 32s	remaining: 6m 5s
3525:	learn 0.06775264977passed: 0.254 sec	total: 14m 32s	remaining: 6m 4s
3526:	learn 0.06775096705passed: 0.434 sec	total: 14m 33s	remaining: 6m 4s
3527:	learn 0.06774652202passed: 0.537 sec	total: 14m 33s	remaining: 6m 4s
3528:	learn 0.06774431093pa

3624:	learn 0.06747172837passed: 0.259 sec	total: 15m	remaining: 5m 41s
3625:	learn 0.06746948359passed: 0.231 sec	total: 15m	remaining: 5m 41s
3626:	learn 0.06746633278passed: 0.225 sec	total: 15m	remaining: 5m 40s
3627:	learn 0.06746348932passed: 0.232 sec	total: 15m	remaining: 5m 40s
3628:	learn 0.06746018708passed: 0.223 sec	total: 15m	remaining: 5m 40s
3629:	learn 0.06745695385passed: 0.235 sec	total: 15m 1s	remaining: 5m 40s
3630:	learn 0.06745457754passed: 0.236 sec	total: 15m 1s	remaining: 5m 39s
3631:	learn 0.06745205651passed: 0.21 sec	total: 15m 1s	remaining: 5m 39s
3632:	learn 0.06744796622passed: 0.303 sec	total: 15m 1s	remaining: 5m 39s
3633:	learn 0.06744520415passed: 0.295 sec	total: 15m 2s	remaining: 5m 39s
3634:	learn 0.0674434658passed: 0.203 sec	total: 15m 2s	remaining: 5m 38s
3635:	learn 0.06744257826passed: 0.19 sec	total: 15m 2s	remaining: 5m 38s
3636:	learn 0.0674405638passed: 0.245 sec	total: 15m 2s	remaining: 5m 38s
3637:	learn 0.06743798568passed: 0.374 sec	t

3734:	learn 0.06716608329passed: 0.198 sec	total: 15m 27s	remaining: 5m 14s
3735:	learn 0.06716354508passed: 0.187 sec	total: 15m 27s	remaining: 5m 13s
3736:	learn 0.06716107741passed: 0.207 sec	total: 15m 27s	remaining: 5m 13s
3737:	learn 0.06715941914passed: 0.203 sec	total: 15m 28s	remaining: 5m 13s
3738:	learn 0.06715616097passed: 0.203 sec	total: 15m 28s	remaining: 5m 13s
3739:	learn 0.06715327176passed: 0.21 sec	total: 15m 28s	remaining: 5m 12s
3740:	learn 0.06715052807passed: 0.194 sec	total: 15m 28s	remaining: 5m 12s
3741:	learn 0.06714888754passed: 0.189 sec	total: 15m 28s	remaining: 5m 12s
3742:	learn 0.06714688183passed: 0.191 sec	total: 15m 29s	remaining: 5m 12s
3743:	learn 0.06714355367passed: 0.205 sec	total: 15m 29s	remaining: 5m 11s
3744:	learn 0.0671412058passed: 0.517 sec	total: 15m 29s	remaining: 5m 11s
3745:	learn 0.0671347329passed: 0.502 sec	total: 15m 30s	remaining: 5m 11s
3746:	learn 0.06713125939passed: 0.485 sec	total: 15m 30s	remaining: 5m 11s
3747:	learn 0.0

3843:	learn 0.06688102523passed: 0.238 sec	total: 15m 57s	remaining: 4m 48s
3844:	learn 0.06687863125passed: 0.289 sec	total: 15m 58s	remaining: 4m 47s
3845:	learn 0.06687566159passed: 0.254 sec	total: 15m 58s	remaining: 4m 47s
3846:	learn 0.06687280515passed: 0.269 sec	total: 15m 58s	remaining: 4m 47s
3847:	learn 0.06686841628passed: 0.232 sec	total: 15m 58s	remaining: 4m 47s
3848:	learn 0.06686656093passed: 0.243 sec	total: 15m 59s	remaining: 4m 46s
3849:	learn 0.0668641861passed: 0.201 sec	total: 15m 59s	remaining: 4m 46s
3850:	learn 0.06686165024passed: 0.211 sec	total: 15m 59s	remaining: 4m 46s
3851:	learn 0.06685994887passed: 0.219 sec	total: 15m 59s	remaining: 4m 46s
3852:	learn 0.0668582867passed: 0.219 sec	total: 15m 59s	remaining: 4m 45s
3853:	learn 0.06685686415passed: 0.219 sec	total: 16m	remaining: 4m 45s
3854:	learn 0.06685439236passed: 0.229 sec	total: 16m	remaining: 4m 45s
3855:	learn 0.06685126569passed: 0.231 sec	total: 16m	remaining: 4m 44s
3856:	learn 0.06684735478p

3953:	learn 0.06657461653passed: 0.493 sec	total: 16m 27s	remaining: 4m 21s
3954:	learn 0.06657395997passed: 0.547 sec	total: 16m 27s	remaining: 4m 20s
3955:	learn 0.0665697861passed: 0.246 sec	total: 16m 27s	remaining: 4m 20s
3956:	learn 0.06656748353passed: 0.184 sec	total: 16m 28s	remaining: 4m 20s
3957:	learn 0.06656535469passed: 0.185 sec	total: 16m 28s	remaining: 4m 20s
3958:	learn 0.06656155847passed: 0.181 sec	total: 16m 28s	remaining: 4m 19s
3959:	learn 0.06656011185passed: 0.181 sec	total: 16m 28s	remaining: 4m 19s
3960:	learn 0.06655847244passed: 0.178 sec	total: 16m 28s	remaining: 4m 19s
3961:	learn 0.06655481126passed: 0.178 sec	total: 16m 28s	remaining: 4m 19s
3962:	learn 0.06655109223passed: 0.18 sec	total: 16m 29s	remaining: 4m 18s
3963:	learn 0.06654863466passed: 0.198 sec	total: 16m 29s	remaining: 4m 18s
3964:	learn 0.06654616047passed: 0.183 sec	total: 16m 29s	remaining: 4m 18s
3965:	learn 0.06654434415passed: 0.187 sec	total: 16m 29s	remaining: 4m 18s
3966:	learn 0.

4062:	learn 0.06629619133passed: 0.203 sec	total: 16m 54s	remaining: 3m 53s
4063:	learn 0.06629367592passed: 0.218 sec	total: 16m 54s	remaining: 3m 53s
4064:	learn 0.06629144596passed: 0.256 sec	total: 16m 54s	remaining: 3m 53s
4065:	learn 0.06628926691passed: 0.303 sec	total: 16m 54s	remaining: 3m 53s
4066:	learn 0.06628658124passed: 0.569 sec	total: 16m 55s	remaining: 3m 52s
4067:	learn 0.06628437606passed: 0.27 sec	total: 16m 55s	remaining: 3m 52s
4068:	learn 0.06628140038passed: 0.28 sec	total: 16m 55s	remaining: 3m 52s
4069:	learn 0.06628044983passed: 0.28 sec	total: 16m 56s	remaining: 3m 52s
4070:	learn 0.06627833874passed: 0.215 sec	total: 16m 56s	remaining: 3m 51s
4071:	learn 0.06627537885passed: 0.218 sec	total: 16m 56s	remaining: 3m 51s
4072:	learn 0.0662728835passed: 0.202 sec	total: 16m 56s	remaining: 3m 51s
4073:	learn 0.06627115163passed: 0.243 sec	total: 16m 57s	remaining: 3m 51s
4074:	learn 0.06626986146passed: 0.229 sec	total: 16m 57s	remaining: 3m 50s
4075:	learn 0.06

4171:	learn 0.06603281398passed: 0.183 sec	total: 17m 22s	remaining: 3m 26s
4172:	learn 0.06603076473passed: 0.191 sec	total: 17m 22s	remaining: 3m 26s
4173:	learn 0.06602811563passed: 0.19 sec	total: 17m 22s	remaining: 3m 26s
4174:	learn 0.06602464077passed: 0.225 sec	total: 17m 23s	remaining: 3m 26s
4175:	learn 0.0660221343passed: 0.217 sec	total: 17m 23s	remaining: 3m 25s
4176:	learn 0.06602017558passed: 0.194 sec	total: 17m 23s	remaining: 3m 25s
4177:	learn 0.0660186475passed: 0.187 sec	total: 17m 23s	remaining: 3m 25s
4178:	learn 0.06601695389passed: 0.184 sec	total: 17m 23s	remaining: 3m 25s
4179:	learn 0.06601468209passed: 0.198 sec	total: 17m 24s	remaining: 3m 24s
4180:	learn 0.06601148282passed: 0.208 sec	total: 17m 24s	remaining: 3m 24s
4181:	learn 0.0660089285passed: 0.205 sec	total: 17m 24s	remaining: 3m 24s
4182:	learn 0.06600596203passed: 0.39 sec	total: 17m 24s	remaining: 3m 24s
4183:	learn 0.06600360992passed: 0.259 sec	total: 17m 25s	remaining: 3m 23s
4184:	learn 0.066

4281:	learn 0.06576043839passed: 0.211 sec	total: 17m 46s	remaining: 2m 58s
4282:	learn 0.06575757789passed: 0.213 sec	total: 17m 46s	remaining: 2m 58s
4283:	learn 0.06575522001passed: 0.197 sec	total: 17m 46s	remaining: 2m 58s
4284:	learn 0.06575335825passed: 0.221 sec	total: 17m 47s	remaining: 2m 58s
4285:	learn 0.06575095945passed: 0.258 sec	total: 17m 47s	remaining: 2m 57s
4286:	learn 0.06574633612passed: 0.273 sec	total: 17m 47s	remaining: 2m 57s
4287:	learn 0.06574354208passed: 0.203 sec	total: 17m 47s	remaining: 2m 57s
4288:	learn 0.0657402342passed: 0.199 sec	total: 17m 48s	remaining: 2m 57s
4289:	learn 0.06573729931passed: 0.183 sec	total: 17m 48s	remaining: 2m 56s
4290:	learn 0.06573568816passed: 0.187 sec	total: 17m 48s	remaining: 2m 56s
4291:	learn 0.06573206734passed: 0.188 sec	total: 17m 48s	remaining: 2m 56s
4292:	learn 0.06572987185passed: 0.188 sec	total: 17m 48s	remaining: 2m 56s
4293:	learn 0.0657265024passed: 0.195 sec	total: 17m 48s	remaining: 2m 55s
4294:	learn 0.

4390:	learn 0.06547914204passed: 0.256 sec	total: 18m 8s	remaining: 2m 31s
4391:	learn 0.06547664808passed: 0.26 sec	total: 18m 9s	remaining: 2m 30s
4392:	learn 0.06547310523passed: 0.262 sec	total: 18m 9s	remaining: 2m 30s
4393:	learn 0.06546929189passed: 0.282 sec	total: 18m 9s	remaining: 2m 30s
4394:	learn 0.06546798888passed: 0.318 sec	total: 18m 9s	remaining: 2m 30s
4395:	learn 0.06546543991passed: 0.418 sec	total: 18m 10s	remaining: 2m 29s
4396:	learn 0.06546258879passed: 0.556 sec	total: 18m 10s	remaining: 2m 29s
4397:	learn 0.06546064684passed: 0.205 sec	total: 18m 11s	remaining: 2m 29s
4398:	learn 0.06545821203passed: 0.23 sec	total: 18m 11s	remaining: 2m 29s
4399:	learn 0.06545611564passed: 0.222 sec	total: 18m 11s	remaining: 2m 28s
4400:	learn 0.06545468338passed: 0.227 sec	total: 18m 11s	remaining: 2m 28s
4401:	learn 0.06545459198passed: 0.169 sec	total: 18m 11s	remaining: 2m 28s
4402:	learn 0.0654515018passed: 0.268 sec	total: 18m 12s	remaining: 2m 28s
4403:	learn 0.065447

4500:	learn 0.06520661657passed: 0.182 sec	total: 18m 34s	remaining: 2m 3s
4501:	learn 0.06520604478passed: 0.182 sec	total: 18m 35s	remaining: 2m 3s
4502:	learn 0.06520536286passed: 0.18 sec	total: 18m 35s	remaining: 2m 3s
4503:	learn 0.06520311722passed: 0.197 sec	total: 18m 35s	remaining: 2m 2s
4504:	learn 0.06519859965passed: 0.19 sec	total: 18m 35s	remaining: 2m 2s
4505:	learn 0.0651973033passed: 0.184 sec	total: 18m 35s	remaining: 2m 2s
4506:	learn 0.06519458164passed: 0.178 sec	total: 18m 35s	remaining: 2m 2s
4507:	learn 0.0651917537passed: 0.188 sec	total: 18m 36s	remaining: 2m 1s
4508:	learn 0.06518760239passed: 0.185 sec	total: 18m 36s	remaining: 2m 1s
4509:	learn 0.06518447884passed: 0.206 sec	total: 18m 36s	remaining: 2m 1s
4510:	learn 0.0651813642passed: 0.19 sec	total: 18m 36s	remaining: 2m 1s
4511:	learn 0.06517804217passed: 0.193 sec	total: 18m 36s	remaining: 2m
4512:	learn 0.06517417137passed: 0.189 sec	total: 18m 37s	remaining: 2m
4513:	learn 0.06517118014passed: 0.18

4609:	learn 0.06492626927passed: 0.185 sec	total: 18m 56s	remaining: 1m 36s
4610:	learn 0.0649242371passed: 0.189 sec	total: 18m 56s	remaining: 1m 35s
4611:	learn 0.06492076556passed: 0.195 sec	total: 18m 57s	remaining: 1m 35s
4612:	learn 0.06491896609passed: 0.186 sec	total: 18m 57s	remaining: 1m 35s
4613:	learn 0.06491634554passed: 0.19 sec	total: 18m 57s	remaining: 1m 35s
4614:	learn 0.06491432902passed: 0.2 sec	total: 18m 57s	remaining: 1m 34s
4615:	learn 0.06491199172passed: 0.183 sec	total: 18m 57s	remaining: 1m 34s
4616:	learn 0.06490962002passed: 0.189 sec	total: 18m 57s	remaining: 1m 34s
4617:	learn 0.06490540063passed: 0.197 sec	total: 18m 58s	remaining: 1m 34s
4618:	learn 0.06490229617passed: 0.204 sec	total: 18m 58s	remaining: 1m 33s
4619:	learn 0.06489817293passed: 0.196 sec	total: 18m 58s	remaining: 1m 33s
4620:	learn 0.06489685061passed: 0.19 sec	total: 18m 58s	remaining: 1m 33s
4621:	learn 0.06489565959passed: 0.194 sec	total: 18m 58s	remaining: 1m 33s
4622:	learn 0.064

4828:	learn 0.06441045891passed: 0.176 sec	total: 19m 37s	remaining: 41.7s
4829:	learn 0.06440838273passed: 0.183 sec	total: 19m 37s	remaining: 41.4s
4830:	learn 0.0644063965passed: 0.18 sec	total: 19m 37s	remaining: 41.2s
4831:	learn 0.0644047779passed: 0.181 sec	total: 19m 37s	remaining: 40.9s
4832:	learn 0.06440311233passed: 0.181 sec	total: 19m 37s	remaining: 40.7s
4833:	learn 0.06440002172passed: 0.179 sec	total: 19m 38s	remaining: 40.5s
4834:	learn 0.06439676803passed: 0.181 sec	total: 19m 38s	remaining: 40.2s
4835:	learn 0.06439471669passed: 0.179 sec	total: 19m 38s	remaining: 40s
4836:	learn 0.06439243215passed: 0.184 sec	total: 19m 38s	remaining: 39.7s
4837:	learn 0.06438934272passed: 0.185 sec	total: 19m 38s	remaining: 39.5s
4838:	learn 0.0643869155passed: 0.183 sec	total: 19m 39s	remaining: 39.2s
4839:	learn 0.06438508332passed: 0.181 sec	total: 19m 39s	remaining: 39s
4840:	learn 0.06438089092passed: 0.187 sec	total: 19m 39s	remaining: 38.7s
4841:	learn 0.06437856884passed: 

4938:	learn 0.06415300608passed: 0.186 sec	total: 19m 57s	remaining: 14.8s
4939:	learn 0.06415213178passed: 0.177 sec	total: 19m 57s	remaining: 14.5s
4940:	learn 0.06414934151passed: 0.182 sec	total: 19m 57s	remaining: 14.3s
4941:	learn 0.06414833579passed: 0.181 sec	total: 19m 57s	remaining: 14.1s
4942:	learn 0.06414522989passed: 0.185 sec	total: 19m 58s	remaining: 13.8s
4943:	learn 0.06414357946passed: 0.179 sec	total: 19m 58s	remaining: 13.6s
4944:	learn 0.06414235796passed: 0.18 sec	total: 19m 58s	remaining: 13.3s
4945:	learn 0.06414017724passed: 0.186 sec	total: 19m 58s	remaining: 13.1s
4946:	learn 0.06413608924passed: 0.183 sec	total: 19m 58s	remaining: 12.8s
4947:	learn 0.0641340903passed: 0.183 sec	total: 19m 58s	remaining: 12.6s
4948:	learn 0.06413062571passed: 0.187 sec	total: 19m 59s	remaining: 12.4s
4949:	learn 0.06412885614passed: 0.179 sec	total: 19m 59s	remaining: 12.1s
4950:	learn 0.06412725064passed: 0.181 sec	total: 19m 59s	remaining: 11.9s
4951:	learn 0.06412473161pa

In [6]:
    # LightGBM model. The `hyperopt` switch chooses between a hyperopt search
    # over `lgbm.space` and a single fit of the fixed estimator settings below.
    DO_LOWESS = False
    hyperopt = False

    lgbm = HyperoptModel(train.copy(), test.copy(), 'lgbm', cv=5)
    lgbm.raw_features = []

    # Estimator settings are hard-coded here; they are not the library defaults.
    # NOTE(review): LightGBM documents that bagging_fraction only takes effect
    # when bagging_freq is non-zero; bagging_freq is not set here -- confirm.
    lgbm_params = {'bagging_fraction': 0.9583593582453502,
                   'feature_fraction': 0.797191970090108,
                   'lambda_l1': 0,
                   'lambda_l2': 0,
                   'learning_rate': 0.06967397660277702,
                   'min_data_in_leaf': 2,
                   'min_sum_hessian_in_leaf': 3.8117576166032006,
                   'n_estimators': 435,
                   'num_leaves': 287,
                   'objective': 'regression',
                   'seed': 0}

    # Feature blocks handed to the estimator; the time-feature block is
    # commented out for this model.
    lgbm_feature_union = FeatureUnion([
        ('user_features', CustomDataFrameMapper(user_features_transformations)),
        #('time_features', CustomDataFrameMapper(time_features_transformations)),
        ('answer_features', CustomDataFrameMapper(answer_features_transformations)),
        ('question_features', CustomDataFrameMapper(question_features_transformations))
    ])
    lgbm.pipeline = Pipeline([
        ('prepare_features', lgbm_feature_union),
        ('estimate', lgb.LGBMRegressor(**lgbm_params))
    ])

    # Collect the first element of every mapper entry: taken as-is when it is
    # a string, otherwise its own first item is used.
    lgbm_columns = []
    for _block_name, mapper in lgbm.pipeline.named_steps['prepare_features'].transformer_list:
        for feature_spec in mapper.features:
            selector = feature_spec[0]
            lgbm_columns.append(selector if isinstance(selector, str) else selector[0])
    lgbm.raw_features += lgbm_columns

    """ find number of trees """
    # Disabled recipe: early stopping on a held-out 20% split to pick the
    # number of trees.
    # num_trees_train, num_trees_eval = train_test_split(train, test_size=0.2, random_state=0)
    # X = num_trees_train[list(filter(lambda column: column in lgbm.raw_features, lgbm.train.columns))]
    # y = num_trees_train['score']
    #
    # eval_X = num_trees_eval[list(filter(lambda column: column in lgbm.raw_features, lgbm.train.columns))]
    # eval_X = lgbm.pipeline.named_steps['prepare_features'].fit_transform(eval_X)
    # eval_set = (eval_X, num_trees_eval['score'])
    #
    # best = lgbm.pipeline.fit(X=X, y=y, estimate__eval_set=eval_set, estimate__early_stopping_rounds=10)
    # print(best.named_steps['estimate'].best_iteration)

    # Hyperopt search space, only consumed when `hyperopt` is True.
    lgbm.space = {
        # single-option choices: these stay fixed during the search
        'estimate__objective': hp.choice('estimate__objective', ['regression']),
        'estimate__n_estimators': hp.choice('estimate__n_estimators', [400]),
        'estimate__seed': hp.choice('estimate__seed', [0]),

        # searched parameters
        'estimate__learning_rate': hp.loguniform('estimate__learning_rate', -7, 0),
        'estimate__num_leaves': scope.int(hp.qloguniform('estimate__num_leaves', 1, 7, 1)),
        'estimate__feature_fraction': hp.uniform('estimate__feature_fraction', 0.5, 1),
        'estimate__bagging_fraction': hp.uniform('estimate__bagging_fraction', 0.5, 1),
        'estimate__min_data_in_leaf': scope.int(hp.qloguniform('estimate__min_data_in_leaf', 0, 6, 1)),
        'estimate__min_sum_hessian_in_leaf': hp.loguniform('estimate__min_sum_hessian_in_leaf', -16, 5),
        # regularisation: either exactly 0 or a log-uniform positive value
        'estimate__lambda_l1': hp.choice('lambda_l1', [0, hp.loguniform('estimate__lambda_l1_positive', -16, 2)]),
        'estimate__lambda_l2': hp.choice('lambda_l2', [0, hp.loguniform('estimate__lambda_l2_positive', -16, 2)]),
    }

    if not hyperopt:
        # Single fit with the fixed settings above, then report and plot.
        lgbm.pipeline.fit(X=lgbm.X_train, y=lgbm.y_train)
        lgbm.model = lgbm.pipeline
        lgbm.stats()
        lgbm.plot_predicted_vs_actual()
        lgbm.plot_residuals(r_type='raw', do_lowess=False)
        lgbm.plot_feature_importance()
    else:
        lgbm.run(do_lowess=DO_LOWESS)

Stats (train | test):
	R^2 score:		0.7285
					0.3895
	RMSE:			0.0556
					0.0834
	Mean error:		0.0429
					0.0639
	Pearson:		0.8709
					0.6241
	Spearman:		0.8618
					0.6313
	KendallTau:		0.6779
					0.4515

Plotting predicted vs. actual ...done

Plotting residuals ...done

Plotting feature importances ...done



In [8]:
    # Random forest model. The `hyperopt` switch chooses between a hyperopt
    # search over `rf.space` and a single fit of the fixed settings below.
    DO_LOWESS = False
    hyperopt = False

    rf = HyperoptModel(train.copy(), test.copy(),'rf', cv=3, max_evals = 10)
    rf.raw_features = []

    # Estimator settings are hard-coded here; they are not the library defaults.
    rf_params = {'max_features': 0.5907165396346349,
                 'min_samples_leaf': 10,
                 'n_estimators': 2208,
                 'oob_score': True,
                 'random_state': 0}

    # All four feature blocks are used for this model.
    rf_feature_union = FeatureUnion([
        ('user_features', CustomDataFrameMapper(user_features_transformations)),
        ('time_features', CustomDataFrameMapper(time_features_transformations)),
        ('answer_features', CustomDataFrameMapper(answer_features_transformations)),
        ('question_features', CustomDataFrameMapper(question_features_transformations))
    ])
    rf.pipeline = Pipeline([
        ('prepare_features', rf_feature_union),
        ('estimate', RandomForestRegressor(**rf_params))
    ])

    # Collect the first element of every mapper entry: taken as-is when it is
    # a string, otherwise its own first item is used.
    rf_columns = []
    for _block_name, mapper in rf.pipeline.named_steps['prepare_features'].transformer_list:
        for feature_spec in mapper.features:
            selector = feature_spec[0]
            rf_columns.append(selector if isinstance(selector, str) else selector[0])
    rf.raw_features += rf_columns

    # Hyperopt search space, only consumed when `hyperopt` is True.
    rf.space = {
        # single-option choices: these stay fixed during the search
        'estimate__random_state': hp.choice('estimate__random_state', [0]),
        'estimate__oob_score': hp.choice('estimate__oob_score', [True]),

        # searched parameters
        'estimate__max_features': hp.uniform('estimate__max_features', 0, 1.),
        'estimate__n_estimators': hp.choice('estimate__n_estimators', range(1, 3000 + 1)),
        #'estimate__criterion': hp.choice('estimate__criterion', ['gini', 'entropy']),
        'estimate__min_samples_leaf': hp.choice('estimate__min_samples_leaf', range(1, 100 + 1)),
        #'estimate__scale': hp.choice('estimate__scale', [0, 1.]),
        #'estimate__normalize': hp.choice('estimate__normalize', [0, 1.]),
    }

    if not hyperopt:
        # Single fit with the fixed settings above, then report and plot.
        rf.pipeline.fit(X=rf.X_train, y=rf.y_train)
        rf.model = rf.pipeline
        rf.stats()
        rf.plot_predicted_vs_actual()
        rf.plot_residuals(r_type='raw', do_lowess=False)
        rf.plot_feature_importance()
    else:
        rf.run(do_lowess=DO_LOWESS)

Stats (train | test):
	R^2 score:		0.6091
					0.3897
	RMSE:			0.0667
					0.0834
	Mean error:		0.0502
					0.0634
	Pearson:		0.7955
					0.6243
	Spearman:		0.8069
					0.6355
	KendallTau:		0.6151
					0.4557

Plotting predicted vs. actual ...done

Plotting residuals ...done

Plotting feature importances ...done



In [9]:
    # SVR model. The `hyperopt` switch chooses between a hyperopt search over
    # `svm.space` and a single fit of the fixed SVR settings below.
    DO_LOWESS = False
    hyperopt = False

    # NOTE(review): this rebinds the notebook-level *_features_transformations
    # names, so any cell executed after this one that references them (the MLP
    # cell below does) receives the SVR-specific variants -- confirm intended.
    svr_transformations = _svr_features_transformations()
    (answer_features_transformations,
     question_features_transformations,
     time_features_transformations,
     user_features_transformations) = svr_transformations

    svm = HyperoptModel(train.copy(), test.copy(), 'svr', cv=3)

    # All four feature blocks are used for this model.
    svr_feature_union = FeatureUnion([
        ('user_features', CustomDataFrameMapper(user_features_transformations)),
        ('time_features', CustomDataFrameMapper(time_features_transformations)),
        ('answer_features', CustomDataFrameMapper(answer_features_transformations)),
        ('question_features', CustomDataFrameMapper(question_features_transformations))
    ])
    svr_estimator = SVR(kernel='rbf', C=3.376124349816575, gamma=0.0069678844996990535)
    svm.pipeline = Pipeline([
        ('prepare_features', svr_feature_union),
        ('estimate', svr_estimator)
    ])

    # Collect the first element of every mapper entry: taken as-is when it is
    # a string, otherwise its own first item is used.
    svm.raw_features = []
    svr_columns = []
    for _block_name, mapper in svm.pipeline.named_steps['prepare_features'].transformer_list:
        for feature_spec in mapper.features:
            selector = feature_spec[0]
            svr_columns.append(selector if isinstance(selector, str) else selector[0])
    svm.raw_features += svr_columns

    # Background on the two searched parameters, kept verbatim as a no-op
    # string. NOTE(review): the wording describes classification, while the
    # estimator here is a regressor.
    """
    1) The C parameter trades off misclassification of training examples against simplicity of the decision surface. 
    A low C makes the decision surface smooth, while a high C aims at classifying all training examples correctly 
        by giving the model freedom to select more samples as support vectors.
        
    2)  The gamma parameter defines how far the influence of a single training example reaches, 
    with low values meaning ‘far’ and high values meaning ‘close’. 
    The gamma parameters can be seen as the inverse of the 
        radius of influence of samples selected by the model as support vectors.
    """

    # default_gamma = 1. / len(svm.raw_features)
    # Hyperopt search space, only consumed when `hyperopt` is True.
    svm.space = {
        'estimate__C': hp.uniform('estimate__C', 0, 10.),
        'estimate__kernel': hp.choice('estimate__kernel', ['linear', 'sigmoid', 'rbf']),
        'estimate__gamma': hp.uniform('estimate__gamma', 0, 10.),
    }

    if not hyperopt:
        # Single fit with the fixed settings above, then report and plot.
        svm.pipeline.fit(X=svm.X_train, y=svm.y_train)
        svm.model = svm.pipeline
        svm.stats()
        svm.plot_predicted_vs_actual()
        svm.plot_residuals(r_type='raw', do_lowess=False)
        svm.plot_feature_importance()
    else:
        svm.run(do_lowess=DO_LOWESS)

Stats (train | test):
	R^2 score:		0.4494
					0.3180
	RMSE:			0.0791
					0.0881
	Mean error:		0.0637
					0.0686
	Pearson:		0.6777
					0.5688
	Spearman:		0.6773
					0.5801
	KendallTau:		0.4885
					0.4096

Plotting predicted vs. actual ...done

Plotting residuals ...done



In [7]:
    # MLP regressor. With `hyperopt` True the search over `nn.space` is run
    # (cv=3, max_evals=100); otherwise the pipeline is fitted once as defined.
    DO_LOWESS = False
    hyperopt = True
    nn = HyperoptModel(train.copy(), test.copy(),'nn', cv=3, max_evals=100)
    nn.raw_features = []

    nn.pipeline = Pipeline([
         ('prepare_features', FeatureUnion([
            ('user_features', CustomDataFrameMapper(user_features_transformations)),
            ('time_features', CustomDataFrameMapper(time_features_transformations)),
            ('answer_features', CustomDataFrameMapper(answer_features_transformations)),
            ('question_features', CustomDataFrameMapper(question_features_transformations))
        ])),
        # Pin the seed: MLPRegressor initialises weights and shuffles batches
        # randomly, so without it neither the CV scores nor the final fit are
        # reproducible. Matches the seed 0 used by the lgbm and rf cells.
        ('estimate', MLPRegressor(random_state=0))
    ])

    # Collect the first element of every mapper entry: taken as-is when it is
    # a string, otherwise its own first item is used.
    for transformer in nn.pipeline.named_steps['prepare_features'].transformer_list:
        nn.raw_features += [t[0] if isinstance(t[0], str) else t[0][0] for t in transformer[1].features]

    # Hyperopt search space, only consumed when `hyperopt` is True.
    nn.space = {
         # fixed during the search, same pattern as the rf cell's random_state
         'estimate__random_state' : hp.choice('estimate__random_state', [0]),

         'estimate__alpha' : hp.uniform('estimate__alpha', 0.00001, 1),
         'estimate__activation' : hp.choice('estimate__activation', ['logistic']), # 'identity', 'logistic', 'tanh', 'relu'
         #'estimate__learning_rate' : hp.choice('estimate__learning_rate', ['constant', 'invscaling', 'adaptive']),
         # A bare int is passed here, not a tuple (the original parentheses had
         # no trailing comma), so a single width value is searched.
         'estimate__hidden_layer_sizes' : scope.int(hp.uniform('estimate__first_layer', 1, 100)),
         'estimate__solver' : hp.choice('estimate__solver', ['adam']), #'lbfgs', 'sgd',
         #'estimate__max_iter' : scope.int(hp.uniform('estimate__max_iter', 500, 1000))
    }

    if hyperopt:
        nn.run(do_lowess=DO_LOWESS)
    else:
        # Single fit of the pipeline as defined above, then report and plot.
        nn.pipeline.fit(X=nn.X_train, y=nn.y_train)
        nn.model = nn.pipeline
        nn.stats()
        nn.plot_predicted_vs_actual(do_lowess=DO_LOWESS)
        nn.plot_feature_importance()
        nn.qq_plot()

Performing parameters optimization...
[1/100]	cv_eval_time=335.20 sec	RMSE=0.106372	R^2=-0.002218
[2/100]	cv_eval_time=345.03 sec	RMSE=0.106391	R^2=-0.011225
[3/100]	cv_eval_time=361.85 sec	RMSE=0.106283	R^2=-0.008827
[4/100]	cv_eval_time=352.93 sec	RMSE=0.105946	R^2=-0.012185
[5/100]	cv_eval_time=341.24 sec	RMSE=0.106091	R^2=-0.000501
[6/100]	cv_eval_time=344.44 sec	RMSE=0.108364	R^2=0.010731
[7/100]	cv_eval_time=315.00 sec	RMSE=0.106169	R^2=-0.000413
[8/100]	cv_eval_time=329.25 sec	RMSE=0.105362	R^2=0.009370
[9/100]	cv_eval_time=337.77 sec	RMSE=0.106850	R^2=-0.013371
[10/100]	cv_eval_time=321.44 sec	RMSE=0.106537	R^2=0.010055
[11/100]	cv_eval_time=330.16 sec	RMSE=0.105916	R^2=-0.008729
[12/100]	cv_eval_time=332.60 sec	RMSE=0.106702	R^2=-0.006775
[13/100]	cv_eval_time=336.99 sec	RMSE=0.106635	R^2=0.016113
[14/100]	cv_eval_time=329.66 sec	RMSE=0.106594	R^2=0.006906
[15/100]	cv_eval_time=324.41 sec	RMSE=0.107505	R^2=-0.013101
[16/100]	cv_eval_time=339.50 sec	RMSE=0.106847	R^2=0.023502
[

In [None]:
    # Polynomial linear regression baseline: fit LinearRegression on polynomial
    # expansions of degree 1..`degrees` of a fixed numeric column subset and
    # print train/test metrics for each degree.

    # Keep only non-string, non-datetime columns; drop() without inplace so
    # re-running the cell never mutates a frame built by an earlier run.
    dfPoly = df.select_dtypes(exclude=['object','datetime64'])
    y = dfPoly['score'].copy()
    dfPoly = dfPoly.drop(['score', 'user_id'], axis=1)
    # Explicit feature subset (leaves out the subjectivity and answer-text
    # derived columns).
    # NOTE(review): 'answer_upvotes_num' is used as a feature while 'score' is
    # the target -- if score is derived from upvotes this leaks the target;
    # confirm.
    dfPoly = dfPoly[['user_answers_num', 'user_questions_num', 'user_blogs_num',
       'user_posts_num', 'user_edits_num', 'user_followers_num',
       'user_followings_num', 'user_has_picture', 'user_topics_num',
       'answer_id', 'answer_is_downvoted', 'answer_comments_num',
       'answer_upvotes_num', 'answer_views_num', 'answer_rank',
       'question_fetched_answers_num', 'question_expected_answers_num',
       'question_followers_num', 'question_comments_num',
       'user_fetched_answers_num', 'days_since_epoch', 'days_rescaled',
       'user_z_score', 'user_ff_ratio', 'max_answer_rank', 'answer_rank_ratio',
       'user_top_score_ratio']]

    # Split once, with a fixed seed, BEFORE the loop: previously every degree
    # drew its own unseeded random split, so the per-degree metrics were
    # neither comparable with each other nor reproducible.
    X_train, X_test, y_train, y_test = train_test_split(dfPoly, y, test_size=0.2, random_state=0)

    #Polynomial linear regression
    degrees = 4
    for d in range(1, degrees+1):
        # Printed before fitting so progress is visible while a degree fits.
        print("Degree: %s" % d)
        # Create the model and fit it on the shared training split
        polynomial_features = PolynomialFeatures(
            degree=d, include_bias=False
        )
        linear_regression = linear_model.LinearRegression()
        model = Pipeline([
            ("polynomial_features", polynomial_features),
            ("linear_regression", linear_regression)
        ])

        model.fit(X_train, y_train)
        # Calculate the metrics
        train_prediction = model.predict(X_train)
        test_prediction = model.predict(X_test)

        # Each metric is printed as train value, then test value on the next line.
        print('Polynomial degree: {}'.format(d))
        print('Stats (train | test):')
        print('\tR^2 score:\t\t%.4f\n\t\t\t\t\t%.4f' % (r2_score(y_train, train_prediction),
                                                    r2_score(y_test, test_prediction)))
        print('\tRMSE:\t\t\t%.4f\n\t\t\t\t\t%.4f' % (mean_squared_error(y_train, train_prediction) ** 0.5,
                                                 mean_squared_error(y_test, test_prediction) ** 0.5))
        print('\tMean error:\t\t%.4f\n\t\t\t\t\t%.4f' % (mean_absolute_error(y_train, train_prediction),
                                                     mean_absolute_error(y_test, test_prediction)))
        print('\tPearson:\t\t%.4f\n\t\t\t\t\t%.4f' % (pearsonr(y_train, train_prediction)[0],
                                                     pearsonr(y_test, test_prediction)[0]))
        print('\tSpearman:\t\t%.4f\n\t\t\t\t\t%.4f' % (spearmanr(y_train, train_prediction)[0],
                                                     spearmanr(y_test, test_prediction)[0]))
        print('\tKendallTau:\t\t%.4f\n\t\t\t\t\t%.4f' % (kendalltau(y_train, train_prediction)[0],
                                                     kendalltau(y_test, test_prediction)[0]))
            

Degree: 1
Polynomial degree: 1
Stats (train | test):
	R^2 score:		0.1881
					0.2100
	RMSE:			0.0962
					0.0944
	Mean error:		0.0743
					0.0744
	Pearson:		0.4337
					0.4616
	Spearman:		0.4971
					0.4923
	KendallTau:		0.3442
					0.3406
Degree: 2


Index(['user_answers_num', 'user_questions_num', 'user_blogs_num',
       'user_posts_num', 'user_edits_num', 'user_followers_num',
       'user_followings_num', 'user_has_picture', 'user_topics_num',
       'answer_id', 'answer_is_downvoted', 'answer_comments_num',
       'answer_upvotes_num', 'answer_views_num', 'answer_rank', 'question_id',
       'question_fetched_answers_num', 'question_expected_answers_num',
       'question_followers_num', 'question_comments_num',
       'user_fetched_answers_num', 'days_since_epoch', 'days_rescaled',
       'user_z_score', 'user_ff_ratio', 'max_answer_rank', 'answer_rank_ratio',
       'user_top_score_ratio', 'question_subjectivity', 'answer_smog_index',
       'answer_smog_min_age', 'user_obj_presense', 'user_subj_presense',
       'Unnamed: 0', 'lda_1', 'lda_2', 'lda_3', 'lda_4', 'lda_5', 'lda_6',
       'lda_7', 'lda_8', 'lda_9', 'lda_10', 'lda_11', 'lda_12', 'lda_13',
       'lda_14', 'lda_15', 'lda_16', 'lda_17', 'lda_18', 'lda_19', 'lda_2