In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm_notebook as tqdm

import gc
import pickle
from os import path

np.random.seed(0)

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer, accuracy_score
from ml_metrics import rmse

from functools import partial
from hyperopt import hp
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

import xgboost as xgb


import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
def beginFE():
    
    for one in globals().copy().keys():
        if one.endswith('_feats'):
            del globals()[one]
    if 'feats' in globals():
        del globals()['feats']
    
    train = pd.read_hdf('train_warsaw.h5')
    train = train.rename(columns={'timestamp': 'date'}).reset_index()
    train.drop( columns=['timestamp'], inplace = True)
    test = pd.read_hdf('test_warsaw.h5')
    test = test.rename(columns={'timestamp': 'date'}).reset_index()
    test.drop( columns=['timestamp'], inplace = True)
    
    df = pd.concat([train, test], sort=True)
    df.sort_values(['date', 'id'], inplace=True)
        
    df['hour']    = df.date.dt.hour
    df['day_m']   = df.date.dt.day
    df['month']   = df.date.dt.month
    df['year']    = df.date.dt.year
    df['week_y']  = df.date.dt.week
    df['day_y']   = df.date.dt.dayofyear
    df['day_w']   = df.date.dt.dayofweek
    df['quarter'] = df.date.dt.quarter
    df['hour_w']  = df.date.dt.hour + df.date.dt.dayofweek * 24
    
    df['pm25_log']      = (np.log(df[ df['pm25']>0 ]['pm25']+1))
    
    time_feats = ['quarter', 'month', 'week_y', 'day_y', 'day_m', 'day_w', 'hour', 'hour_w', 'year']
    black_list_feats = ['id', 'pm25', 'pm25_log', 'is_non_cont', 'id_dif', 'd_time', 'cont_nr']
    
    return df, time_feats, black_list_feats
    
def endFE(df, black_list_feats):

    # listy kawałków
    def count_cont_nr(flag):
        global value
        if flag:
            value = 1
        else:
            value += 1
        return int(value)
    
    df['is_non_cont'] = df.shift(1).id != (df.id - 1)
    df['id_dif'] = (df.id - df.shift(1).id).fillna(0).astype(int)
    df['d_time'] = df.date - df.shift(1).date
    df.reset_index(drop=True, inplace=True)
    df['cont_nr'] = df.apply(lambda x: count_cont_nr(x['is_non_cont']), axis=1)
    begin_idx = df.loc[df['is_non_cont']].index.values.astype(int)
    end_idx = (df.shift(1).loc[df['is_non_cont']]['cont_nr'].shift(-1) + 
               df.loc[df['is_non_cont']].index.values).values.astype(int)
    end_idx[-1] = int(len(df))
    parts = []
    for i in range(len(begin_idx)):
        parts.append( df.iloc[ begin_idx[i] : (end_idx[i]) ].copy() )
        
    train, test = df[ ~df.pm25.isnull() ], df[ df.pm25.isnull() ]
    
    num_feats = df.select_dtypes(include=[np.number, bool]).columns.values
    num_feats = [feat for feat in num_feats if feat not in black_list_feats]
    omit_feats = [x for x in df.columns if x not in num_feats]
            
#     df.fillna(-1, inplace=True)
#     df = df.iloc[24:,]
    
    return df, train, test, parts, num_feats, omit_feats
       
def print_feat_groups():   
    feats_list = ['black_list_feats', 'omit_feats', 'num_feats', 'time_feats']
    for one in feats_list:
        if one in globals():
            arr_feats = globals()[one]
            if arr_feats:
                print('--------------------------------------------------')
                print(one, arr_feats)
    print('--------------------------------------------------')

In [3]:
def feature_engineering_avg_no_shift(avg_len):
    
    df, time_feats, black_list_feats = beginFE()
    
    df['windSpeed_log'] = (np.log(df[ df['windSpeed']>0 ]['windSpeed']+5))
    df['windGust_log']  = (np.log(df[ df['windGust']>0  ]['windGust']+2))
    
    obj_feats = df.select_dtypes(np.object).columns
    for feat in obj_feats:
        df[feat + '_cat'] = df[feat].factorize()[0]
        
    df['avg_mov'] = df['pm25'].rolling( window=avg_len, min_periods=1).mean()
    df['avg_mov'] = df.apply(lambda x: np.nan if np.isnan(x['pm25']) else x['avg_mov'], axis=1)
    df['avg_diff'] = df.apply(lambda x: x['pm25']-x['avg_mov'], axis=1)
    
#     df['avg_diff'][df['avg_diff'] > 30] = 30                                   # cap value
#     df['avg_diff'][df['avg_diff'] < -30] = -30
    
    df, train, test, parts, num_feats, omit_feats = endFE(df, black_list_feats)

    return df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats

In [4]:
def feature_engineering_aggr_no_shift(avg_len=6, roll_nr=[6, 12], agr_fn=['min', 'max', 'mean', 'median']):
    
    df, time_feats, black_list_feats = beginFE()
    
    df['windSpeed_log'] = (np.log(df[ df['windSpeed']>0 ]['windSpeed']+5))
    df['windGust_log']  = (np.log(df[ df['windGust']>0  ]['windGust']+2))
    
    obj_feats = df.select_dtypes(np.object).columns
    for feat in obj_feats:
        df[feat + '_cat'] = df[feat].factorize()[0]
        
    past_feats = df.select_dtypes(include=[np.number, bool]).columns.values
    past_feats = [feat for feat in past_feats if feat not in black_list_feats]
    past_feats = [feat for feat in past_feats if feat not in time_feats]
    
    roll_columns = [feat for feat in past_feats if not feat.endswith('_cat')]   
    
    for r_feat in roll_columns:
        for fn in agr_fn:
            for nr in roll_nr:    
                suffix = '_roll{}{}'.format(nr, fn)
                df[r_feat + suffix] = getattr(df[r_feat].rolling(nr), fn)()
    roll_feats = [feat for feat in df.columns if '_roll' in feat]
        
    df['avg_mov'] = df['pm25'].rolling( window=avg_len, min_periods=1).mean()
    df['avg_mov'] = df.apply(lambda x: np.nan if np.isnan(x['pm25']) else x['avg_mov'], axis=1)
    df['avg_diff'] = df.apply(lambda x: x['pm25']-x['avg_mov'], axis=1)
    
#     df['avg_diff'][df['avg_diff'] > 30] = 30                                   # cap value
#     df['avg_diff'][df['avg_diff'] < -30] = -30
    
    df, train, test, parts, num_feats, omit_feats = endFE(df, black_list_feats)

    return df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats

In [5]:
# możliwość podania dowolnej ilosci punktów do predykcji

def run_model_nr(nr, model_selected, model_params, parts_to_go, feats_selected, y_log):
    
    train_df, y_train_all = pd.DataFrame(columns=feats_selected), np.array([])
    last_days_df, y_last_days, y_last_pred, y_pred_all = train_df.copy(), np.array([]), np.array([]), np.array([])
    models, global_scores, last_day_scores = [], np.array([]), np.array([])

    for i in np.arange(nr):
        models.append(model_selected(**model_params))          # odzielne modele dla 24h
    
    for train in tqdm(parts_to_go):                               # kolejne bloki danych
  
        X = train[feats_selected].values
        y = (train['pm25'].values).astype(np.float64)
        if y_log:
            y = (train['pm25_log'].values).astype(np.float64)
   
        X_last  = X[-25:-24]                                    # ostatni wiersz z bloku danych
        X_train = X[0:-25]                                      # pomija zbiór testowy i ostatni wiersz
        
        for i in np.arange(nr):                                 # każdy model szkolony oddzielnie
            y = pd.Series(y).shift(-1).values                   # przesuwa pm25 o 1h w przeszłość
            y_train = y[0:-25]                       
            models[i].fit(X_train, y_train)                        
            y_last = (models[i].predict(X_last)).astype(np.float64)
            y[-25] = y_last[0]                                  # prognoza na 1h
        
        y_pred = y[-nr-24:-24]                                  # prognoza na 24h
        if y_log:
            y_pred = np.exp(y_pred)-1
         
        #last day CV
        
        X_last_day = X[-nr-25:-nr-24]                           # godzina poprzedzająca ostatni dzień z danymi
        y_last_day = train.pm25[-nr-25:-25].values
        y_last_p = np.array([])
        for i in np.arange(nr):                     
            y_hour = (models[i].predict(X_last_day)).astype(np.float64)
            y_last_p = np.concatenate((y_last_p, y_hour))
        if y_log:
            y_last_p = np.exp(y_last_p)-1
        last_day_scores = np.append( last_day_scores, [rmse(y_last_day, y_last_p)] )
            
        train_df     = train_df + train[feats_selected][0:-25]
        last_days_df = last_days_df + train[feats_selected][-nr-25:-25]
        y_train_all  = np.concatenate((y_train_all, train.pm25[0:-25].values))
        y_last_days  = np.concatenate((y_last_days, y_last_day))
        y_last_pred  = np.concatenate((y_last_pred, y_last_p))
        y_pred_all   = np.concatenate((y_pred_all, y_pred))
        
        # CV
        
        side = 2  # pozwala uniknąć cyklicznych podobieństw parametru - dla 2 sprawdza 5 sąsiednich wartosci
        part_scores = np.array([])
        for i in np.arange(nr):                         # waliduje każdy model osobno
            scores = np.array([])
            for shift in np.arange(-i-side, -i+side):         # sprawdza przesunięcia wykresu - kiedy bład będzie najmniejszy?
                X_cv = train[feats_selected][0:-25].values
                y_cv = train.pm25[0:-25].values
                if y_log:
                    y_cv = train.pm25_log[0:-25].values
                y_cv = pd.Series(y_cv).shift(shift).values    # przesuwa y w przeszłośc (wartości ujemne)
                y_pred_cv = (models[i].predict(X_cv)).astype(np.float64)
                if y_log: 
                    y_cv = np.exp(y_cv)-1
                    y_pred_cv = np.exp(y_pred_cv)-1
                scores = np.append( scores, [rmse(y_cv[side:-side-nr], y_pred_cv[side:-side-nr])] )
            part_scores = np.append( part_scores, [scores[scores.argmin()]] )
#         print(part_scores.mean(), part_scores.std())

        global_scores = np.append( global_scores, part_scores )
    print(global_scores.mean(), last_day_scores.mean(), rmse(y_last_days, y_last_pred), 
          r2_score(y_last_days, y_last_pred), model_params)      
    return global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred

In [6]:
# pomija ostatni okres w zbiorze testowym i używa go do walidacji

def run_model_day(nr, model_selected, model_params, parts_to_go, feats_selected, y_log):
    
    train_df, y_train_all = pd.DataFrame(columns=feats_selected), np.array([])
    last_days_df, y_last_days, y_last_pred, y_pred_all = train_df.copy(), np.array([]), np.array([]), np.array([])
    models, global_scores, last_day_scores = [], np.array([]), np.array([])

    for i in np.arange(nr):
        models.append(model_selected(**model_params))          # odzielne modele dla 24h
    
    for train in tqdm(parts_to_go):                               # kolejne bloki danych
  
        X = train[feats_selected].values
        y = (train['pm25'].values).astype(np.float64)
        if y_log:
            y = (train['pm25_log'].values).astype(np.float64)
   
        X_last  = X[-25:-24]                                    # ostatni wiersz z bloku danych
        X_train = X[0:-nr-25]                                   # pomija zbiór testowy, ostatni wiersz i ostatni dzień (CV)
        
        for i in np.arange(nr):                                 # każdy model szkolony oddzielnie
            y = pd.Series(y).shift(-1).values                   # przesuwa pm25 o 1h w przeszłość
            y_train = y[0:-nr-25]                       
            models[i].fit(X_train, y_train)                        
            y_last = (models[i].predict(X_last)).astype(np.float64)
            y[-25] = y_last[0]                                  # prognoza na 1h
        
        y_pred = y[-nr-24:-24]                                     # prognoza na 24 h (nr)
        if y_log:
            y_pred = np.exp(y_pred)-1
        
        #last day CV
        
        X_last_day = X[-nr-25:-nr-24]                                 # godzina poprzedzająca ostatni dzień z danymi
        y_last_day = train.pm25[-nr-25:-25].values
        y_last_p = np.array([])
        for i in np.arange(nr):                     
            y_hour = (models[i].predict(X_last_day)).astype(np.float64)
            y_last_p = np.concatenate((y_last_p, y_hour))
        if y_log:
            y_last_p = np.exp(y_last_p)-1
        last_day_scores = np.append( last_day_scores, [rmse(y_last_day, y_last_p)] )
            
        train_df     = train_df + train[feats_selected][0:-nr-25]
        last_days_df = last_days_df + train[feats_selected][-nr-25:-25]
        y_train_all  = np.concatenate((y_train_all, train.pm25[0:-nr-25].values))
        y_last_days  = np.concatenate((y_last_days, y_last_day))
        y_last_pred  = np.concatenate((y_last_pred, y_last_p))
        y_pred_all   = np.concatenate((y_pred_all, y_pred))
        
        # CV
        
        side = 2  # pozwala uniknąć cyklicznych podobieństw parametru - dla 2 sprawdza 5 sąsiednich wartosci
        part_scores = np.array([])
        for i in np.arange(nr):                         # waliduje każdy model osobno
            scores = np.array([])
            for shift in np.arange(-i-side, -i+side):         # sprawdza przesunięcia wykresu - kiedy bład będzie najmniejszy?
                X_cv = train[feats_selected][0:-nr-25].values
                y_cv = train.pm25[0:-nr-25].values
                if y_log:
                    y_cv = train.pm25_log[0:-nr-25].values
                y_cv = pd.Series(y_cv).shift(shift).values    # przesuwa y w przeszłośc (wartości ujemne)
                y_pred_cv = (models[i].predict(X_cv)).astype(np.float64)
                if y_log: 
                    y_cv = np.exp(y_cv)-1
                    y_pred_cv = np.exp(y_pred_cv)-1
                scores = np.append( scores, [rmse(y_cv[side:-side-nr], y_pred_cv[side:-side-nr])] )
            part_scores = np.append( part_scores, [scores[scores.argmin()]] )
#         print(part_scores.mean(), part_scores.std())

        global_scores = np.append( global_scores, part_scores )
    print(global_scores.mean(), last_day_scores.mean(), rmse(y_last_days, y_last_pred), 
          r2_score(y_last_days, y_last_pred), model_params)      
    return global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred

In [65]:
%%time
name = 'avg_10_2.p'
log_y = False
df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats = feature_engineering_avg_no_shift(avg_len=24)
feats = num_feats
print(df.shape, train.shape, test.shape, len(parts), len(feats))
with_params = {
    'model_selected'  : xgb.XGBRegressor,
    'model_params' : {'n_jobs': 8,
                       'random_state': 123,
                       'objective': 'reg:squarederror',    #reg:squarederror reg:linear
                       'max_depth': 5,
                       'subsample': 0.95},
    'parts_to_go'     : parts, 
    'feats_selected'  : feats,
    'y_log'           : log_y,
    'nr'              : 24
}

global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred = \
    run_model_nr(**with_params)

result = global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred
with open(name, 'wb') as f:
    pickle.dump(result, f)
test['pm25'] = y_pred_all
test[ ['id', 'pm25'] ].to_csv('submit_{}.csv'.format(name[:-2]), index=False)

(40152, 43) (38952, 43) (1200, 43) 50 32


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

1.2761247281532098 3.2419481413399622
Wall time: 5min


In [67]:
%%time
name = 'avg_15_2.p'
log_y = False
df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats = feature_engineering_avg_no_shift(avg_len=24)
feats = num_feats
print(df.shape, train.shape, test.shape, len(parts), len(feats))
with_params = {
    'model_selected': xgb.XGBRegressor,
    'model_params'  : {'n_jobs': 8,
                       'random_state': 123,
                       'objective': 'reg:squarederror',    #reg:squarederror reg:linear
                       'max_depth': 5,
                       'subsample': 0.7095712157848493,
                       'learning_rate': 0.19354780543695485,
                       'n_estimators': 100,
                      },
    'parts_to_go'     : parts, 
    'feats_selected'  : feats,
    'y_log'           : log_y,
    'nr'              : 24
}
global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred = \
    run_model_nr(**with_params)

result = global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred
with open(name, 'wb') as f:
    pickle.dump(result, f)
test['pm25'] = y_pred_all
test[ ['id', 'pm25'] ].to_csv('submit_{}.csv'.format(name[:-2]), index=False)

(40152, 43) (38952, 43) (1200, 43) 50 32


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.6517045664260221 3.420822969652881
Wall time: 4min 58s


In [120]:
%%time
name = 'avg_16_2.p'
log_y = False
df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats = feature_engineering_avg_no_shift(avg_len=24)
feats = num_feats
print(df.shape, train.shape, test.shape, len(parts), len(feats))
with_params = {
    'model_selected': xgb.XGBRegressor,
    'model_params'  : {'n_jobs': 8,
                       'random_state': 123,
                       'objective': 'reg:squarederror',    #reg:squarederror reg:linear
                       'max_depth': 5,
                       'subsample': 0.7798042230398718,
                       'learning_rate': 0.13096597615153277,
                       'n_estimators': 100,
                      },
    'parts_to_go'     : parts, 
    'feats_selected'  : feats,
    'y_log'           : log_y,
    'nr'              : 24
}
global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred = \
    run_model_nr(**with_params)

result = global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred
with open(name, 'wb') as f:
    pickle.dump(result, f)
test['pm25'] = y_pred_all
test[ ['id', 'pm25'] ].to_csv('submit_{}.csv'.format(name[:-2]), index=False)

(40152, 43) (38952, 43) (1200, 43) 50 32


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.9678364920842439 3.3157616794427778
Wall time: 4min 58s


In [176]:
%%time
name = 'avg_14_2.p'
log_y = True
df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats = \
    feature_engineering_aggr_no_shift(avg_len=6, roll_nr=[6, 12], agr_fn=['min', 'max', 'mean', 'median'])
feats = num_feats
print(df.shape, train.shape, test.shape, len(parts), len(feats))
with_params = {
    'model_selected': xgb.XGBRegressor,
    'model_params'  : {'n_jobs': 8,
                       'random_state': 123,
                       'objective': 'reg:squarederror',    #reg:squarederror reg:linear
                       'max_depth': 5,
                       'subsample': 0.95},
    'parts_to_go'     : parts, 
    'feats_selected'  : feats,
    'y_log'           : log_y,
    'nr'              : 24
}
global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred = \
    run_model_nr(**with_params)
result = global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred
with open(name, 'wb') as f:
    pickle.dump(result, f)
test['pm25'] = y_pred_all
test[ ['id', 'pm25'] ].to_csv('submit_{}.csv'.format(name[:-2]), index=False)

(40152, 187) (38952, 187) (1200, 187) 50 176


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

1.1597586351388356 3.2445695198384703
Wall time: 9min 39s


In [204]:
%%time
name = 'avg_17.p' # 0.3615642925015443
log_y = True
df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats = feature_engineering_avg_no_shift(avg_len=24)
feats = num_feats
print(df.shape, train.shape, test.shape, len(parts), len(feats))
with_params = {
    'model_selected': xgb.XGBRegressor,
    'model_params'  : {'learning_rate': 0.29648114822438026, 
                         'max_depth': 5, 
                         'n_estimators': 100, 
                         'n_jobs': 8, 
                         'objective': 'reg:squarederror', 
                         'random_state': 642, 
                         'subsample': 0.7563555961578369, 
                        },
    'parts_to_go'     : parts, 
    'feats_selected'  : feats,
    'y_log'           : log_y,
    'nr'              : 24
}
global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred = \
    run_model_nr(**with_params)

result = global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred
with open(name, 'wb') as f:
    pickle.dump(result, f)
test['pm25'] = y_pred_all
test[ ['id', 'pm25'] ].to_csv('submit_{}.csv'.format(name[:-2]), index=False)

(40152, 43) (38952, 43) (1200, 43) 50 32


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.4394181220894271 3.5109222141437435
Wall time: 4min 56s


In [216]:
# ostani dzień w zbiorze treningowym

%%time
name = 'avg_18.p' # 0.33983779948831566
log_y = False
df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats = \
    feature_engineering_aggr_no_shift(avg_len=6, roll_nr=[24], agr_fn=['median'])
feats = num_feats
print(df.shape, train.shape, test.shape, len(parts), len(feats))
with_params = {
    'model_selected': xgb.XGBRegressor,
    'model_params'  : {'learning_rate': 0.2476843816313341, 
                         'max_depth': 5, 
                         'n_estimators': 100, 
                         'n_jobs': 8, 
                         'objective': 'reg:squarederror', 
                         'random_state': 70, 
                         'subsample': 0.8561623057645249, 
                        },
    'parts_to_go'     : parts, 
    'feats_selected'  : feats,
    'y_log'           : log_y,
    'nr'              : 24
}
global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred = \
    run_model_nr(**with_params)
result = global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred
with open(name, 'wb') as f:
    pickle.dump(result, f)
test['pm25'] = y_pred_all
test[ ['id', 'pm25'] ].to_csv('late_submit_{}.csv'.format(name[:-2]), index=False)

(40152, 61) (38952, 61) (1200, 61) 50 50


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.3824712141744277 3.4901844422913637
Wall time: 5min 32s


In [255]:
# ostatni dzień odcięty do walidacji

%%time
name = 'avg_18.p' # 0.33983779948831566
log_y = False
df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats = \
    feature_engineering_aggr_no_shift(avg_len=6, roll_nr=[24], agr_fn=['median'])
feats = num_feats
print(df.shape, train.shape, test.shape, len(parts), len(feats))
with_params = {
    'model_selected': xgb.XGBRegressor,
    'model_params'  : {'learning_rate': 0.2476843816313341, 
                         'max_depth': 5, 
                         'n_estimators': 100, 
                         'n_jobs': 8, 
                         'objective': 'reg:squarederror', 
                         'random_state': 70, 
                         'subsample': 0.8561623057645249, 
                        },
    'parts_to_go'     : parts, 
    'feats_selected'  : feats,
    'y_log'           : log_y,
    'nr'              : 24
}
global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred = \
    run_model_day(**with_params)
# result = global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred
# with open(name, 'wb') as f:
#     pickle.dump(result, f)
# test['pm25'] = y_pred_all
# test[ ['id', 'pm25'] ].to_csv('submit_{}.csv'.format(name[:-2]), index=False)

(40152, 61) (38952, 61) (1200, 61) 50 50


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.3660301006596656 3.783456633889155
Wall time: 5min 28s


In [184]:
# poszukiwanie optymalnych parametrów dla modeli agregacyjnych, walidacja shift 

def space_to_param(space):
    int_param = ['max_depth', 'random_state', 'min_child_weight', 'n_estimators', 'n_jobs']
    
    params  = {}
    for x in space:
        if x == 'y_log':            y_log  = space[x]
        if x == 'avg_len':        avg_len  = int(space[x])
        if x == 'roll_nr':        roll_nr  = space[x]
        if x == 'agr_fn':          agr_fn  = space[x]    
        if x in int_param:       params[x] = int(space[x])   
        else:                    params[x] = space[x]            
    return y_log, params, roll_nr, agr_fn, avg_len

def param_opt(space):

    y_log, params, roll_nr, agr_fn, avg_len = space_to_param(space)
    
    df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats = \
        feature_engineering_aggr_no_shift(avg_len=avg_len, roll_nr=roll_nr, agr_fn=agr_fn)
    feats = num_feats
    
    with_params = {
        'model_params'   : params,
        'parts_to_go'    : parts,
        'y_log'          : y_log,
        'feats_selected' : feats,
        'nr'             : 24,
        'model_selected' : xgb.XGBRegressor,
    }
      
    global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred = \
        run_model_day(**with_params)

    return{'loss': last_day_scores.mean(), 'status': STATUS_OK }

space ={
    'subsample'      : hp.uniform('x_subsample', 0.7, 1.),
    'learning_rate'  : hp.uniform('x_learning_rate', 0.05, 0.3),
    'random_state'   : hp.choice('x_random_state', range(1, 1000, 1)),
    'max_depth'      : hp.choice('x_max_depth', range(5, 8, 1)),
    'roll_nr'        : hp.choice('x_roll_nr', [[6], [12], [18], [24], [6, 12], [12, 18], [18, 24], \
                                               [6, 24], [6, 18], [12, 24]]),
    'agr_fn'         : hp.choice('x_agr_fn', [['mean'], ['median'], ['mean', 'median']]),
    'n_estimators'   : hp.choice('x_n_estimators', range(50, 300, 50)),
    'objective'      : 'reg:squarederror',
    'n_jobs'         : 8,
    'y_log'          : hp.choice('x_y_log', [True, False]),
    'avg_len'        : hp.choice('x_avg_len', [6, 12, 18, 24]),
}

trials = Trials()
best_params = fmin(fn=param_opt,
            space=space,
            algo=partial(tpe.suggest, n_startup_jobs=1),
            max_evals=1,
            trials=trials)

print("The best params: ", best_params)

  0%|                                                                           | 0/1000 [00:00<?, ?it/s, best loss: ?]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

global                                                                                                                 
0.417537515679709                                                                                                      
0.03592639798340989                                                                                                    
0.417537515679709                                                                                                      
{'agr_fn': ('mean',), 'learning_rate': 0.29581950802867857, 'max_depth': 6, 'n_estimators': 50, 'n_jobs': 8, 'objective': 'reg:squarederror', 'random_state': 478, 'roll_nr': (24,), 'subsample': 0.7958088429469709, 'y_log': False}
False                                                                                                                  
  0%|                                               | 1/1000 [03:47<63:03:28, 227.24s/it, best loss: 0.417537515679709]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

global                                                                                                                 
0.42976965559482466                                                                                                    
0.03156378103942302                                                                                                    
0.42976965559482466                                                                                                    
{'agr_fn': ('mean',), 'learning_rate': 0.2942042616154599, 'max_depth': 6, 'n_estimators': 50, 'n_jobs': 8, 'objective': 'reg:squarederror', 'random_state': 478, 'roll_nr': (24,), 'subsample': 0.7813903195731711, 'y_log': False}
False                                                                                                                  
  0%|                                               | 2/1000 [07:31<62:44:49, 226.34s/it, best loss: 0.417537515679709]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

global                                                                                                                 
0.6869114522928842                                                                                                     
0.046086315369067286                                                                                                   
0.6869114522928842                                                                                                     
{'agr_fn': ('median',), 'learning_rate': 0.28299795530141997, 'max_depth': 5, 'n_estimators': 50, 'n_jobs': 8, 'objective': 'reg:squarederror', 'random_state': 228, 'roll_nr': (18,), 'subsample': 0.9918462352523911, 'y_log': False}
False                                                                                                                  
  0%|▏                                              | 3/1000 [10:36<59:12:37, 213.80s/it, best loss: 0.417537515679709]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))




KeyboardInterrupt: 

In [320]:
# poszukiwanie optymalnych parametrów dla modeli tylko ze średnią z 'pm25'

def space_to_param(space):
    int_param = ['max_depth', 'random_state', 'min_child_weight', 'n_estimators', 'n_jobs']
    params  = {}
    for x in space:
        if x == 'y_log':         y_log  = space[x]
        if x == 'avg_len':       avg_len  = int(space[x])
        if x in int_param:       params[x] = int(space[x])   
        else:                    params[x] = space[x]            
    return y_log, params, avg_len

def param_opt(space):
    
    y_log, params, avg_len = space_to_param(space)
        
    df, train, test, parts, num_feats, omit_feats, time_feats, black_list_feats = \
        feature_engineering_avg_no_shift(avg_len=avg_len)
    feats = num_feats

    with_params = {
        'model_params'   : params,
        'parts_to_go'    : parts,
        'y_log'          : y_log,
        'feats_selected' : feats,
        'nr'             : 24,
        'model_selected' : xgb.XGBRegressor,
    }
    
    global_scores, last_day_scores, models, y_pred_all, train_df, last_days_df, y_train_all, y_last_days, y_last_pred = \
        run_model_day(**with_params)

    return{'loss': last_day_scores.mean(), 'status': STATUS_OK }

space ={
    'subsample'      : hp.uniform('x_subsample', 0.7, 1.),
    'learning_rate'  : hp.uniform('x_learning_rate', 0.05, 0.3),
    'random_state'   : hp.choice('x_random_state', range(1, 1000, 1)),
    'max_depth'      : hp.choice('x_max_depth', range(5, 8, 1)),
    'n_estimators'   : hp.choice('x_n_estimators', range(50, 300, 50)),
    'objective'      : 'reg:squarederror',
    'n_jobs'         : 8,
    'y_log'          : hp.choice('x_y_log', [True, False]),
    'avg_len'        : hp.choice('x_avg_len', [6, 12, 18, 24])
}

trials = Trials()
best_params = fmin(fn=param_opt,
            space=space,
            algo=partial(tpe.suggest, n_startup_jobs=1),
            max_evals=1,
            trials=trials)

print("The best params: ", best_params)

  0%|                                                                           | 0/1000 [00:00<?, ?it/s, best loss: ?]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.14956424567802246                                                                                                    
4.00548671716599                                                                                                       
{'avg_len': 24, 'learning_rate': 0.1830408665038808, 'max_depth': 6, 'n_estimators': 150, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 449, 'subsample': 0.712016545933661, 'y_log': False}
  0%|                                               | 1/1000 [13:12<219:51:02, 792.26s/it, best loss: 4.00548671716599]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.14405718590025487                                                                                                    
3.9312502402659923                                                                                                     
{'avg_len': 24, 'learning_rate': 0.18616093023553912, 'max_depth': 6, 'n_estimators': 150, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 416, 'subsample': 0.7113001023275932, 'y_log': False}
  0%|                                             | 2/1000 [26:33<220:23:43, 795.01s/it, best loss: 3.9312502402659923]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.022231461591235093                                                                                                   
4.9253944149489675                                                                                                     
{'avg_len': 12, 'learning_rate': 0.2942671643423422, 'max_depth': 6, 'n_estimators': 200, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 266, 'subsample': 0.7108946447643851, 'y_log': True}
  0%|▏                                            | 3/1000 [42:20<232:48:55, 840.66s/it, best loss: 3.9312502402659923]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

1.0515044183133717                                                                                                     
4.175907074567422                                                                                                      
{'avg_len': 24, 'learning_rate': 0.05178415519827352, 'max_depth': 7, 'n_estimators': 100, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 520, 'subsample': 0.988493443014729, 'y_log': False}
  0%|▏                                            | 4/1000 [53:31<218:28:59, 789.70s/it, best loss: 3.9312502402659923]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.39247045428833116                                                                                                    
3.917424268965554                                                                                                      
{'avg_len': 18, 'learning_rate': 0.1661435613994206, 'max_depth': 5, 'n_estimators': 150, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 793, 'subsample': 0.8633761981456445, 'y_log': False}
  0%|▏                                           | 5/1000 [1:03:35<202:51:09, 733.94s/it, best loss: 3.917424268965554]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.8103916407265199                                                                                                     
3.8818835750719263                                                                                                     
{'avg_len': 18, 'learning_rate': 0.05780029322270833, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 415, 'subsample': 0.8771666904765363, 'y_log': False}
  1%|▎                                          | 6/1000 [1:20:34<226:17:37, 819.57s/it, best loss: 3.8818835750719263]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

1.1180021908162197                                                                                                     
4.547215498275531                                                                                                      
{'avg_len': 6, 'learning_rate': 0.054036626351572824, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 996, 'subsample': 0.8770507704518143, 'y_log': True}
  1%|▎                                          | 7/1000 [1:37:05<240:13:43, 870.92s/it, best loss: 3.8818835750719263]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

2.0336296813587764                                                                                                     
4.5067839463800965                                                                                                     
{'avg_len': 18, 'learning_rate': 0.10914545194385247, 'max_depth': 5, 'n_estimators': 50, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 12, 'subsample': 0.9933688261271113, 'y_log': False}
  1%|▎                                          | 8/1000 [1:40:42<185:56:09, 674.77s/it, best loss: 3.8818835750719263]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.032968974063551926                                                                                                   
4.125016651758744                                                                                                      
{'avg_len': 18, 'learning_rate': 0.2831044707402026, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 702, 'subsample': 0.7940897777874459, 'y_log': False}
  1%|▍                                          | 9/1000 [1:57:43<214:17:09, 778.44s/it, best loss: 3.8818835750719263]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.07230659055849928                                                                                                    
4.244856599537228                                                                                                      
{'avg_len': 18, 'learning_rate': 0.11025826973227537, 'max_depth': 7, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 127, 'subsample': 0.9304477996541667, 'y_log': True}
  1%|▍                                        | 10/1000 [2:23:05<275:25:01, 1001.52s/it, best loss: 3.8818835750719263]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.09732042364447269                                                                                                    
4.116298021814871                                                                                                      
{'avg_len': 6, 'learning_rate': 0.24018765528468355, 'max_depth': 5, 'n_estimators': 200, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 792, 'subsample': 0.7961354492519435, 'y_log': False}
  1%|▍                                         | 11/1000 [2:36:33<259:15:18, 943.70s/it, best loss: 3.8818835750719263]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.4047857841315888                                                                                                     
3.826234978362931                                                                                                      
{'avg_len': 12, 'learning_rate': 0.09883313838301275, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 415, 'subsample': 0.9309273247080043, 'y_log': False}
  1%|▌                                          | 12/1000 [2:53:05<262:58:07, 958.19s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.5782804455190304                                                                                                     
4.3530587879787825                                                                                                     
{'avg_len': 12, 'learning_rate': 0.10873406557981666, 'max_depth': 7, 'n_estimators': 100, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 289, 'subsample': 0.9269973506448106, 'y_log': True}
  1%|▌                                          | 13/1000 [3:03:26<234:56:20, 856.92s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

1.6762891367245                                                                                                        
4.322782200647166                                                                                                      
{'avg_len': 12, 'learning_rate': 0.14542595896780647, 'max_depth': 5, 'n_estimators': 50, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 634, 'subsample': 0.7761754471175877, 'y_log': False}
  1%|▌                                          | 14/1000 [3:07:03<182:07:29, 664.96s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.0021303339506266814                                                                                                  
4.170994108705544                                                                                                      
{'avg_len': 12, 'learning_rate': 0.2320421334598349, 'max_depth': 7, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 181, 'subsample': 0.9339880773752596, 'y_log': False}
  2%|▋                                          | 15/1000 [3:31:47<249:10:18, 910.68s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.8234664527783131                                                                                                     
4.4551060937827245                                                                                                     
{'avg_len': 12, 'learning_rate': 0.08619719876446241, 'max_depth': 5, 'n_estimators': 200, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 728, 'subsample': 0.82843896785321, 'y_log': True}
  2%|▋                                          | 16/1000 [3:45:04<239:34:39, 876.50s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.20874021701331963                                                                                                    
4.100834122361134                                                                                                      
{'avg_len': 6, 'learning_rate': 0.2263572159050302, 'max_depth': 6, 'n_estimators': 100, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 448, 'subsample': 0.960008535970128, 'y_log': False}
  2%|▋                                          | 17/1000 [3:53:46<210:18:54, 770.23s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.6178410329475194                                                                                                     
3.836488379343358                                                                                                      
{'avg_len': 18, 'learning_rate': 0.07218148608501451, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 556, 'subsample': 0.8910000280191815, 'y_log': False}
  2%|▊                                          | 18/1000 [4:11:01<231:45:58, 849.65s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.5466384368559177                                                                                                     
3.8523934236192146                                                                                                     
{'avg_len': 12, 'learning_rate': 0.07937608316643932, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 807, 'subsample': 0.9026148846879282, 'y_log': False}
  2%|▊                                          | 19/1000 [4:29:13<251:19:37, 922.30s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.22763229139899502                                                                                                    
3.82701883681754                                                                                                       
{'avg_len': 18, 'learning_rate': 0.1328448617838009, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 754, 'subsample': 0.8353439009080802, 'y_log': False}
  2%|▊                                          | 20/1000 [4:46:13<259:04:36, 951.71s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.1914188236022737                                                                                                     
3.9120775850062146                                                                                                     
{'avg_len': 12, 'learning_rate': 0.14389776738460006, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 862, 'subsample': 0.8334411081286869, 'y_log': False}
  2%|▉                                          | 21/1000 [5:03:21<265:00:27, 974.49s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.21126508470332045                                                                                                    
3.8361159495278683                                                                                                     
{'avg_len': 18, 'learning_rate': 0.139881946773515, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 965, 'subsample': 0.7545361798852764, 'y_log': False}
  2%|▉                                          | 22/1000 [5:20:19<268:15:52, 987.48s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

1.8078013065166594                                                                                                     
4.407563084137639                                                                                                      
{'avg_len': 18, 'learning_rate': 0.12784809071213843, 'max_depth': 5, 'n_estimators': 50, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 754, 'subsample': 0.8394270256186855, 'y_log': False}
  2%|▉                                          | 23/1000 [5:23:56<205:17:09, 756.43s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.08108000047198308                                                                                                    
4.177735740041598                                                                                                      
{'avg_len': 6, 'learning_rate': 0.20276426458154123, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 941, 'subsample': 0.8138510474669548, 'y_log': False}
  2%|█                                          | 24/1000 [5:40:48<225:54:26, 833.27s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.16581763181948622                                                                                                    
3.965574824045959                                                                                                      
{'avg_len': 24, 'learning_rate': 0.16366796061678313, 'max_depth': 5, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 491, 'subsample': 0.9560930112420284, 'y_log': False}
  2%|█                                          | 25/1000 [5:57:09<237:40:16, 877.56s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.6237940544358539                                                                                                     
4.4818141806229                                                                                                        
{'avg_len': 12, 'learning_rate': 0.0950089793283009, 'max_depth': 6, 'n_estimators': 150, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 499, 'subsample': 0.7574585436485829, 'y_log': True}
  3%|█                                          | 26/1000 [6:09:50<227:57:25, 842.55s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.02732591756873983                                                                                                    
3.9400916157522077                                                                                                     
{'avg_len': 12, 'learning_rate': 0.1265520873914164, 'max_depth': 7, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 415, 'subsample': 0.8552357252558538, 'y_log': False}
  3%|█▏                                        | 27/1000 [6:36:40<289:56:04, 1072.73s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.5644972700018149                                                                                                     
4.09150039771358                                                                                                       
{'avg_len': 18, 'learning_rate': 0.19990184851230347, 'max_depth': 5, 'n_estimators': 100, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 557, 'subsample': 0.9133922110965734, 'y_log': False}
  3%|█▏                                         | 28/1000 [6:43:27<235:42:45, 873.01s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.9734728991662394                                                                                                     
4.063732602843533                                                                                                      
{'avg_len': 24, 'learning_rate': 0.15946959270965674, 'max_depth': 6, 'n_estimators': 50, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 313, 'subsample': 0.9631852931913123, 'y_log': False}
  3%|█▏                                         | 29/1000 [6:48:00<186:56:45, 693.11s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.4851544286343079                                                                                                     
4.643508924803266                                                                                                      
{'avg_len': 6, 'learning_rate': 0.12435187184769086, 'max_depth': 5, 'n_estimators': 200, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 393, 'subsample': 0.7378011514112722, 'y_log': True}
  3%|█▎                                         | 30/1000 [7:01:21<195:26:38, 725.36s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.13085074088032642                                                                                                    
3.9436916164145766                                                                                                     
{'avg_len': 24, 'learning_rate': 0.1850143403236004, 'max_depth': 6, 'n_estimators': 150, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 494, 'subsample': 0.816911158461853, 'y_log': False}
  3%|█▎                                         | 31/1000 [7:14:27<200:09:10, 743.60s/it, best loss: 3.826234978362931]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

0.06578200415073324                                                                                                    
3.812568459538359                                                                                                      
{'avg_len': 12, 'learning_rate': 0.09667849243325057, 'max_depth': 7, 'n_estimators': 250, 'n_jobs': 16, 'objective': 'reg:squarederror', 'random_state': 225, 'subsample': 0.8868362248263212, 'y_log': False}
  3%|█▎                                        | 32/1000 [7:41:27<270:38:33, 1006.52s/it, best loss: 3.812568459538359]

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))




KeyboardInterrupt: 