In [1]:
# pandas and numpy for data manipulation
import numpy as np
import pandas as pd

# matplotlib and seaborn for visuilization
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Display all the columns of the dataframe
pd.pandas.set_option('display.max_columns',None)

# No warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read in data into dataframe
file_train=r'D:\Machine Learning\Python Projects\Project-3\project\train.csv'

cf_train=pd.read_csv(file_train)

# Returns the number of Rows and Columns in train data
print('shape of train data after preprocessing: {}'.format(cf_train.shape))

shape of train data after preprocessing: (6818, 38)


#### Splittig the data into train, validation and test

In [3]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(cf_train,test_size=0.15,random_state=123)

print('shape of train data: {}'.format(train.shape))
print('shape of test data: {}'.format(test.shape))

shape of train data: (5795, 38)
shape of test data: (1023, 38)


In [4]:
train1, validation = train_test_split(train,test_size=0.2,random_state=456)

print('shape of train1 data: {}'.format(train1.shape))
print('shape of validation data: {}'.format(validation.shape))

shape of train1 data: (4636, 38)
shape of validation data: (1159, 38)


In [5]:
# Separting dependent and independent variables

x_train=train1.drop(['Medicine_ID','Counterfeit_Sales'],axis=1)
y_train=train1['Counterfeit_Sales']

print('shape of x_train data: {}'.format(x_train.shape))
print('shape of y_train data: {}'.format(y_train.shape))

x_validation=validation.drop(['Medicine_ID','Counterfeit_Sales'],axis=1)
y_validation=validation['Counterfeit_Sales']

print('shape of x_validation data: {}'.format(x_validation.shape))
print('shape of y_validation data: {}'.format(y_validation.shape))

x_test=test.drop(['Medicine_ID','Counterfeit_Sales'],axis=1)
y_test=test['Counterfeit_Sales']

print('shape of x_test data: {}'.format(x_test.shape))
print('shape of y_test data: {}'.format(y_test.shape))

shape of x_train data: (4636, 36)
shape of y_train data: (4636,)
shape of x_validation data: (1159, 36)
shape of y_validation data: (1159,)
shape of x_test data: (1023, 36)
shape of y_test data: (1023,)


### Model Building

In [None]:
# Score = 1-(MAE/1660)

### 1: Simple linear Regression

In [8]:
# Fitting Simple Linear Regression to the Training set
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
lr = LinearRegression()
lr.fit(x_train,y_train)

# predict on test
pr=lr.predict(x_test)

# 
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
print('Average MAE: {}'.format(np.round(mean_absolute_error(y_test,pr), 3)))
print('Explained variance: {}'.format(r2_score(y_test,pr)))

Average MAE: 835.426
Explained variance: 0.572967593058583


In [9]:
1-(835.426/1660)

0.4967313253012048

### 1.1: Multiple Regresssion

In [11]:
# Fitting Multiple Linear Regression to the Training set
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
lr_cross = LinearRegression()
cv = cross_val_score(lr, x_train, y_train, cv=10, scoring='neg_mean_absolute_error')
print('Average NMAE: {}'.format(np.round(cv.mean(), 3)))
print('Standard Deviation: {}'.format(round(cv.std(), 3)))

Average NMAE: -855.692
Standard Deviation: 61.659


In [12]:
1-(855.692/1660)

0.48452289156626505

### 2: Ridge Regression

In [13]:
# Fitting Rige Regression to the Training set
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

#### 2.1: Hyper parameter tuning using GridSearchCV

In [20]:
# Hyperparameter tuning for getting best alpha value.run GridSearchCV
ridge=Ridge()
params={'alpha':np.linspace(1,10,1000)}
ridge_grid=GridSearchCV(ridge,param_grid=params,scoring='neg_mean_absolute_error',
                       cv=10,verbose=1)
ridge_grid.fit(x_validation,y_validation)

Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   47.7s finished


GridSearchCV(cv=10, estimator=Ridge(),
             param_grid={'alpha': array([ 1.        ,  1.00900901,  1.01801802,  1.02702703,  1.03603604,
        1.04504505,  1.05405405,  1.06306306,  1.07207207,  1.08108108,
        1.09009009,  1.0990991 ,  1.10810811,  1.11711712,  1.12612613,
        1.13513514,  1.14414414,  1.15315315,  1.16216216,  1.17117117,
        1.18018018,  1.18918919,  1.1981982 ,  1.20720721,  1.21621622,
        1.22522523,  1.23423423...
        9.73873874,  9.74774775,  9.75675676,  9.76576577,  9.77477477,
        9.78378378,  9.79279279,  9.8018018 ,  9.81081081,  9.81981982,
        9.82882883,  9.83783784,  9.84684685,  9.85585586,  9.86486486,
        9.87387387,  9.88288288,  9.89189189,  9.9009009 ,  9.90990991,
        9.91891892,  9.92792793,  9.93693694,  9.94594595,  9.95495495,
        9.96396396,  9.97297297,  9.98198198,  9.99099099, 10.        ])},
             scoring='neg_mean_absolute_error', verbose=1)

In [21]:
ridge_grid.best_estimator_

Ridge(alpha=5.1441441441441444)

In [22]:
def report(results,n_top=3):
    for i in range(1,n_top+1):
        candidates = np.flatnonzero(results['rank_test_score']==i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean Validation Score: {0:.8f} (std:{1:.3f})".format(
                results['mean_test_score'][candidate],
                results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")

In [23]:
report(ridge_grid.cv_results_,3)

Model with rank: 1
Mean Validation Score: -786.72376946 (std:59.131)
Parameters: {'alpha': 5.1441441441441444}

Model with rank: 2
Mean Validation Score: -786.72377059 (std:59.128)
Parameters: {'alpha': 5.153153153153153}

Model with rank: 3
Mean Validation Score: -786.72377313 (std:59.126)
Parameters: {'alpha': 5.162162162162162}



In [39]:
1-(786.72376946/1660)

0.5260700183975904

#### 2.2: Fitting the on train data with best parameters and predicing on test data

In [25]:
# Fitting the Ridge model and predicting
ridge_model=Ridge(fit_intercept=True,alpha=5.144)
ridge_model.fit(x_train,y_train)

ridge_pr=ridge_model.predict(x_test)
print('Average MAE: {}'.format(np.round(mean_absolute_error(y_test,ridge_pr), 3)))
print('Explained variance: {}'.format(r2_score(y_test,ridge_pr)))

Average MAE: 835.425
Explained variance: 0.5730697454544821


In [26]:
1-(835.425/1660)

0.4967319277108434

### 3: Lasso Regression

In [28]:
# Fitting Lasso Regression to the Training set
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

#### 3.1: Hyper parameter tuning using GridSearchCV

In [34]:
# Hyperparameter tuning for getting best alpha value.run GridSearchCV
lasso=Lasso()
params={'alpha':np.linspace(5,12,1000)}
lasso_grid=GridSearchCV(lasso,param_grid=params,scoring='neg_mean_absolute_error',
                       cv=10,verbose=1)
lasso_grid.fit(x_validation,y_validation)

Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   44.5s finished


GridSearchCV(cv=10, estimator=Lasso(),
             param_grid={'alpha': array([ 5.        ,  5.00700701,  5.01401401,  5.02102102,  5.02802803,
        5.03503504,  5.04204204,  5.04904905,  5.05605606,  5.06306306,
        5.07007007,  5.07707708,  5.08408408,  5.09109109,  5.0980981 ,
        5.10510511,  5.11211211,  5.11911912,  5.12612613,  5.13313313,
        5.14014014,  5.14714715,  5.15415415,  5.16116116,  5.16816817,
        5.17517518,  5.1821821...
       11.7967968 , 11.8038038 , 11.81081081, 11.81781782, 11.82482482,
       11.83183183, 11.83883884, 11.84584585, 11.85285285, 11.85985986,
       11.86686687, 11.87387387, 11.88088088, 11.88788789, 11.89489489,
       11.9019019 , 11.90890891, 11.91591592, 11.92292292, 11.92992993,
       11.93693694, 11.94394394, 11.95095095, 11.95795796, 11.96496496,
       11.97197197, 11.97897898, 11.98598599, 11.99299299, 12.        ])},
             scoring='neg_mean_absolute_error', verbose=1)

In [35]:
lasso_grid.best_estimator_

Lasso(alpha=9.14114114114114)

In [37]:
report(lasso_grid.cv_results_,3)

Model with rank: 1
Mean Validation Score: -783.48503347 (std:59.793)
Parameters: {'alpha': 9.14114114114114}

Model with rank: 2
Mean Validation Score: -783.48521795 (std:59.791)
Parameters: {'alpha': 9.148148148148149}

Model with rank: 3
Mean Validation Score: -783.48532213 (std:59.800)
Parameters: {'alpha': 9.113113113113112}



In [38]:
1-(783.48503347/1660)

0.5280210641746987

#### 3.2: Fitting the on train data with best parameters and predicing on test data

In [40]:
# Fitting the lasso model and predicting
lasso_model=Lasso(fit_intercept=True,alpha=0.01)
lasso_model.fit(x_train,y_train)

lasso_pr=lasso_model.predict(x_test)
print('Average MAE: {}'.format(np.round(mean_absolute_error(y_test,lasso_pr), 3)))
print('Explained variance: {}'.format(r2_score(y_test,lasso_pr)))

Average MAE: 835.409
Explained variance: 0.5729988744854198


In [41]:
1-(835.409/1660)

0.4967415662650603

In [42]:
list(zip(x_train.columns, lasso_model.coef_))

[('Counterfeit_Weight', -128.7925407369324),
 ('Active_Since', 39.98718165419559),
 ('Medicine_MRP', 3651.3225007994947),
 ('Availability_rating', -208.5059285667867),
 ('DistArea_ID_Area027', 1403.5898760127259),
 ('DistArea_ID_Area013', 129.42591427314025),
 ('DistArea_ID_Area046', 482.7002959144574),
 ('DistArea_ID_Area049', 512.5209745575814),
 ('DistArea_ID_Area035', 580.3291059233453),
 ('DistArea_ID_Area045', 154.74503628688896),
 ('DistArea_ID_Area018', 151.1551897281921),
 ('DistArea_ID_Area017', 392.4721054430795),
 ('DistArea_ID_Area010', -692.8711176343705),
 ('Medicine_Type_Antibiotics', -89.71620474470778),
 ('Medicine_Type_Hreplacements', -216.67665311070851),
 ('Medicine_Type_Antiseptics', -217.44934655447702),
 ('Medicine_Type_OralContraceptives', -228.8318051716932),
 ('Medicine_Type_Antipyretics', -270.13500065867726),
 ('Medicine_Type_Cardiac', -168.34048224053257),
 ('Medicine_Type_Mstablizers', -239.98056816631544),
 ('Medicine_Type_Tranquilizers', -244.8671694142

### 4: Randm Forest

In [115]:
# Fitting Random Forest to the Training set
from time import time
from operator import itemgetter
from scipy.stats import randint as sp_randint
from sklearn.ensemble import RandomForestRegressor

from  sklearn.model_selection import RandomizedSearchCV
clf=RandomForestRegressor(n_jobs=-1,verbose=1,random_state=789)

#### 4.1: Hyper parameter tuning using RandomizedSearchCV

In [116]:

param_dist = {"n_estimators":[400,450,500,550,600],
              'criterion':['mae'],
              "max_depth": [3,5, None],
              "max_features": sp_randint(5,11),
              "min_samples_split": sp_randint(5, 11),
              "min_samples_leaf": sp_randint(5, 11),
              "bootstrap": [True, False]}

In [117]:
# run randomized search
n_iter_search = 50
# n_iter parameter of RandomizedSeacrhCV controls, how many 
# parameter combination will be tried; out of all possible given values

random_search = RandomizedSearchCV(clf, param_distributions=param_dist,cv=10,
                                   scoring='neg_mean_absolute_error',n_iter=n_iter_search)
random_search.fit(x_validation, y_validation)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    6.8s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    3.7s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 585 out of 600 | elapsed:    3.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    3.3s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 600 out of 600 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 585 out of 600 | elapsed:    3.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    3.3s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parall

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done 450 out of 450 | elapsed:    4.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 450 out of 450 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    3.2s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | el

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 550 out of 550 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.2s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.2s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 550 out of 550 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    2.8s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.5s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 550 out of 550 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.6s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 485 out of 500 | elapsed:    3.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    3.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 485 out of 500 | elapsed:    3.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    3.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parall

[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    6.0s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 550 out of 550 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    6.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    2.2s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 600 out of 600 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 450 out of 450 | elapsed:    2.0s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 435 out of 450 | elapsed:    3.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 450 out of 450 | elapsed:    3.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 450 out of 450 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 435 out of 450 | elapsed:    3.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 450 out of 450 | elapsed:    3.6s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parall

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    2.7s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 600 out of 600 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 585 out of 600 | elapsed:    3.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    3.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 

[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    2.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    2.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks 

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    1.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    1.5s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 485 out of 500 | elapsed:    3.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    3.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    3.5s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 550 out of 550 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.0s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 435 out of 450 | elapsed:    3.8s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 450 out of 450 | elapsed:    3.9s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 450 out of 450 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 450 out of 450 | elapsed:    3.9s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 585 out of 600 | elapsed:    4.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    4.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 600 out of 600 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 535 out of 550 | elapsed:    2.8s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    2.9s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parall

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    1.9s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    1.9s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    4.7s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    4.8s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.7s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.8s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend L

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.3s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.3s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend L

[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    5.9s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 600 out of 600 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    6.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 600 out of 600 | elapsed:    0.0s f

[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.9s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.8s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks 

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.6s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 550 out of 550 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.5s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.6s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 550 out of 550 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    1.6s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 550 out of 550 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 535 out of 550 | elapsed:    3.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    3.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 550 out of 550 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    3.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    3.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    3.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    3.7s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    3.8s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    2.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    2.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 400 out of

[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks 

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.5s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 400 out of 400 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend L

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    1.2s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 550 out of 550 | elapsed:    2.2s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 450 out of 450 | elapsed:    1.8s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 450 out of 450 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 450 out of 450 | elapsed:    1.8s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | ela

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 450 out of 450 | elapsed:    2.7s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 450 out of 450 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    5.2s finished


RandomizedSearchCV(cv=10,
                   estimator=RandomForestRegressor(n_jobs=-1, random_state=789,
                                                   verbose=1),
                   n_iter=50,
                   param_distributions={'bootstrap': [True, False],
                                        'criterion': ['mae'],
                                        'max_depth': [3, 5, None],
                                        'max_features': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000020E20DF3970>,
                                        'min_samples_leaf': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000020E20DF3B50>,
                                        'min_samples_split': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000020E1A374490>,
                                        'n_estimators': [400, 450, 500, 550,
                                                         600]},
                   scoring='neg_mean_absolute_error')

In [118]:
report(random_search.cv_results_,3)

Model with rank: 1
Mean Validation Score: -741.39057624 (std:48.142)
Parameters: {'bootstrap': True, 'criterion': 'mae', 'max_depth': None, 'max_features': 10, 'min_samples_leaf': 5, 'min_samples_split': 5, 'n_estimators': 500}

Model with rank: 2
Mean Validation Score: -742.14506791 (std:48.578)
Parameters: {'bootstrap': True, 'criterion': 'mae', 'max_depth': None, 'max_features': 10, 'min_samples_leaf': 6, 'min_samples_split': 7, 'n_estimators': 600}

Model with rank: 3
Mean Validation Score: -742.46176634 (std:48.833)
Parameters: {'bootstrap': True, 'criterion': 'mae', 'max_depth': None, 'max_features': 10, 'min_samples_leaf': 6, 'min_samples_split': 10, 'n_estimators': 400}



In [119]:
1-(741.39057624/1660)

0.5533791709397591

In [120]:
random_search.best_estimator_

RandomForestRegressor(criterion='mae', max_features=10, min_samples_leaf=5,
                      min_samples_split=5, n_estimators=500, n_jobs=-1,
                      random_state=789, verbose=1)

RandomForestRegressor(criterion='mae', max_features=10, min_samples_leaf=5,
                      min_samples_split=5, n_estimators=500, n_jobs=-1,
                      random_state=789, verbose=1)

#### 4.2: Fitting the on train data with best parameters and predicing on test data

In [121]:
# Fit model on train data and predict

rf=RandomForestRegressor(criterion='mae', max_features=10, min_samples_leaf=5,
                      min_samples_split=5, n_estimators=500, n_jobs=-1,
                      random_state=789, verbose=1)
rf.fit(x_train,y_train)

predicted=rf.predict(x_test)
print('Average MAE= {}'.format(np.round(mean_absolute_error(y_test,predicted), 3)))
print('Explained variance= {}'.format(r2_score(y_test,predicted)))

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   24.1s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:   56.1s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  1.1min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.1s


Average MAE= 760.896
Explained variance= 0.6029926073375083


[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.1s finished


In [122]:
1-(760.896/1660)

0.5416289156626506

In [123]:
importances = rf.feature_importances_
std = np.std([tree.feature_importances_ for tree in rf.estimators_],
             axis=0)
indices = np.argsort(importances)[::-1]

# Print the feature ranking
print("Feature ranking:")

for f in range(x_train.shape[1]):
    print("%d. feature %s (%f)" % (f + 1, list(x_train.columns)[f], importances[indices[f]]))

Feature ranking:
1. feature Counterfeit_Weight (0.480341)
2. feature Active_Since (0.141124)
3. feature Medicine_MRP (0.063275)
4. feature Availability_rating (0.051174)
5. feature DistArea_ID_Area027 (0.040005)
6. feature DistArea_ID_Area013 (0.038047)
7. feature DistArea_ID_Area046 (0.030717)
8. feature DistArea_ID_Area049 (0.028734)
9. feature DistArea_ID_Area035 (0.026458)
10. feature DistArea_ID_Area045 (0.013818)
11. feature DistArea_ID_Area018 (0.008686)
12. feature DistArea_ID_Area017 (0.006215)
13. feature DistArea_ID_Area010 (0.005836)
14. feature Medicine_Type_Antibiotics (0.005515)
15. feature Medicine_Type_Hreplacements (0.005273)
16. feature Medicine_Type_Antiseptics (0.004820)
17. feature Medicine_Type_OralContraceptives (0.004810)
18. feature Medicine_Type_Antipyretics (0.003869)
19. feature Medicine_Type_Cardiac (0.003714)
20. feature Medicine_Type_Mstablizers (0.003654)
21. feature Medicine_Type_Tranquilizers (0.003536)
22. feature Medicine_Type_Analgesics (0.003355)


### 5: Gradient Boosting

In [76]:
# Fitting Gradient Boosting on Training set
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingRegressor

#### 5.1: Hyper parameter tuning using Bayesian Hyperopt

In [77]:
# Hyperparameter tuning using Bayesian Hyperopt
def acc_model(params):
    gbm=GradientBoostingRegressor(**params)
    return cross_val_score(gbm,x_validation,y_validation).mean()

In [78]:
param_space={
    'learning_rate': hp.choice('learning_rate',[0.01,0.02,0.05,0.1,0.2]),
    'n_estimators': hp.choice('n_estimators',range(300,500)),
    'criterion': hp.choice('criterion', ['mae']),
    'max_depth': hp.choice('max_depth', range(1,10)),
    'subsample': hp.choice('subsample',[0.2,0.5,0.8,1]),
    'max_features': hp.choice('max_features',range(5,15))
}

In [79]:
best=0
def f(params):
    global best
    acc = acc_model(params)
    if acc>best:
        best=acc
    print('new best:',best,params)
    return {'loss':-acc, 'status':STATUS_OK}

In [80]:
trials=Trials()
best=fmin(f,param_space,algo=tpe.suggest,max_evals=20,trials=trials)
print('best: {}'.format(best))

new best:                                                                                                              
0.5490862747721232                                                                                                     
{'criterion': 'mae', 'learning_rate': 0.02, 'max_depth': 4, 'max_features': 8, 'n_estimators': 362, 'subsample': 0.8}  
new best:                                                                                                              
0.5490862747721232                                                                                                     
{'criterion': 'mae', 'learning_rate': 0.01, 'max_depth': 3, 'max_features': 5, 'n_estimators': 469, 'subsample': 0.2}  
new best:                                                                                                              
0.5490862747721232                                                                                                     
{'criterion': 'mae', 'learning_rate': 0.

1: best: {'criterion': 'mae', 'learning_rate': 0.05, 'max_depth': 2, 'max_features': 7, 'n_estimators': 336, 'subsample': 1}

2: best: {'criterion': 'mae', 'learning_rate': 0.01, 'max_depth': 6, 'max_features': 10, 'n_estimators':300, 'subsample': 0.5}


#### 5.2: Fitting the on train data with best parameters and predicing on test data

In [81]:
gbm_bayesian=GradientBoostingRegressor(**{'criterion': 'mae', 'learning_rate': 0.01, 'max_depth': 6, 
                                          'max_features': 10, 'n_estimators':300, 'subsample': 0.5})
gbm_bayesian.fit(x_train,y_train)

predict_gbm=gbm_bayesian.predict(x_test)
print('Average MAE= {}'.format(np.round(mean_absolute_error(y_test,predict_gbm), 3)))
print('Explained variance= {}'.format(r2_score(y_test,predict_gbm)))

Average MAE= 762.886
Explained variance= 0.6042175845522708


In [82]:
1-(762.886/1660)

0.5404301204819277

### 6: XGBoost

In [83]:
# Fitting xgboost on Trining set
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score
from xgboost.sklearn import XGBRegressor

#### 6.1: Hyper parameter tuning using Bayesian Hyperopt

In [84]:
# Hyperparameter tuning using Bayesian Hyperopt
def acc_model(params):
    xgb=XGBRegressor(**params)
    return cross_val_score(xgb,x_validation,y_validation).mean()

In [103]:
param_space={
    "n_estimators":hp.choice("n_estimators",[100,200,300,500,600,700]),
    "max_depth": hp.choice("max_depth",[2,3,4,5,6]),
     "learning_rate":hp.choice("learning_rate",[0.0001,0.001,0.01,0.1,0.2]),
    "gamma":hp.choice("gamma",[i/10.0 for i in range(0,5)]),
    "min_child_weight":hp.choice("min_child_weight",[5,6,7,8,9]),
    "subsample":hp.choice("subsample",[i/10.0 for i in range(6,10)]),
    "colsample_bytree":hp.choice("colsample_bytree",[i/10.0 for i in range(3,8)]),
    "reg_alpha":hp.choice("reg_alpha",[10,15,20,30,50]),
    'scale_pos_weight':hp.choice('scale_pos_weight',[2,3,4,5,6,7,8,9])
}

In [None]:
{'colsample_bytree': 0.6, 'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 3, 'min_child_weight': 9, 'n_estimators': 600, 'reg_alpha': 30, 'scale_pos_weight': 2, 'subsample': 0.6}

In [104]:
best=0
def f(params):
    global best
    acc = acc_model(params)
    if acc>best:
        best=acc
    print('new best:',best,params)
    return {'loss':-acc, 'status':STATUS_OK}

In [105]:
trials=Trials()
best=fmin(f,param_space,algo=tpe.suggest,max_evals=100,trials=trials)
print('best: {}'.format(best))

new best:                                                                                                              
0.5680235226879489                                                                                                     
{'colsample_bytree': 0.6, 'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 3, 'min_child_weight': 9, 'n_estimators': 600, 'reg_alpha': 20, 'scale_pos_weight': 9, 'subsample': 0.9}
new best:                                                                                                              
0.5680235226879489                                                                                                     
{'colsample_bytree': 0.3, 'gamma': 0.1, 'learning_rate': 0.0001, 'max_depth': 3, 'min_child_weight': 8, 'n_estimators': 500, 'reg_alpha': 10, 'scale_pos_weight': 6, 'subsample': 0.6}
new best:                                                                                                              
0.5680235226879489                  

{'colsample_bytree': 0.7, 'gamma': 0.0, 'learning_rate': 0.001, 'max_depth': 3, 'min_child_weight': 9, 'n_estimators': 200, 'reg_alpha': 20, 'scale_pos_weight': 3, 'subsample': 0.9}
new best:                                                                                                              
0.5680235226879489                                                                                                     
{'colsample_bytree': 0.6, 'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 5, 'min_child_weight': 8, 'n_estimators': 600, 'reg_alpha': 10, 'scale_pos_weight': 5, 'subsample': 0.9}
new best:                                                                                                              
0.5680235226879489                                                                                                     
{'colsample_bytree': 0.4, 'gamma': 0.4, 'learning_rate': 0.0001, 'max_depth': 6, 'min_child_weight': 9, 'n_estimators': 100, 'reg_alpha': 30, 'scale_pos_weig

0.5695121849153092                                                                                                     
{'colsample_bytree': 0.6, 'gamma': 0.0, 'learning_rate': 0.01, 'max_depth': 3, 'min_child_weight': 9, 'n_estimators': 100, 'reg_alpha': 20, 'scale_pos_weight': 6, 'subsample': 0.7}
new best:                                                                                                              
0.5695121849153092                                                                                                     
{'colsample_bytree': 0.4, 'gamma': 0.1, 'learning_rate': 0.0001, 'max_depth': 4, 'min_child_weight': 9, 'n_estimators': 300, 'reg_alpha': 10, 'scale_pos_weight': 5, 'subsample': 0.9}
new best:                                                                                                              
0.5695121849153092                                                                                                     
{'colsample_bytree': 0.6, 'gamma': 0

1: best: {'colsample_bytree': 0.6, 'gamma': 0.3, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 8, 'n_estimators': 100, 'reg_alpha': 15, 'scale_pos_weight': 6, 'subsample': 0.7}

2: best: {'colsample_bytree': 0.5, 'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 4, 'min_child_weight': 7, 'n_estimators': 500, 'reg_alpha': 20, 'scale_pos_weight': 8, 'subsample': 0.7}

#### 6.2: Fitting the on train data with best parameters and predicing on test data

In [106]:
xgb_bayesian=XGBRegressor(**{'colsample_bytree': 0.6, 'gamma': 0.1, 'learning_rate': 0.01, 
                             'max_depth': 3, 'min_child_weight': 9, 'n_estimators': 600, 
                             'reg_alpha': 30, 'scale_pos_weight': 2, 'subsample': 0.6})
xgb_bayesian.fit(x_train,y_train)

predict_xgb=xgb_bayesian.predict(x_test)
print('Average MAE= {}'.format(np.round(mean_absolute_error(y_test,predict_xgb), 3)))
print('Explained variance= {}'.format(r2_score(y_test,predict_xgb)))

Average MAE= 753.817
Explained variance= 0.6184891205480827


In [107]:
1-(753.817/1660)

0.5458933734939759

### 7: KNN

In [114]:
from sklearn.neighbors import KNeighborsRegressor
knn=KNeighborsRegressor()

#### 7.1: Hyper parameter tuning using RandomizedSearchCV

In [126]:
params_dist={'n_neighbors':[10,20,30,40,50],
      'weights':['uniform','distance'],
      }

In [128]:
knn_random=RandomizedSearchCV(knn,param_distributions=params_dist,cv=10,
                              n_iter=50,scoring='neg_mean_absolute_error',verbose=1)
knn_random.fit(x_validation,y_validation)

Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    3.3s finished


RandomizedSearchCV(cv=10, estimator=KNeighborsRegressor(), n_iter=50,
                   param_distributions={'n_neighbors': [10, 20, 30, 40, 50],
                                        'weights': ['uniform', 'distance']},
                   scoring='neg_mean_absolute_error', verbose=1)

In [129]:
report(knn_random.cv_results_,3)

Model with rank: 1
Mean Validation Score: -905.08987972 (std:68.074)
Parameters: {'weights': 'uniform', 'n_neighbors': 30}

Model with rank: 2
Mean Validation Score: -916.36506820 (std:68.398)
Parameters: {'weights': 'distance', 'n_neighbors': 30}

Model with rank: 3
Mean Validation Score: -918.46083750 (std:67.747)
Parameters: {'weights': 'uniform', 'n_neighbors': 20}



In [130]:
1-(905.08987972/1660)

0.45476513269879515

In [131]:
knn_random.best_estimator_

KNeighborsRegressor(n_neighbors=30)

#### 7.2: Fitting the on train data with best parameters and predicing on test data

In [132]:
# Fit model on train data and predict

knn_model=KNeighborsRegressor(n_neighbors=30)
knn_model.fit(x_train,y_train)

predicted=knn_model.predict(x_test)
print('Average MAE= {}'.format(np.round(mean_absolute_error(y_test,predicted),3)))
print('Explained variance= {}'.format(r2_score(y_test,predicted)))

Average MAE= 1024.053
Explained variance= 0.35613990443996335


In [133]:
1-(1024.053/1660)

0.3831006024096385

#### 8: Stacking

In [139]:
from sklearn.model_selection import KFold

In [158]:
clf1=RandomForestRegressor(criterion='mae', max_features=10, min_samples_leaf=5,
                      min_samples_split=5, n_estimators=500)
clf2=GradientBoostingRegressor(criterion='mae', learning_rate= 0.01, max_depth=6, max_features=10, 
                               n_estimators=300, subsample= 0.5)
clf3=XGBRegressor(colsample_bytree=0.6, gamma=0.3, learning_rate=0.1, max_depth=3,
                  min_child_weight=8, n_estimators=100, reg_alpha=15, scale_pos_weight=6, 
                  subsample=0.7)


In [159]:
Algos=[clf1,clf2,clf3]

In [160]:
rows=x_train.shape[0]
rows

4636

In [161]:
layer1=pd.DataFrame({'clf1':np.zeros(rows),'clf2':np.zeros(rows),'clf3':np.zeros(rows)})
print('shape of layer1: {}'.format(layer1.shape))

shape of layer1: (4636, 3)


In [152]:
# lets create a dictionary for test
test_layer1={0:{},1:{},2:{}}

In [153]:
kf=KFold(n_splits=5)

In [162]:
fold=1
for train,left_out_chunk in kf.split(x_train):
    print('fold number : ', fold)
    
    for i,clf in enumerate(Algos):
        print('Algo number :',i+1)
        
        x_train_train=x_train.loc[train]
        y_train_train=y_train[train]
        x_train_left_out_chunk=x_train.loc[left_out_chunk]
        
        clf.fit(x_train_train,y_train_train)
        p=clf.predict_proba(x_train_left_out_chunk)[:,1]
        
        layer1.iloc[left_out_chunk,i]=p
        
    fold+=1  
    

fold number :  1
Algo number : 1


AttributeError: 'numpy.ndarray' object has no attribute 'values'