In [1]:
import gc
from sklearn.linear_model import Lasso, ElasticNet
from pprint import pprint

import pandas as pd
import numpy as np

from cv import run_cv_model
from utils import print_step, rmse
from cache import get_data, is_in_cache, load_cache, save_in_cache

In [2]:
# Only the target column is kept from the raw training frame; the frame
# itself is dropped right away.
train, test = get_data()
target = train['deal_probability']
del train

# Cache names of the blender outputs to stack. Each name is used both as the
# key passed to load_cache and as the column read from the frames it returns.
blender_names = [
    'lgb_blender',
    'lgb_blender_poisson',
    'flat_blender_lgb',
    'MLP_blender',
    'lasso_blender',
]

for step, blender_name in enumerate(blender_names, start=1):
    print_step(f'Importing Data {step}/{len(blender_names)}')
    cached_train, cached_test = load_cache(blender_name)
    if step == 1:
        # The first cached pair becomes the base frames that every later
        # blender column is attached to.
        train_, test_ = cached_train, cached_test
    else:
        train_[blender_name] = cached_train[blender_name]
        test_[blender_name] = cached_test[blender_name]

Train shape: (1503424, 18)
Test shape: (508438, 17)
[2018-06-27 00:11:02.442663] Importing Data 1/5
Test shape: (508438, 1)
Train shape: (1503424, 1)
[2018-06-27 00:11:02.725401] Skipped... Loaded cache/train_lgb_blender.csv and cache/test_lgb_blender.csv from cache!
[2018-06-27 00:11:02.725468] Importing Data 2/5
Test shape: (508438, 1)
Train shape: (1503424, 1)
[2018-06-27 00:11:03.009363] Skipped... Loaded cache/train_lgb_blender_poisson.csv and cache/test_lgb_blender_poisson.csv from cache!
[2018-06-27 00:11:03.011031] Importing Data 3/5
Test shape: (508438, 1)
Train shape: (1503424, 1)
[2018-06-27 00:11:03.295185] Skipped... Loaded cache/train_flat_blender_lgb.csv and cache/test_flat_blender_lgb.csv from cache!
[2018-06-27 00:11:03.297227] Importing Data 4/5
Test shape: (508438, 1)
Train shape: (1503424, 1)
[2018-06-27 00:11:03.553160] Skipped... Loaded cache/train_MLP_blender.csv and cache/test_MLP_blender.csv from cache!
[2018-06-27 00:11:03.555008] Importing Data 5/5
Test shape

In [3]:
# Pairwise correlation between the columns of train_ (pandas' default
# method), to see how similar the stacked blender outputs are to each other.
train_.corr()

Unnamed: 0,lgb_blender,lgb_blender_poisson,flat_blender_lgb,MLP_blender,lasso_blender
lgb_blender,1.0,0.998347,0.995402,0.993761,0.993383
lgb_blender_poisson,0.998347,1.0,0.995182,0.994049,0.994027
flat_blender_lgb,0.995402,0.995182,1.0,0.991986,0.991471
MLP_blender,0.993761,0.994049,0.991986,1.0,0.993749
lasso_blender,0.993383,0.994027,0.991471,0.993749,1.0


In [4]:
# Pairwise correlation between the columns of test_ (pandas' default method),
# for comparison with the same matrix computed on train_.
test_.corr()

Unnamed: 0,lgb_blender,lgb_blender_poisson,flat_blender_lgb,MLP_blender,lasso_blender
lgb_blender,1.0,0.999178,0.997401,0.997036,0.995459
lgb_blender_poisson,0.999178,1.0,0.997059,0.997146,0.995885
flat_blender_lgb,0.997401,0.997059,1.0,0.995838,0.993997
MLP_blender,0.997036,0.997146,0.995838,1.0,0.996633
lasso_blender,0.995459,0.995885,0.993997,0.996633,1.0


In [8]:
# Settings for the Lasso blender; runLasso unpacks this dict into Lasso(**params).
params = dict(
    alpha=1e-5,     # strength of the L1 penalty
    max_iter=1500,  # upper bound on solver iterations
    positive=True,  # restrict the fitted coefficients to be non-negative
)
def runLasso(train_X, train_y, val_X, val_y, test_X, params):
    """Fit a Lasso on the training rows and predict the validation and test rows.

    Parameters
    ----------
    train_X
        Training features. Must expose ``.columns`` (presumably a pandas
        DataFrame), because each column name is printed next to its
        fitted coefficient.
    train_y
        Training target passed to ``Lasso.fit``.
    val_X
        Validation features to predict.
    val_y
        Not used inside this function.
    test_X
        Test features to predict.
    params : dict
        Keyword arguments unpacked into the ``Lasso`` constructor.

    Returns
    -------
    tuple
        ``(validation predictions, test predictions)``, in that order.
    """
    lasso = Lasso(**params).fit(train_X, train_y)

    # Print one (column name, coefficient) tuple per feature.
    for column_and_weight in zip(train_X.columns, lasso.coef_):
        print(column_and_weight)

    print_step('Predict Val 1/2')
    val_predictions = lasso.predict(val_X)

    print_step('Predict Test 2/2')
    test_predictions = lasso.predict(test_X)

    return val_predictions, test_predictions

In [9]:
# Hand the stacked frames, the target and runLasso (with its params) to the
# project's CV helper; `results['test']` is read later to build the submission.
# NOTE(review): the label 'lasso_blender' is also the name of one of the input
# columns in train_/test_. If run_cv_model uses the label as a cache key, this
# run could overwrite that cached input. Confirm against cv.py.
results = run_cv_model(train_, test_, target, runLasso, params, rmse, 'lasso_blender')

[2018-06-27 00:12:24.678805] Started lasso_blender fold 1/5
('lgb_blender', 0.2045994215589344)
('lgb_blender_poisson', 0.0)
('flat_blender_lgb', 0.5551118854708563)
('MLP_blender', 0.2485830449400316)
('lasso_blender', 0.0)
[2018-06-27 00:12:40.122879] Predict Val 1/2
[2018-06-27 00:12:40.124669] Predict Test 2/2
[2018-06-27 00:12:40.139229] lasso_blender cv score 1 : 0.21046092309927614
[2018-06-27 00:12:40.151046] Started lasso_blender fold 2/5
('lgb_blender', 0.1614197860633351)
('lgb_blender_poisson', 0.0)
('flat_blender_lgb', 0.5930971677649242)
('MLP_blender', 0.2497932878773276)
('lasso_blender', 0.0)
[2018-06-27 00:12:54.739858] Predict Val 1/2
[2018-06-27 00:12:54.741246] Predict Test 2/2
[2018-06-27 00:12:54.754110] lasso_blender cv score 2 : 0.20948924805148497
[2018-06-27 00:12:54.765869] Started lasso_blender fold 3/5
('lgb_blender', 0.20577932629479967)
('lgb_blender_poisson', 0.0)
('flat_blender_lgb', 0.5466192927347319)
('MLP_blender', 0.25514056380043815)
('lasso_blen

In [None]:
#### New L2 models
# 1e-5
# 0.2097412787457186
# 5e-5
# 0.2097415142905284
#### Old L2 models
# 1e-4
# 0.20980553551613146
# 1e-5
# 0.20980429389524752

In [10]:
print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
print_step('Prepping submission file')
# Read only the id column; the prediction column is attached below.
submission = pd.read_csv('data/test.csv', usecols=['item_id'])
# Bound the blended test predictions to [0, 1] before writing them out.
submission['deal_probability'] = results['test'].clip(0.0, 1.0)
submission.to_csv('submit/submit_lasso_L3_blender.csv', index=False)
print_step('Done!')

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[2018-06-27 00:16:00.403682] Prepping submission file
[2018-06-27 00:16:02.544454] Done!
