In [1]:
import pandas as pd
import numpy as np

In [2]:
# This will allow you to see all column names & rows when you are doing .head(). None of the column name will be truncated.
# source: https://stackoverflow.com/questions/49188960/how-to-show-all-of-columns-name-on-pandas-dataframe

pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

In [3]:
# source: https://gist.github.com/rozanecm/38f2901c592bdffc40726cb0473318cf
# Function which plays a beep of given duration and frequency.
# Useful for when executing things that need a while to finish, to get notified.
import os
def beep(duration = 1, freq = 1500):
    """ play tone of duration in seconds and freq in Hz. """
    os.system('play --no-show-progress --null --channels 1 synth %s sine %f' % (duration, freq))

In [4]:
train = pd.read_csv('../data/train.csv', usecols=['id','precio'])
test = pd.read_csv('../data/test.csv', usecols=['id'])

In [5]:
# To save predictions.
# There must be a directory ../predictions for this to work as expected.
# source: https://gist.github.com/rozanecm/ee8333741db42b10158b3e0aff3f22aa
import time
def _get_filename(my_name, timestamp):
    return "../predictions/last_pred/" + timestamp + " by " + my_name + ".csv"

def _save_description(authors_name, timestamp, submission_description):
    f = open("../predictions/last_pred/" + authors_name + ".txt","a")
    f.write(timestamp + ": " + submission_description + '\n')
    f.close()

def save_submission(submission_df, authors_name="fcozza", description = "no description.", index=False, header=True):
    timestamp = time.strftime("%Y.%m.%d - %H:%M:%S")
    submission_df.to_csv(_get_filename(authors_name, timestamp), index=index, header=header)
    _save_description(authors_name, timestamp, description)

# Agregando columnas de predicciones anteriores

# target 1 - rf

In [6]:
current_target_name = 'target_1'

In [7]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 00:42:40 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [8]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_1')

In [9]:
train = train.merge(df.rename(columns={'approach_1':current_target_name}), on='id')

# target 2 - rf + one hot + svd

In [10]:
current_target_name = 'target_2'

In [11]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 00:54:05 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [12]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_2')

In [13]:
train = train.merge(df.rename(columns={'approach_1':current_target_name}), on='id')

# target 3 - rf + one hashing vectorizer + svd

In [14]:
current_target_name = 'target_3'

In [15]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 01:13:36 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [16]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_3')

In [17]:
train = train.merge(df.rename(columns={'approach_1':current_target_name}), on='id')

# target 4 - rf + stopwords

In [18]:
current_target_name = 'target_4'

In [19]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 01:41:50 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [20]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_4')

In [21]:
train = train.merge(df.rename(columns={'approach_1':current_target_name}), on='id')

# target 5 -lightgbm

In [22]:
current_target_name = 'target_5'

In [23]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 02:00:10 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [24]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_5')

In [25]:
train = train.merge(df.rename(columns={'fcozza_target_5':current_target_name}), on='id')

# target 6 - lightgbm with grid search

In [26]:
current_target_name = 'target_6'

In [27]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 02:02:14 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [28]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_6')

In [29]:
train = train.merge(df.rename(columns={'fcozza_target_6':current_target_name}), on='id')

# target 7 - lightgbm grid search + feat eng

In [30]:
current_target_name = 'target_7'

In [31]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 02:08:42 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [32]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_7')

In [33]:
train = train.merge(df.rename(columns={'fcozza_target_7':current_target_name}), on='id')

# target 8 - lightgbm log precio

In [34]:
current_target_name = 'target_8'

In [35]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 02:17:28 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [36]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_8')

In [37]:
train = train.merge(df.rename(columns={'fcozza_target_8':current_target_name}), on='id')

# target 9 - lightgbm log precio y skewed features

In [38]:
current_target_name = 'target_9'

In [39]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 02:23:33 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [40]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_9')

In [41]:
train = train.merge(df.rename(columns={'fcozza_target_9':current_target_name}), on='id')

# target 10 - xgboost tunned 

In [42]:
current_target_name = 'target_10'

In [43]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 02:33:00 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [44]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_10')

In [45]:
train = train.merge(df.rename(columns={'fcozza_target_10':current_target_name}), on='id')

# target 11 - lightgbm + todos los features

In [46]:
current_target_name = 'target_11'

In [47]:
df = pd.read_csv('../predictions/last_pred/2019.12.02 - 02:38:07 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [48]:
df = pd.read_csv('../predictions/last_train_data/fcozza_target_11')

In [49]:
train = train.merge(df.rename(columns={'fcozza_target_11':current_target_name}), on='id')

# target 12 - rf

In [50]:
current_target_name = 'target_12'

In [51]:
df = pd.read_csv('../predictions/2019.10.28 - 09:12:00 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [52]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_1')

train = train.merge(df.rename(columns={'approach_1':current_target_name}), on='id')

# target 13 - rf + one hot + svd

In [53]:
current_target_name = 'target_13'

In [54]:
df = pd.read_csv('../predictions/2019.10.28 - 12:03:46 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [55]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_2')

train = train.merge(df.rename(columns={'rozanecm_approach_2':current_target_name}), on='id')

# target 14 - rf + one hashing vectorizer + svd

In [56]:
current_target_name = 'target_14'

In [57]:
df = pd.read_csv('../predictions/2019.10.28 - 14:40:22 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [58]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_3')

train = train.merge(df.rename(columns={'rozanecm_approach_3':current_target_name}), on='id')

# target 15 - rf + stopwords

In [59]:
current_target_name = 'target_15'

In [60]:
df = pd.read_csv('../predictions/2019.10.28 - 16:36:13 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [61]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_4')

train = train.merge(df.rename(columns={'rozanecm_approach_4':current_target_name}), on='id')

# target 16 -lightgbm

In [62]:
current_target_name = 'target_16'

In [63]:
df = pd.read_csv('../predictions/2019.10.29 - 11:59:35 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [64]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_7')

train = train.merge(df.rename(columns={'rozanecm_approach_7':current_target_name}), on='id')

# target 17 - lightgbm with grid search

In [65]:
current_target_name = 'target_17'

In [66]:
df = pd.read_csv('../predictions/2019.10.29 - 13:19:02 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [67]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_8')

train = train.merge(df.rename(columns={'rozanecm_approach_8':current_target_name}), on='id')

# target 18 - lightgbm with grid search

In [68]:
current_target_name = 'target_18'

In [69]:
df = pd.read_csv('../predictions/2019.11.11 - 13:07:13 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [70]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_10')

train = train.merge(df.rename(columns={'rozanecm_approach_10':current_target_name}), on='id')

# target 19 - lightgbm grid search + feat eng

In [71]:
current_target_name = 'target_19'

In [72]:
df = pd.read_csv('../predictions/2019.10.31 - 20:26:47 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [73]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_1_with_feat_eng')

train = train.merge(df.rename(columns={'rozanecm_approach_1_with_feat_eng':current_target_name}), on='id')

# target 20 - light gbm grid search all train set + feat eng

In [74]:
current_target_name = 'target_20'

In [75]:
df = pd.read_csv('../predictions/2019.11.02 - 14:42:20 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [76]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_3_with_feat_eng')

train = train.merge(df.rename(columns={'rozanecm_approach_3_with_feat_eng':current_target_name}), on='id')

# target 21 - feat selection rf y lightgbm grid search

In [77]:
current_target_name = 'target_21'

In [78]:
df = pd.read_csv('../predictions/2019.11.02 - 17:01:06 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [79]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_4_with_feat_eng')

train = train.merge(df.rename(columns={'rozanecm_approach_4_with_feat_eng':current_target_name}), on='id')

# target 22 - ?

In [80]:
current_target_name = 'target_22'

In [81]:
df = pd.read_csv('../predictions/2019.11.20 - 02_33_23 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [82]:
df = pd.read_csv('../predictions/on_train_data/on_train_data_rozanecm_approach_16')

train = train.merge(df.rename(columns={'rozanecm_approach_16':current_target_name}), on='id')

# target 23 - lightgbm log precio

In [83]:
current_target_name = 'target_23'

In [84]:
df = pd.read_csv('../predictions/2019.11.20 - 19:09:38 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [85]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_19')

train = train.merge(df.rename(columns={'rozanecm_approach_19':current_target_name}), on='id')

# target 24 - lightgbm log precio y skewed features

In [86]:
current_target_name = 'target_24'

In [87]:
df = pd.read_csv('../predictions/2019.11.20 - 21:19:47 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [88]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_20')

train = train.merge(df.rename(columns={'rozanecm_approach_20':current_target_name}), on='id')

# target 25 - lightgbm new features

In [89]:
current_target_name = 'target_25'

In [90]:
df = pd.read_csv('../predictions/2019.11.21 - 15:06:51 by rozanecm.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [91]:
df = pd.read_csv('../predictions/on_train_data/rozanecm_approach_24')

train = train.merge(df.rename(columns={'rozanecm_approach_24':current_target_name}), on='id')

# target 26 - xgboost tunned 

In [92]:
current_target_name = 'target_26'

In [93]:
df = pd.read_csv('../predictions/2019.11.28 - 00:25:43 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [94]:
df = pd.read_csv('../predictions/on_train_data/fcozza_approach_1')

train = train.merge(df.rename(columns={'fcozza_approach_1':current_target_name}), on='id')

# target 27 - lightgbm with features desc

In [95]:
current_target_name = 'target_27'

In [96]:
df = pd.read_csv('../predictions/2019.11.28 - 00:40:14 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [97]:
df = pd.read_csv('../predictions/on_train_data/fcozza_approach_2')

train = train.merge(df.rename(columns={'fcozza_approach_2':current_target_name}), on='id')

# target 28 - lightgbm with new text feat

In [98]:
current_target_name = 'target_28'

In [99]:
df = pd.read_csv('../predictions/2019.11.30 - 12:19:02 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [100]:
df = pd.read_csv('../predictions/on_train_data/fcozza_approach_5')

train = train.merge(df.rename(columns={'fcozza_approach_5':current_target_name}), on='id')

# target 29 - lightgbm + todos TODOS los features

In [101]:
current_target_name = 'target_29'

In [102]:
df = pd.read_csv('../predictions/2019.12.01 - 17:45:42 by fcozza.csv')

test = test.merge(df.rename(columns={'target':current_target_name}), on='id')

In [103]:
df = pd.read_csv('../predictions/on_train_data/fcozza_approach_6')

train = train.merge(df.rename(columns={'fcozza_approach_5':current_target_name}), on='id')

In [104]:
train.sample(5)

Unnamed: 0,id,precio,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,target_9,target_10,target_11,target_12,target_13,target_14,target_15,target_16,target_17,target_18,target_19,target_20,target_21,target_22,target_23,target_24,target_25,target_26,target_27,target_28,fcozza_approach_6
90321,133862,3900000.0,2825104.0,4502269.35,4430647.37,4663052.72,5542563.0,5542563.0,5542563.0,5006688.0,5292022.0,4719696.0,5006688.0,2360942.0,4292050.0,4721420.0,4816199.5,4807917.0,4738586.0,2285638.0,4575554.0,4753499.0,4823392.0,5526000.0,4579936.0,4670303.0,5131914.0,6188446.5,5089311.0,5544667.0,5363865.0
89829,104993,500000.0,511948.99,452106.0,503363.06,538969.9,605488.0,605488.0,605488.0,490097.1,498700.2,726666.94,490097.1,1755018.0,437738.1,485828.73,505807.0,627857.7,575826.8,497507.6,436766.8,525699.5,600525.5,583656.8,514341.4,487536.8,517679.3,460235.06,489817.7,481769.2,476924.3
87211,105566,3950000.0,4921233.0,4339589.6,4356790.0,4405985.8,4060862.0,4060862.0,4060862.0,4139694.0,4007643.0,4392455.0,4139694.0,4501538.0,4291770.0,4412623.3,4812923.75,4051551.0,3992405.0,3933371.0,4168717.0,3983685.0,4111942.0,3577250.0,3997328.0,4048399.0,4173124.0,4269226.0,4008626.0,3713705.0,3868188.0
29585,247215,1650000.0,3504842.09,2577807.52,2504403.73,2517113.9,2939458.0,2939458.0,2939458.0,2626448.0,2448492.0,2783878.2,2626448.0,2644100.0,2572091.9,2957466.61,2891899.4,2766254.0,2758867.0,2431619.0,2552019.0,2900320.0,2807474.0,2622858.0,2612844.0,2644642.0,2513440.0,2537253.5,2641575.0,2513144.0,2462859.0
18136,281905,1400000.0,2866486.1,1876348.79,1924421.43,1808271.88,2309824.0,2309824.0,2309824.0,2082956.0,1844745.0,2311526.0,2082956.0,3298313.0,2168499.68,1984309.06,2053770.0,2187890.0,2327831.0,2343738.0,2064445.0,2186056.0,2269307.0,2543243.0,1987974.0,2127114.0,2093632.0,2127167.5,2232882.0,2196568.0,2017349.0


In [105]:
test.sample(5)

Unnamed: 0,id,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,target_9,target_10,target_11,target_12,target_13,target_14,target_15,target_16,target_17,target_18,target_19,target_20,target_21,target_22,target_23,target_24,target_25,target_26,target_27,target_28,target_29
35878,12599,2900633.72,1861475.2,1959053.2,2129822.68,1151743.0,1230432.0,1261052.0,1112837.0,1072322.0,1549971.5,1112837.0,1853577.4,1675478.0,1857855.54,1927323.85,1240243.0,1252453.0,1525968.0,1153804.0,1138488.0,1181135.0,2005302.4,1161568.0,1241160.0,1250388.0,1613333.5,1116244.0,1000500.0,1043105.0
8206,78718,3270668.21,2510952.0,2499950.0,2467305.0,2282250.0,2390572.0,2213829.0,2445307.0,2339461.0,2567813.0,2445307.0,2409474.78,2423715.0,2487183.2,2567565.2,2397034.0,2281676.0,2067982.0,2449297.0,2228898.0,2334214.0,2722221.4,2337129.0,2219271.0,2379679.0,2349671.2,2302341.0,2283136.0,2412432.0
21752,267228,7253450.0,6009140.0,6863403.01,6625790.0,6288506.0,6197848.0,5980548.0,5951591.0,5837419.0,5501714.0,5951591.0,6182000.0,4682321.52,5997519.56,5723363.0,5976128.0,5654491.0,5267483.0,5861966.0,5746475.0,5394554.0,4521527.6,5031057.0,3094411.0,5859379.0,4977345.5,6461636.0,5850617.0,6322653.0
57119,273537,5385979.98,4203980.0,3942680.0,4315550.0,4325480.0,4064960.0,4245505.0,4367194.0,4560838.0,4320214.0,4367194.0,3766934.0,3935076.4,3823788.0,4105526.4,4062816.0,4270634.0,3775103.0,4360531.0,4110369.0,4234361.0,3924330.66,4269573.0,4385199.0,4197971.0,4153811.5,4246556.0,4219397.0,4262970.0
45142,208906,667862.0,706938.77,659765.7,649248.11,600203.2,614763.0,627945.6,617276.7,616606.2,668765.2,617276.7,780833.99,729623.73,682499.0,656949.88,630135.9,683102.5,703323.2,787276.9,613411.5,693074.6,762960.0,700723.1,652283.5,687916.0,720179.9,634843.3,598254.9,612994.8


# Light gbm with grid search for stacking

## Entrenamiento local

In [106]:
train['precio_log'] = np.log(train['precio'])

In [107]:
X = train.drop(['id','precio','precio_log'], axis=1) #set de datos
y = train['precio_log'] #target

In [108]:
seed = 42

In [109]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=seed)

In [110]:
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb
gbm = lgb.LGBMRegressor(silent=False)

param_dist = {"boosting_type":['gbdt','dart'],
              "max_depth": [25,50,75],
              "learning_rate" : [0.001,0.01,0.05,0.1],
              "num_leaves": [300,900,1200],
              "n_estimators": [50,100,200],
             }

grid_search = GridSearchCV(gbm, n_jobs=-1, param_grid=param_dist, cv = 3, scoring="neg_mean_absolute_error", verbose=5)

In [111]:
import time

In [112]:
%%time
grid_search.fit(X_train,y_train)
grid_search.best_estimator_

Fitting 3 folds for each of 216 candidates, totalling 648 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   10.2s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:  4.2min
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed: 10.6min
[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed: 18.2min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed: 30.5min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed: 47.1min
[Parallel(n_jobs=-1)]: Done 648 out of 648 | elapsed: 49.0min finished


CPU times: user 29.9 s, sys: 925 ms, total: 30.9 s
Wall time: 49min 6s


LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.05, max_depth=25,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=200, n_jobs=-1, num_leaves=300, objective=None,
              random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [113]:
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb

gbm_optimized = grid_search.best_estimator_

In [114]:
#from sklearn.model_selection import GridSearchCV
#import lightgbm as lgb

#gbm_optimized = lgb.LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
#              importance_type='split', learning_rate=0.05, max_depth=25,
#              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
#              n_estimators=200, n_jobs=-1, num_leaves=300, objective=None,
#              random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,
#              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [115]:
%%time
gbm_optimized.fit(X_train, y_train, sample_weight=None, init_score=None, eval_set=[(X_test,y_test)], eval_names=None,
            eval_sample_weight=None, eval_init_score=None, eval_metric='mae', early_stopping_rounds=10,
            verbose=False, feature_name='auto', callbacks=None)

CPU times: user 17.8 s, sys: 101 ms, total: 17.9 s
Wall time: 2.41 s


LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.05, max_depth=25,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=200, n_jobs=-1, num_leaves=300, objective=None,
              random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [116]:
y_scores = gbm_optimized.predict(X_test, num_iteration=gbm_optimized.best_iteration_)

from sklearn.metrics import mean_absolute_error

print(mean_absolute_error(y_test, y_scores))

0.2058977526229464


In [117]:
print(mean_absolute_error(np.exp(y_test), np.exp(y_scores)))

504360.5529469526


## Entrenamiento con todos los datos para obtener predicciones a subir

In [118]:
%%time
gbm_optimized.fit(X,y, sample_weight=None, init_score=None, eval_set=None, eval_names=None,
            eval_sample_weight=None, eval_init_score=None, eval_metric='mae', early_stopping_rounds=None,
            verbose=False, feature_name='auto', callbacks=None)

CPU times: user 35.3 s, sys: 242 ms, total: 35.5 s
Wall time: 4.66 s


LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.05, max_depth=25,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=200, n_jobs=-1, num_leaves=300, objective=None,
              random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [119]:
# prediciendo valores posta...
predictions = gbm_optimized.predict(test.drop(['id'],axis=1), num_iteration=gbm_optimized.best_iteration_)

In [120]:
exp_predictions = np.exp(predictions)

In [121]:
df = pd.DataFrame(data={'id':test['id'], 'target':exp_predictions})

In [122]:
description = "Final stacking 2"
save_submission(df, description=description)

In [123]:
beep()