Please give an upvote if you find this useful.

In [None]:
import pandas as pd
import numpy as np
import os
from pathlib import Path

In [None]:
# Locate the competition input directory and read the three CSV files
# (training set, test set, sample submission) in that order.
path = Path('../input/tabular-playground-series-jan-2021/')
train, test, sub = (
    pd.read_csv(path / csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# List the column labels of the training frame.
train.columns

In [None]:
# Summary statistics for the columns of the training frame.
train.describe()

In [None]:
from sklearn.model_selection import train_test_split
# Separate the regression target from the predictors; 'id' is a row
# identifier and is dropped so it is not used as a feature.
y = train['target']
x = train.drop(columns=['target', 'id'])

# Hold out 10% of the rows. The seed is fixed so that the split (and every
# model fitted on it below) is reproducible from run to run.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.1, random_state=42
)

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest used for the feature-importance plot.
# max_features=1.0 considers every feature at each split, which is what the
# former 'auto' setting meant for regressors; the 'auto' string itself is
# rejected by recent scikit-learn releases.
rf = RandomForestRegressor(n_estimators=80, max_features=1.0)
rf.fit(xtrain, ytrain)
print('Training Done using Random Forest')


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Horizontal bar chart of the random forest's feature importances,
# ordered from most to least important.
importances = rf.feature_importances_
ranking = np.argsort(-importances)  # negate so the sort is descending

f, ax = plt.subplots(figsize=(11, 9))
sns.barplot(
    x=importances[ranking],
    y=xtrain.columns.values[ranking],
    orient='h',
)
ax.set_xlabel('Feature Importance')
plt.tight_layout()
plt.show()

In [None]:
import xgboost

# The fitted model is stored under the name `xgb` because later cells call
# `xgb.predict(...)`. The module is therefore imported under its full name:
# aliasing it as `xgb` too would let the model overwrite the module, and
# re-running only the constructor line would then fail.
xgb = xgboost.XGBRegressor(
    max_depth=3,
    learning_rate=0.1,
    n_estimators=1000,
    reg_alpha=0.001,
    reg_lambda=0.000001,
    n_jobs=-1,
    min_child_weight=3,
)
xgb.fit(xtrain, ytrain)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

def create_model(optimizer = 'Adam'):
    """Build and compile a small fully connected regression network.

    The network has three Dense layers: one as wide as the number of columns
    in the global ``xtrain``, one with 16 units (both ReLU), and a single
    linear output unit. It is compiled with mean squared error loss.

    Args:
        optimizer: Optimizer passed straight through to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    n_features = xtrain.shape[1]
    model = Sequential()

    stack = (
        Dense(n_features, input_dim=n_features,
              kernel_initializer='normal', activation='relu'),
        Dense(16, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal'),
    )
    for layer in stack:
        model.add(layer)

    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

# Wrap the model factory so the network can be used through the
# scikit-learn style fit/predict interface; verbose=0 silences training logs.
dl = KerasRegressor(build_fn = create_model, verbose = 0)

# No epochs or batch_size are passed here, so the wrapper's defaults apply.
# NOTE(review): confirm those defaults train the network for long enough.
dl.fit(xtrain, ytrain)

In [None]:
import lightgbm as lgb
# Base LightGBM regressor; its hyper-parameters are chosen by the search below.
lgb_model = lgb.LGBMRegressor()

# Candidate values per hyper-parameter. Only num_leaves, learning_rate and
# min_child_samples have more than one candidate; alternative candidate
# lists are kept in the trailing comments.
param_grid = dict(
    boosting_type=['gbdt'],
    num_leaves=[9, 19],  # alt: [19, 31, 37, 47]
    max_depth=[29],  # alt: [7, 15, 29, 37, 47, 53]
    learning_rate=[0.1, 0.15],
    n_estimators=[1000],  # alt: [500, 1000, 2000]
    subsample_for_bin=[200000],  # alt: [20000, 200000, 2000000]
    objective=["regression"],
    min_child_weight=[0.01],  # alt: [0.001, 0.01]
    min_child_samples=[100, 200],  # alt: [20, 50, 100]
    subsample=[1.0],
    subsample_freq=[0],
    colsample_bytree=[1.0],
    reg_alpha=[0.0],
    reg_lambda=[0.0],
)



In [None]:
from sklearn.model_selection import RandomizedSearchCV

# 5-fold cross-validated hyper-parameter search for the LightGBM regressor,
# scored by negated RMSE and run on all available cores.
# NOTE: the param_grid defined in this notebook has only 8 combinations,
# so n_iter=100 exceeds the size of the search space.
search_options = {
    "estimator": lgb_model,
    "param_distributions": param_grid,
    "n_iter": 100,
    "scoring": "neg_root_mean_squared_error",
    "verbose": 10,
    "n_jobs": -1,
    "cv": 5,
}
model = RandomizedSearchCV(**search_options)

model.fit(xtrain, ytrain)

In [None]:
# Report the outcome of the hyper-parameter search. The score is a negated
# RMSE (the search uses "neg_root_mean_squared_error"), so closer to zero
# is better.
best_parameters = model.best_estimator_.get_params()

print(f"Best score: {model.best_score_}")
print("Best parameters from the RandomSearchCV:")
# Only the parameters that were part of the search are listed, alphabetically.
for param_name in sorted(param_grid):
    print(f"\t{param_name}: {best_parameters[param_name]}")

In [None]:
# Keep the best estimator found by the search as the LightGBM base model.
# NOTE: this rebinds `lgb`, which until now was the alias of the lightgbm
# module; from here on `lgb` is the fitted regressor.
lgb = model.best_estimator_

In [None]:
# Level-1 training features for the stacked model: one column per base
# model, holding that model's predictions on the training split.
base_train_predictions = {
    'XGB': xgb.predict(xtrain),
    'NN': dl.predict(xtrain),
    'LGB': lgb.predict(xtrain),
}
xtrain2 = pd.DataFrame(base_train_predictions)
xtrain2.head()

In [None]:
from sklearn import linear_model

# Meta-learner: a linear regression that combines the base models'
# predictions (columns of xtrain2) into the final estimate.
reg = linear_model.LinearRegression()
reg.fit(X=xtrain2, y=ytrain)

In [None]:
# Test predictors: everything except the 'id' column, mirroring the
# feature set the base models were trained on.
test_data = test.drop('id', axis='columns')

In [None]:
# Level-1 test features: each base model's predictions on the test set.
# The column names and their order must be exactly those of the frame `reg`
# was fitted on ('XGB', 'NN', 'LGB'); scikit-learn checks feature names at
# predict time, and the previous labels ('DL', 'SVR') did not match.
xtest2 = pd.DataFrame({
    'XGB': xgb.predict(test_data),
    'NN': dl.predict(test_data).ravel(),  # flatten to 1-D for the column
    'LGB': lgb.predict(test_data),
})

# Combine the base predictions with the fitted meta-learner.
ensemble_predictions = reg.predict(xtest2)

In [None]:
# Submission frame: the test ids alongside the stacked model's predictions.
submission = test[['id']].assign(target=ensemble_predictions)

print(submission.head())

In [None]:
# Write the submission file; index=False keeps the row index out of the CSV.
submission.to_csv('submission.csv', index = False)