In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, make_scorer

from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor

In [None]:
from pathlib import Path
import os
# Print the full path of every file found under /kaggle/input, walking the
# tree lazily so paths are emitted in the same order os.walk yields them.
mounted_files = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for mounted_file in mounted_files:
    print(mounted_file)

# Directory the competition CSV files are read from.
input_path = Path('/kaggle/input/tabular-playground-series-jan-2021/')

In [None]:
# Read the three CSV files in one pass; each is indexed by its 'id' column.
train, test, submission = (
    pd.read_csv(input_path / csv_name, index_col='id')
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)

# Preview the first rows of the training frame.
display(train.head())

In [None]:
# Features are every column except the label. Dropping 'target' by name
# (rather than slicing off the last column) stays correct even if the column
# order of the CSV changes.
X = train.drop(columns='target')
y = train['target']

# 60% of the rows are used for fitting and 40% are held out for scoring.
# The seed is fixed (same value the model grids use) so that rerunning the
# notebook reproduces the same split and therefore the same scores.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.60, random_state=37
)

In [None]:
# Hyper-parameter grids, one per candidate estimator. Every value is a list
# because each grid is handed to GridSearchCV as its `param_grid`.

# LinearRegression: only the intercept switch is searched.
lr_params = {'fit_intercept': [True, False]}

# SGDRegressor: intercept switch, regularisation strength and initial
# learning rate; the seed is pinned to a single value.
sgdr_params = {
    'fit_intercept': [True, False],
    'alpha': [0.0001, 0.0005, 0.001],
    'eta0': [0.001, 0.005, 0.01, 0.05, 0.1],
    'random_state': [37],
}

# Ridge: regularisation strength only.
ridge_params = {
    'alpha': [0.1, 0.5, 1.0],
    'random_state': [37],
}

# Lasso: regularisation strength only.
lasso_params = {
    'alpha': [0.01, 0.1, 0.5, 1.0],
    'random_state': [37],
}

# ElasticNet: same alpha values as the Lasso grid.
elnet_params = {
    'alpha': [0.01, 0.1, 0.5, 1.0],
    'random_state': [37],
}

# RandomForestRegressor: forest size and tree depth; n_jobs is passed as -1.
rfr_params = {
    'n_estimators': [10, 50, 100],
    'max_depth': [1, 3, 5],
    'n_jobs': [-1],
    'random_state': [37],
}

In [None]:
# Candidate estimators, keyed by the short name used in the result dicts.
models = {'lr': LinearRegression(),
          'sgdr': SGDRegressor(),
          'ridge': Ridge(),
          'lasso': Lasso(),
          'elnet': ElasticNet(),
          'rfr': RandomForestRegressor()
         }

# Search grid for each estimator, keyed by the same short name.
param_grids = {'lr': lr_params,
               'sgdr': sgdr_params,
               'ridge': ridge_params,
               'lasso': lasso_params,
               'elnet': elnet_params,
               'rfr': rfr_params
              }

# `params` stays a list aligned position-by-position with `models`, but it is
# now derived from the names instead of being ordered by hand: reordering
# either dict can no longer pair an estimator with the wrong grid, and a model
# without a grid fails immediately with a KeyError.
params = [param_grids[name] for name in models]

In [None]:
# Per-model outcome of the grid search, keyed by the short model name:
#   best_params -> parameter combination chosen by cross-validation
#   best_result -> RMSE of the refitted best estimator on the hold-out split
best_params = {}
best_result = {}

for n, (name, model) in enumerate(models.items()):
    # 5-fold cross-validated search over this model's grid (params[n] is the
    # grid at the same position as the model in `models`).
    clf = GridSearchCV(
        estimator=model,
        param_grid=params[n],
        cv=5,
        scoring='neg_root_mean_squared_error',
    ).fit(X_train, y_train)

    # score() uses the negated-RMSE scorer configured above, so flip the sign
    # to get a plain RMSE. Computed once and reused: each call predicts over
    # the whole hold-out set again.
    holdout_rmse = -clf.score(X_test, y_test)

    best_params[name] = clf.best_params_
    best_result[name] = holdout_rmse
    print(f"{str(name)} -- {holdout_rmse}")

In [None]:
# Name of the model with the lowest hold-out RMSE. min() scans the scores once
# instead of sorting them all, and on ties returns the first model in
# insertion order, as the stable sort did.
best_model = min(best_result, key=best_result.get)

In [None]:
# Take the winning estimator, apply the parameters the grid search selected
# for it, and refit on the full training data (X, y) rather than the 60% split.
model = models[best_model]
model.set_params(**best_params[best_model])
model.fit(X, y)
# Last expression of the cell: show the fitted estimator.
model

In [None]:
# Predict the test frame with the refitted model and store the predictions
# in the submission frame's 'target' column.
test_predictions = model.predict(test)
submission['target'] = test_predictions

# The output file is named after the winning model's short name.
submission_file = best_model + '.csv'
submission.to_csv(submission_file)