In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
from keras import layers
from keras import models
from keras.callbacks import EarlyStopping
from keras.metrics import RootMeanSquaredError
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def build_reg_model(train_shape):
    """Build and compile a small fully connected regression network.

    Args:
        train_shape: Shape tuple of the feature matrix. Only index 1 (the
            number of feature columns) is read, to size the input layer.

    Returns:
        A compiled ``Sequential`` model made of Dense(64, relu),
        Dense(16, relu) and a single linear output unit. It is compiled
        with the Adam optimizer and MSE loss, and reports
        root-mean-squared error as its metric.
    """
    n_features = train_shape[1]

    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(n_features,)),
        layers.Dense(16, activation='relu'),
        layers.Dense(1, activation='linear'),
    ])
    network.compile(
        loss='mse',
        optimizer='adam',
        metrics=[RootMeanSquaredError()],
    )
    return network

# List every file under the Kaggle input mount so the available datasets
# show up in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Directory holding the competition CSVs. The trailing slash is kept so
# that code appending a file name with `+` still yields a valid path.
home_path = '/kaggle/input/tabular-playground-series-jan-2021/'

train = pd.read_csv(os.path.join(home_path, 'train.csv'))
test = pd.read_csv(os.path.join(home_path, 'test.csv'))

# Last expression of the cell: preview the first rows of the training frame.
train.head()

In [None]:
# Every column whose name contains 'cont' is treated as a numeric feature.
num_features = [name for name in train.columns if 'cont' in name]

# Standardise each numeric column with its own scaler: the statistics are
# learned from the training column and then reused for the test column.
# NOTE(review): the scaler is fitted on all of `train`, before the
# train/validation split done in a later cell, so validation rows
# contribute to the scaling statistics -- confirm this is intended.
for feature in num_features:
    scaler = StandardScaler().fit(train[[feature]])
    train[feature] = scaler.transform(train[[feature]])
    test[feature] = scaler.transform(test[[feature]])

In [None]:
# Optional target filter, currently disabled:
# train = train.loc[(train['target'] >= 3) & (train['target'] <= 9)]

# Fix the seed first; it is also passed to the split below.
# NOTE: np.random.seed only seeds NumPy's global generator.
seed = 7
np.random.seed(seed)

# Features are every column except the row id and the label.
y = train['target'].to_numpy()
x = train.drop(columns=['id', 'target']).to_numpy()

# Hold out 25% of the rows for validation.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    random_state=seed,
    test_size=0.25,
)

In [None]:
# Size the network's input layer from the full feature matrix.
keras_reg_model = build_reg_model(x.shape)

# Stop once the validation loss has gone 10 epochs without improving by at
# least 0.001, then roll back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

# Train silently for at most 500 epochs in batches of 1000 rows, scoring
# against the held-out validation split after every epoch.
keras_reg_model.fit(
    x=x_train,
    y=y_train,
    batch_size=1000,
    epochs=500,
    verbose=0,
    callbacks=[early_stopping],
    validation_data=(x_val, y_val),
)

In [None]:
# Score the fitted model on the held-out validation rows.
y_pred = keras_reg_model.predict(x_val)

mse = mean_squared_error(y_val, y_pred)

# Last expression of the cell: validation RMSE (square root of the MSE).
np.sqrt(mse)

In [None]:
# Separate the row ids from the feature columns of the test set.
ids = test['id'].values
real_test = test.drop(labels = ['id'], axis = 1)

# Predict on a plain NumPy array -- the same input type the model was
# fitted on -- rather than handing Keras a DataFrame.
real_pred = keras_reg_model.predict(real_test.values)

In [None]:
# Use the sample submission as the template; `sub` and `sample_submission`
# refer to the same DataFrame.
sub = sample_submission = pd.read_csv(home_path + 'sample_submission.csv')
sub['target'] = real_pred

# Print the number of rows in the submission.
print(sub.shape[0])

sub.to_csv('keras_submission_final.csv', index=False)