In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor

In [None]:
# Relative paths of the three CSV inputs read by this notebook.
TRAIN_FOLDS_CSV = "../input/30daysmlkfoldnormalized/train_folds_with_normalized.csv"
TEST_CSV = "../input/30-days-of-ml/test.csv"
SAMPLE_SUBMISSION_CSV = "../input/30-days-of-ml/sample_submission.csv"

# Read them in the same order as before: training folds, test rows, submission template.
df, df_test, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (TRAIN_FOLDS_CSV, TEST_CSV, SAMPLE_SUBMISSION_CSV)
)

In [None]:
# Columns of `df` that are bookkeeping or the label rather than model inputs.
NON_FEATURE_COLS = ("id", "target", "kfold")

# Every remaining column is a model input, kept in the order it appears in `df`.
useful_features = [name for name in df.columns if name not in NON_FEATURE_COLS]
# Columns whose name contains 'cat' are the ones ordinal-encoded later.
object_cols = [name for name in useful_features if 'cat' in name]
# Restrict the test frame to the model inputs, in the same column order.
df_test = df_test.loc[:, useful_features]

In [None]:
# Plotting and deep-learning imports used by the neural-network cells below.
# numpy and pandas are already imported in the first cell; these lines only
# rebind the same modules. seaborn is not used in the cells visible here.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


# Make numpy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
# NOTE(review): this is an experimental namespace; newer TensorFlow releases may
# no longer provide it — confirm against the installed version.
from tensorflow.keras.layers.experimental import preprocessing

# Show which TensorFlow version this run is using.
print(tf.__version__)

In [None]:
def model_1(norm, units=512, dropout_rate=0.5, learning_rate=0.001):
    """Build and compile the fully connected regression network.

    With the default arguments the architecture is the same as before:
    ``norm`` -> three ``Dense(units, relu)`` layers -> ``Dropout`` ->
    ``Dense(units, relu)`` -> ``Dropout`` -> ``Dense(units, relu)`` ->
    ``Dropout`` -> ``Dense(1)``.

    Args:
        norm: Layer placed first in the stack (the caller passes an adapted
            normalization layer).
        units: Width of every hidden ``Dense`` layer.
        dropout_rate: Rate passed to each ``Dropout`` layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled ``keras.Sequential`` model, using mean-squared-error loss.
    """
    def hidden():
        # A fresh layer object per call; layers must not be shared across positions.
        return layers.Dense(units, activation='relu')

    model = keras.Sequential([
        norm,
        hidden(),
        hidden(),
        hidden(),
        layers.Dropout(dropout_rate),
        hidden(),
        layers.Dropout(dropout_rate),
        hidden(),
        layers.Dropout(dropout_rate),
        layers.Dense(1),  # single linear output for regression
    ])

    model.compile(loss='mean_squared_error',
                  optimizer=tf.keras.optimizers.Adam(learning_rate))
    return model

In [None]:
# Train the dense network on the pre-computed K-fold split and collect its
# test-set predictions. Only fold 0 is run (`range(1)`).
final_predictions = []
for fold in range(1):
    xtrain = df[df.kfold != fold].reset_index(drop=True)
    xvalid = df[df.kfold == fold].reset_index(drop=True)
    xtest = df_test.copy()

    ytrain = xtrain.target
    yvalid = xvalid.target

    # Explicit copies: the encoded columns are assigned back below, and writing
    # into a column slice of another frame triggers SettingWithCopyWarning.
    xtrain = xtrain[useful_features].copy()
    xvalid = xvalid[useful_features].copy()

    # Fit the category -> integer mapping on the training rows only, then reuse it.
    ordinal_encoder = OrdinalEncoder()
    xtrain[object_cols] = ordinal_encoder.fit_transform(xtrain[object_cols])
    xvalid[object_cols] = ordinal_encoder.transform(xvalid[object_cols])
    xtest[object_cols] = ordinal_encoder.transform(xtest[object_cols])

    # Per-feature normalization statistics are computed from the training rows.
    normalizer = preprocessing.Normalization(axis=-1)
    normalizer.adapt(np.array(xtrain))

    dnn_model = model_1(normalizer)
    dnn_model.summary()

    # Monitor on the fold's own hold-out rows. The previous `validation_split=0.2`
    # withheld a further 20% of the training rows from fitting while the dedicated
    # validation fold was never seen during `fit`.
    history = dnn_model.fit(
        xtrain, ytrain,
        validation_data=(xvalid, yvalid),
        verbose=1, epochs=30, batch_size=150)

    preds_valid = dnn_model.predict(xvalid)
    test_preds = dnn_model.predict(xtest)
    final_predictions.append(test_preds)

    # RMSE as sqrt(MSE): the `squared=False` keyword is deprecated and removed in
    # newer scikit-learn releases, while this form works on every version.
    print(fold, np.sqrt(mean_squared_error(yvalid, preds_valid)))

In [None]:
def plot_loss(history):
    """Plot the training and validation loss curves of a fit run.

    Reads ``history.history['loss']`` and ``history.history['val_loss']`` and
    draws both through the pyplot interface, with the y-axis limited to
    [0.4, 1], labelled axes, a legend and a grid.
    """
    for curve in ('loss', 'val_loss'):
        # The history key doubles as the legend label.
        plt.plot(history.history[curve], label=curve)
    plt.ylim([0.4, 1])
    plt.xlabel('Epoch')
    plt.ylabel('Error')
    plt.legend()
    plt.grid(True)

In [None]:
# Plot the loss curves recorded in `history`, the result of the last `dnn_model.fit` call.
plot_loss(history)

In [None]:
# Hyperparameters for the gradient-boosted model (fixed seed via `random_state`).
xgb_params = {
    'n_estimators': 2217,
    'max_depth': 4,
    'learning_rate': 0.054,
    'gamma': 1.0,
    'min_child_weight': 5,
    'subsample': 0.8,
    'colsample_bytree': 0.6,
    'reg_alpha': 0.9,
    'reg_lambda': 0.2,
    'random_state': 4,
    'n_jobs': 4,
}

# NOTE: this rebinds `final_predictions`, so predictions appended by the
# neural-network cell earlier in the notebook are not part of the final blend.
final_predictions = []
fold_scores = []
for fold in range(5):
    xtrain = df[df.kfold != fold].reset_index(drop=True)
    xvalid = df[df.kfold == fold].reset_index(drop=True)
    xtest = df_test.copy()

    ytrain = xtrain.target
    yvalid = xvalid.target

    # Explicit copies: the encoded columns are assigned back below, and writing
    # into a column slice of another frame triggers SettingWithCopyWarning.
    xtrain = xtrain[useful_features].copy()
    xvalid = xvalid[useful_features].copy()

    # Fit the category -> integer mapping on the training rows only, then reuse it.
    ordinal_encoder = OrdinalEncoder()
    xtrain[object_cols] = ordinal_encoder.fit_transform(xtrain[object_cols])
    xvalid[object_cols] = ordinal_encoder.transform(xvalid[object_cols])
    xtest[object_cols] = ordinal_encoder.transform(xtest[object_cols])

    # A fresh estimator per fold makes it explicit that no state is carried over.
    model = XGBRegressor(**xgb_params)
    model.fit(xtrain, ytrain)
    preds_valid = model.predict(xvalid)
    test_preds = model.predict(xtest)
    final_predictions.append(test_preds)

    # RMSE as sqrt(MSE): the `squared=False` keyword is deprecated and removed in
    # newer scikit-learn releases, while this form works on every version.
    fold_rmse = np.sqrt(mean_squared_error(yvalid, preds_valid))
    fold_scores.append(fold_rmse)
    print(fold, fold_rmse)

# Cross-validation summary over all folds.
print("mean RMSE", np.mean(fold_scores), "std", np.std(fold_scores))

In [None]:
# Display the first entry of `final_predictions` (the test-set predictions appended first).
final_predictions[0]

In [None]:
# Blend the folds: one column per entry of `final_predictions`, then the row-wise mean.
fold_matrix = np.column_stack(final_predictions)
preds = fold_matrix.mean(axis=1)

In [None]:
# Put the blended predictions into the submission template and write it to disk.
# Bracket assignment always targets the column; attribute-style assignment would
# only set a plain Python attribute (not a column) if `target` were ever missing.
sample_submission["target"] = preds
sample_submission.to_csv("submission.csv", index=False)