In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [3]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [4]:
from tensorflow.keras import backend as K
from tensorflow.keras.layers import BatchNormalization, Dropout, Dense

In [5]:
from tensorflow.keras.optimizers import Adam

In [6]:
from sklearn.model_selection import KFold

In [7]:
# Seed NumPy's global RNG for reproducibility.
# NOTE(review): only NumPy is seeded here; no TensorFlow seed is set in the
# visible cells, so model training is presumably still non-deterministic.
np.random.seed(42)

In [8]:
# Load the train and test CSVs.
# NOTE(review): these are hard-coded absolute paths on a local E: drive, so the
# notebook will not run elsewhere without editing them.
train = pd.read_csv('E:/kaggle/Benz/data/train.csv/train.csv')
test = pd.read_csv('E:/kaggle/Benz/data/test.csv/test.csv')

In [9]:
# Stack train and test row-wise (train rows first) so both are encoded together.
# `train` has a 'y' column that `test` lacks, so the column axes are not aligned;
# sort=False keeps the original column order instead of sorting it and avoids the
# pandas FutureWarning about the changing default.
train_test = pd.concat((train, test), axis=0, sort=False)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [10]:
class DummyTransfomer(BaseEstimator, TransformerMixin):
    """One-hot encode every object-dtype column of a DataFrame.

    The transformer is stateless: ``fit`` learns nothing, so the dummy columns
    produced depend only on the frame passed to ``transform``.
    """
    def fit(self, X, y = None):
        """
        No-op fit, present for scikit-learn API compatibility.

        X : DataFrame that will later be transformed (not inspected here)
        y : ignored

        Returns the transformer itself.
        """
        return self
    def transform(self, X, y = None):
        """
        Return a copy of X with each object-dtype column replaced by dummies.

        X : DataFrame to encode; it is not modified
        y : ignored

        Non-object columns keep their original order and come first, followed
        by the dummy columns of each encoded column in original column order.
        Dummy columns are named ``<column>_<value>`` so that categories shared
        by several columns (e.g. 'a' in two different columns) do not produce
        duplicate column names.
        """
        object_columns = [name for name, dtype in X.dtypes.items() if dtype == 'object']
        if not object_columns:
            # Nothing to encode; still hand back a copy so callers may mutate it.
            return X.copy()
        dummies = [pd.get_dummies(X[name], prefix = name) for name in object_columns]
        # Single concat instead of growing the frame once per encoded column.
        return pd.concat([X.drop(object_columns, axis = 1)] + dummies, axis = 1)

In [11]:
# fit() returns the transformer itself, so construction and fitting can be chained.
dummytransfer = DummyTransfomer().fit(train_test)
# Encode the features only: the target column 'y' is removed before transforming.
new_train_test = dummytransfer.transform(train_test.drop(columns='y'))

In [12]:
# Remove the row identifier so it is not used as a model feature.
# Rebinding (rather than inplace=True) together with errors='ignore' makes this
# cell safe to re-run: a second execution no longer raises KeyError.
new_train_test = new_train_test.drop('ID', axis=1, errors='ignore')

In [13]:
def keras_r2(y_true, y_predict):
    """Coefficient of determination (R^2) built from Keras backend ops.

    y_true    : tensor of target values
    y_predict : tensor of model predictions

    Returns ``1 - SS_res / SS_tot``: 1.0 for a perfect fit, 0.0 for a model no
    better than predicting the mean, negative for worse. Higher is better, so
    callbacks monitoring this metric should use ``mode='max'``.

    NOTE(review): when used as a Keras metric this is presumably evaluated per
    batch and averaged, so it only approximates the full-dataset R^2.
    """
    SS_res = K.sum(K.square(y_true - y_predict))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    # epsilon guards against division by zero when all targets are identical.
    return 1 - SS_res/(SS_tot + K.epsilon())

In [14]:
def my_model():
    """Build and compile the regression network.

    Architecture: three Dense(relu) -> BatchNormalization -> Dropout(0.4)
    stages followed by a single linear output unit. Hidden widths are derived
    from the number of rows in the global ``train`` frame (rows // 4, // 8,
    // 16).

    Returns a compiled ``keras.Sequential`` model using mean squared error loss,
    the Adam optimizer and ``keras_r2`` as an additional metric.

    NOTE(review): sizing layers by row count rather than feature count is kept
    from the original; confirm it is intentional.
    """
    m = train.shape[0]
    model = keras.Sequential()
    # Floor division: Dense requires an integer number of units, and `m / 4`
    # is a float in Python 3.
    model.add(Dense(m // 4, activation = 'relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Dense(m // 8, activation = 'relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Dense(m // 16, activation = 'relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Dense(1))

    model.compile(loss = 'mean_squared_error', optimizer = Adam(), metrics = [keras_r2])
    return model

In [15]:
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras import callbacks

In [16]:
from sklearn.metrics import r2_score

In [17]:
# train rows were concatenated first, so the first len(train) rows of the encoded
# frame are the training features and the remainder are the test features.
# Using len(train) avoids hard-coding the row count.
X_train = new_train_test.iloc[:len(train), :]
X_test = new_train_test.iloc[len(train):, :]

In [18]:
# Regression target: the 'y' column of the training frame.
y_train = train['y']

In [19]:
# scikit-learn style wrapper around my_model. Extra keyword arguments are
# forwarded to the underlying Keras fit/predict calls; the fit argument is named
# `epochs` (the old `nb_epoch` spelling is not a fit parameter, so training would
# fall back to the default number of epochs).
estimator = KerasRegressor(build_fn = my_model, epochs = 300, batch_size = 300, verbose = 1)

In [20]:
# 4-fold splitter with shuffling and a fixed seed.
# NOTE(review): only referenced by the commented-out cross-validation loop further down.
kf = KFold(n_splits = 4, shuffle = True, random_state = 42)

In [25]:
import datetime
# Timestamped directory name for logs (logs/fit/YYYYMMDD-HHMMSS).
# NOTE(review): log_dir is not referenced by any other visible cell; the
# TensorBoard callback defined later writes to 'my_model' instead.
log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

In [28]:
# Training callbacks: checkpoint the best model (by the callback's default
# monitored quantity) and write TensorBoard logs to the 'my_model' directory.
callback = [
    # callbacks.EarlyStopping(monitor = 'keras_r2', patience = 10, mode = 'max', verbose = 1),
    callbacks.ModelCheckpoint(
        'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        save_best_only=True,
    ),
    callbacks.TensorBoard(
        log_dir='my_model',
        histogram_freq=0,
        write_graph=True,
        write_grads=False,
        write_images=False,
        embeddings_freq=0,
        embeddings_layer_names=None,
        embeddings_metadata=None,
        embeddings_data=None,
        update_freq='epoch',
    ),
]

In [37]:
from sklearn.model_selection import train_test_split
# Hold out part of the labelled training data for validation (seeded split).
# Despite the *_test names, new_X_test / new_y_test are a validation set taken
# from X_train / y_train, not the competition test data.
new_X_train, new_X_test, new_y_train, new_y_test = train_test_split(X_train, y_train, random_state = 42)

In [41]:
# Train on the hold-out split; NumPy arrays are passed rather than DataFrames,
# and the held-out part is used as validation data for the callbacks.
estimator.fit(
    new_X_train.values,
    new_y_train.values,
    callbacks = callback,
    validation_data = (new_X_test.values, new_y_test.values),
)

Train on 3156 samples, validate on 1053 samples


<tensorflow.python.keras.callbacks.History at 0x12a0bf97d48>

In [29]:
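# NOTE: 4-fold cross-validation loop, currently disabled. `r2_score_fold_list`,
# which a later cell displays, is only defined when this loop is uncommented and run.
# r2_score_fold_list = []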
# for train_index, test_index in kf.split(X_train):
#     estimator.fit(X_train.iloc[train_index, :].values, y_train.iloc[train_index].values, epochs = 300, callbacks = callback, validation_data = (X_train.iloc[test_index,:].values, y_train.iloc[test_index].values))
#     prediction = estimator.predict(X_train.iloc[test_index, :])
#     r2_evaluation = r2_score(y_train.iloc[test_index], prediction)
#     r2_score_fold_list.append(r2_evaluation)

Train on 3156 samples, validate on 1053 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 00011: early stopping
Train on 3157 samples, validate on 1052 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 00013: early stopping
Train on 3157 samples, validate on 1052 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 00013: early stopping
Train on 3157 samples, validate on 1052 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300


Epoch 10/300
Epoch 11/300
Epoch 00011: early stopping


In [30]:
# Per-fold R^2 scores from the cross-validation loop.
# NOTE(review): the loop that builds this list is commented out, so on a fresh
# kernel this name is undefined and the cell raises NameError.
r2_score_fold_list

[-55.46741876755028,
 -45.39890199010135,
 -55.77554579787883,
 -59.15082526287438]

In [42]:
# Predict on the test features. Pass a NumPy array, consistent with fit(),
# which was given .values rather than DataFrames.
test_prediction = estimator.predict(X_test.values)



In [43]:
# Attach predictions to the test frame (mutates `test` in place).
# pd.Series(...) gets a default 0..n-1 index and the assignment aligns on index,
# so this assumes `test` still has the default index from read_csv — TODO confirm.
test['y'] = pd.Series(test_prediction)

In [44]:
# Write the submission file. index=False keeps the output to exactly the ID and
# y columns; otherwise the DataFrame index is written as an extra unnamed column.
test[['ID', 'y']].to_csv('my_submission4.csv', index=False)