In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [17]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import *
from tensorflow.keras import backend as K
from tensorflow.keras.losses import *

In [18]:
def SMAPE(y_true, y_pred):
    """Keras metric function: element-wise SMAPE-style score (100 means a perfect prediction).

    For every element the returned value is

        100 - (100 / N) * (y_pred - y_true)**2 / (min(2*|y_true|, |y_pred|) + |y_true|)**2

    where N is the size of the last axis of the prediction tensor.

    Args:
        y_true: target values; cast to float32 and reshaped to the shape of ``y_pred``.
        y_pred: predicted values; cast to float32.

    Returns:
        A float32 tensor with the same shape as ``y_pred``.

    NOTE(review): with a single-output model N is 1, so averaging the returned
    values over the batch (which Keras is expected to do for function metrics)
    matches the formula in ``test_SMAPE`` -- confirm before using this with
    multi-output models.
    """
    # Cast explicitly so integer targets (e.g. a raw income column) can be passed directly.
    y_pred = tf.cast(y_pred, tf.float32)
    # Give y_true the shape of y_pred: a rank-1 target against an (n, 1) prediction
    # would otherwise broadcast into an (n, n) matrix.
    y_true = tf.reshape(tf.cast(y_true, tf.float32), tf.shape(y_pred))

    n_last = tf.cast(tf.shape(y_pred)[-1], tf.float32)
    abs_true = tf.abs(y_true)
    squared_error = tf.square(y_pred - y_true)
    # NOTE: the denominator is 0 wherever y_true is 0, which yields inf/nan for that element.
    denominator = tf.square(tf.minimum(2.0 * abs_true, tf.abs(y_pred)) + abs_true)
    return 100.0 - (100.0 / n_last) * (squared_error / denominator)

def test_SMAPE(y_true, y_pred):
    score = 100
    val = 0
    for i in range(y_true.shape[0]):
        val += abs(y_pred[i]-y_true[i])**2/(min(2*abs(y_true[i]),abs(y_pred[i])) + abs(y_true[i]) )**2
    score -= (100/y_true.shape[0])*val
    return score

In [21]:
# callback_list = [
#     ModelCheckpoint(filepath='saved_weight/Too_1_{epoch:02d}.hdf5', verbose=1, period=200),
    
# ]


def get_nn_model_1(lr=1e-3, amsgrad=True, n_feature=3, l2_kernel=0.03, l1_act=0.03, decay=0.001):
    """Build, compile and summarise the fully connected regression network.

    Architecture: Dense(1024) -> LeakyReLU -> Dropout(0.5), then three
    Dense -> LeakyReLU -> BatchNormalization stages (512, 256 and 128 units),
    and a single-unit Dense output followed by LeakyReLU.

    Args:
        lr: learning rate handed to the Adam optimizer.
        amsgrad: whether Adam uses its AMSGrad variant.
        n_feature: number of input features per sample.
        l2_kernel: L2 regularisation factor on the first layer's kernel.
        l1_act: L1 regularisation factor on the first layer's activations.
        decay: learning-rate decay handed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (MSE loss, ``SMAPE`` metric).
        The layer summary is printed as a side effect.
    """
    # `learning_rate` is the documented argument name; `lr` is only a deprecated alias.
    optimizer = Adam(learning_rate=lr, decay=decay, amsgrad=amsgrad)

    model = Sequential()
    # Regularised input stage. Declaring input_shape here builds the model,
    # so no explicit build() call is needed before summary().
    model.add(Dense(
        1024,
        input_shape=(n_feature,),
        kernel_regularizer=tf.keras.regularizers.l2(l2_kernel),
        activity_regularizer=tf.keras.regularizers.l1(l1_act),
        kernel_initializer='Orthogonal',
    ))
    model.add(LeakyReLU())
    model.add(Dropout(0.5))

    # Three identical hidden stages of decreasing width.
    for units in (512, 256, 128):
        model.add(Dense(units, kernel_initializer='Orthogonal'))
        model.add(LeakyReLU())
        model.add(BatchNormalization())

    # Single regression output.
    model.add(Dense(1, kernel_initializer='he_normal'))
    model.add(LeakyReLU())

    model.compile(loss='mse', optimizer=optimizer, metrics=[SMAPE])
    model.summary()
    return model
# Instantiate the network for 7 input features; get_nn_model_1 also prints the layer summary.
model = get_nn_model_1(n_feature=7)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 1024)              8192      
_________________________________________________________________
leaky_re_lu_11 (LeakyReLU)   (None, 1024)              0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 512)               524800    
_________________________________________________________________
leaky_re_lu_12 (LeakyReLU)   (None, 512)               0         
_________________________________________________________________
batch_normalization_7 (Batch (None, 512)               2048      
_________________________________________________________________
dense_14 (Dense)             (None, 256)              

In [22]:
# Load the two input tables; the next cell joins them on their shared 'id' column.
norm_df = pd.read_csv('data_df.csv')
train_df = pd.read_csv('train.csv')

In [23]:
# Join the features with the training table on 'id' (rows present in both only).
whole_df = norm_df.merge(train_df, on='id', how='inner')

# Reproducible random 90/10 split: sample the training rows, keep the rest for evaluation.
train_set = whole_df.sample(frac=0.90, random_state=0)
test_set = whole_df.drop(index=train_set.index)

train_set.head()

Unnamed: 0,id,gender,ocp_cd,age,cc_txn_amt,cc_count,kp_txn_count,kp_txn_amt,income
11841,11842,2,9.0,3,4600.0,5.0,39.0,223100.0,28000
19602,19603,1,4.0,4,273400.0,97.0,0.0,0.0,65000
45519,45520,2,3.0,4,10400.0,13.0,0.0,0.0,21000
25747,25748,1,9.0,5,0.0,0.0,0.0,0.0,33000
42642,42643,2,4.0,3,1470200.0,230.0,228.0,4822400.0,95000


In [24]:
def df2np(df):
    """Split a DataFrame into a feature matrix and an optional target vector.

    The 'id' column, when present, is discarded. The 'income' column, when
    present, is returned separately as the target and removed from the features.

    Args:
        df: input DataFrame; it is not modified.

    Returns:
        (x, y): ``x`` is the NumPy array of the remaining columns and ``y`` is
        the NumPy array of 'income', or ``None`` when that column is absent.
    """
    features = df.drop(columns='id') if 'id' in df.columns else df

    if 'income' in features.columns:
        target = features['income'].to_numpy()
        features = features.drop(columns='income')
    else:
        target = None

    return features.to_numpy(), target

In [25]:
# Convert both splits into NumPy feature matrices and 'income' target vectors.
x_train, y_train = df2np(train_set)
x_test, y_test = df2np(test_set)

In [26]:
# Train for 300 epochs in shuffled batches of 1024, validating on the held-out split.
hist = model.fit(
    x=x_train,
    y=y_train,
    batch_size=1024,
    epochs=300,
    shuffle=True,
    validation_data=(x_test, y_test),
    verbose=1,
)
# _y = model.predict(x_test)
# print('test_SMAPE:', test_SMAPE(y_test, _y))

Train on 45000 samples, validate on 5000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300

KeyboardInterrupt: 

In [None]:
# model.save_weights('./weights/too_model.hdf5')

In [None]:
# Per-epoch training/validation curves recorded by fit(); `hist` only exists
# if the training cell ran to completion.
loss, val_loss, score, val_score = (
    hist.history[key] for key in ('loss', 'val_loss', 'SMAPE', 'val_SMAPE')
)

In [None]:
# One figure per quantity; each entry is
# (title, y-axis label, (training values, legend label), (validation values, legend label)).
panels = (
    ('loss (mse)', 'loss', (loss, 'loss'), (val_loss, 'val_loss')),
    ('SMAPE', 'score', (score, 'SMAPE'), (val_score, 'val_SMAPE')),
)

for title, y_label, train_curve, val_curve in panels:
    plt.figure(figsize=(7, 5))
    plt.title(title)
    # Training curve in blue, validation curve in orange.
    for (values, label), color in zip((train_curve, val_curve), ('blue', 'orange')):
        plt.plot(np.arange(len(values)), values, label=label, color=color)
    plt.xlabel('epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()

In [None]:
# Load the test table and attach the features from norm_df by 'id'.
# NOTE(review): an inner join drops any test id that is missing from norm_df.
# Predictions are later written back to test_df row by row, so confirm that
# norm_test_df has exactly one row per test_df row.
test_df = pd.read_csv('test.csv')
norm_test_df = pd.merge(test_df, norm_df, on='id', how='inner')
norm_test_df

In [66]:
# Feature matrix for the test table; the second return value (targets, if any) is discarded.
# Note: this rebinds x_test, replacing the validation features created earlier.
x_test, _ = df2np(norm_test_df)

In [67]:
# Restore trained weights from disk.
# NOTE(review): the save_weights cell above is commented out, so this file must
# already exist from an earlier session.
model.load_weights('./weights/too_model.hdf5')

In [68]:
# Predict income for the test features; the bare expression displays the result.
y_pred = model.predict(x_test)
y_pred

array([[25170.336],
       [18778.193],
       [26193.99 ],
       ...,
       [22761.492],
       [33594.78 ],
       [24332.344]], dtype=float32)

In [69]:
# y_pred is 2-D with a single column (see the array displayed above) and is assigned by position.
# NOTE(review): this assumes the inner merge that produced norm_test_df kept every
# test_df row, in the same order -- confirm.
test_df['income'] = y_pred

In [71]:
# Write the predictions to disk. No index=False is passed, so the DataFrame index
# is written as the first column -- NOTE(review): confirm the consumer expects it.
test_df.to_csv('output_too.csv')