In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tqdm import tqdm
from tensorflow.keras.preprocessing.sequence import pad_sequences

from mllib.utils import load_pickle
from mllib.params import FieldNames, FileNames
from mllib.transformers import ListLen, ListAggregation
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Build a TF1-compat session config with the GPU `allow_growth` option enabled
# (presumably so TensorFlow does not reserve all GPU memory up front -- confirm
# against the TensorFlow GPU options documentation).
config = ConfigProto()
config.gpu_options.allow_growth = True
# Open an InteractiveSession with that config; `session` is kept as a
# module-level handle.
session = InteractiveSession(config=config)



In [2]:
# customer_id demog -- auto encoder
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Embedding, TimeDistributed, Input, concatenate, Reshape, Dense, GRU, GlobalAveragePooling1D
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

In [3]:
class ROC_AUC(Callback):
    """Keras callback that reports ROC AUC on a held-out set after each epoch.

    Parameters
    ----------
    validation_data : tuple
        A 2-tuple ``(X_val, y_val)``. ``X_val`` is handed unchanged to
        ``self.model.predict`` and ``y_val`` is the ground truth passed to
        ``roc_auc_score``.
    batch_size : int, optional
        Batch size used for the prediction pass. Defaults to 256, the value
        that was previously hard-coded.
    """

    def __init__(self, validation_data, batch_size=256):
        # Initialise the Keras base class so that the attributes it manages
        # exist before training starts.
        super().__init__()
        self.X_val, self.y_val = validation_data
        self.batch_size = batch_size

    def on_epoch_end(self, epoch, logs=None):
        """Score the validation set and record the result.

        Parameters
        ----------
        epoch : int
            Index of the epoch that just finished (unused).
        logs : dict or None
            Metrics dict supplied by the caller. When it is not ``None`` it
            gains a ``"val_rocauc"`` entry holding the computed score.
        """
        # `predict` output is flattened to 1-D before being scored.
        y_preds = self.model.predict(
            self.X_val, batch_size=self.batch_size
        ).flatten()
        val_rocauc = roc_auc_score(self.y_val, y_preds)
        # `logs` defaults to None rather than a shared `{}` so that one call
        # can never leak state into the next through the default argument.
        if logs is not None:
            logs["val_rocauc"] = val_rocauc
        print("ROC AUC for this fold, is ", val_rocauc)


In [5]:
CUST_HIST = 512     # default length of the first (customer history) input
COUPON_N = 101      # default length of the second (coupon) input
ITEM_VOCAB = 74066  # default `input_dim` of the shared Embedding layer


def _build_model(cust_hist=CUST_HIST, coupon_n=COUPON_N, item_vocab=ITEM_VOCAB,
                 emb_dim=100, gru_units=128, mask_zero=False):
    """Build and compile the two-input binary classifier.

    Both inputs are sequences of integer ids that share a single Embedding
    layer. The first input's embeddings go through a GRU that returns the
    full sequence, which is then averaged over time; the second input's
    embeddings are averaged directly. The two averages are concatenated and
    fed to a single sigmoid unit.

    Called with no arguments, this reproduces the original fixed
    architecture.

    Parameters
    ----------
    cust_hist : int
        Length of the first input sequence.
    coupon_n : int
        Length of the second input sequence.
    item_vocab : int
        ``input_dim`` of the shared Embedding layer.
    emb_dim : int
        Output width of the shared Embedding layer.
    gru_units : int
        Number of GRU units.
    mask_zero : bool
        Forwarded to ``Embedding(mask_zero=...)``. Defaults to ``False``,
        which matches the Keras default used before.
        NOTE(review): the coupon array displayed in this notebook is padded
        with trailing zeros; if id 0 is reserved for padding, enabling this
        may be worthwhile -- confirm before switching it on.

    Returns
    -------
    tensorflow.keras.Model
        Model compiled with binary cross-entropy loss and the Adam optimizer.
    """
    hist_inp = Input(shape=(cust_hist,))
    coupon_inp = Input(shape=(coupon_n,))

    # One embedding table is reused for both inputs.
    item_emb = Embedding(item_vocab, emb_dim, mask_zero=mask_zero)
    hist_emb = item_emb(hist_inp)
    coupon_emb = item_emb(coupon_inp)

    # History branch: recurrent encoding, then mean over the time axis.
    hist_seq = GRU(gru_units, return_sequences=True, go_backwards=False)(hist_emb)
    hist_vec = GlobalAveragePooling1D()(hist_seq)
    # Coupon branch: plain mean of the embeddings.
    coupon_vec = GlobalAveragePooling1D()(coupon_emb)

    merged = concatenate([hist_vec, coupon_vec])
    out = Dense(1, activation='sigmoid')(merged)

    model = Model(inputs=[hist_inp, coupon_inp], outputs=[out])
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model

In [6]:
# Customer-history arrays first, then coupon arrays; within each pair the
# training file is read before the validation file.
x_tr_cust, x_val_cust = (
    np.load(path)
    for path in ('data/tr_customer_hist_nn_data.npy',
                 'data/val_customer_hist_nn_data.npy')
)
x_tr_cp, x_val_cp = (
    np.load(path)
    for path in ('data/tr_coupon_nn_data.npy',
                 'data/val_coupon_nn_data.npy')
)

In [7]:
# Peek at slice 0 along the last axis of the training coupon array; this same
# slice is what the training cell feeds to the model as its second input.
x_tr_cp[:, :, 0]

array([[ 560,  560,  560, ...,    0,    0,    0],
       [1558, 1558, 1558, ...,    0,    0,    0],
       [ 133,  133,  133, ...,    0,    0,    0],
       ...,
       [  56,   56,   56, ...,    0,    0,    0],
       [2188, 2188, 2188, ...,    0,    0,    0],
       [ 864,  864,  864, ...,    0,    0,    0]], dtype=int32)

In [8]:
# Load the training and validation frames, then pull the target column out of
# each as a plain array.
# NOTE(review): `load_pickle` presumably unpickles the file -- only point it at
# trusted data.
tr = load_pickle(FileNames.tr_v2)
val = load_pickle(FileNames.val_v2)

y_tr, y_val = (frame['redemption_status'].values for frame in (tr, val))

In [9]:
# Instantiate the compiled two-input network defined by `_build_model`.
model = _build_model()

In [10]:
# Both model inputs: the customer-history array and slice 0 (last axis) of the
# coupon array.
val_inputs = [x_val_cust, x_val_cp[:, :, 0]]
train_inputs = [x_tr_cust, x_tr_cp[:, :, 0]]

# Validation data is scored only through the ROC AUC callback; it is not
# passed to `fit` itself.
roc = ROC_AUC(validation_data=(val_inputs, y_val))
model.fit(
    train_inputs,
    y_tr,
    epochs=20,
    callbacks=[roc],
    batch_size=128,
)

Train on 50431 samples
Epoch 1/20

InternalError:  [_Derived_]  Failed to call ThenRnnForward with model config: [rnn_mode, rnn_input_mode, rnn_direction_mode]: 3, 0, 0 , [num_layers, input_size, num_units, dir_count, max_seq_length, batch_size, cell_num_units]: [1, 100, 128, 1, 512, 256, 0] 
	 [[{{node CudnnRNN}}]]
	 [[model/gru/StatefulPartitionedCall]] [Op:__inference_distributed_function_4822]

Function call stack:
distributed_function -> distributed_function -> distributed_function


In [18]:
# Sum of `redemption_status` over the training rows whose coupon_id equals 8.
tr.loc[tr.coupon_id == 8, 'redemption_status'].sum()

7

In [20]:
# Count the rows of `te` whose coupon_id equals 8. The vectorised `.sum()`
# avoids iterating the Series element by element in Python; `int(...)` keeps
# the displayed result a plain Python integer.
# NOTE(review): `te` is not created in any cell visible here -- make sure it is
# loaded before this cell when running from a fresh kernel.
int((te.coupon_id == 8).sum())

0

Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
            ...
            1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116],
           dtype='int64', name='coupon_id', length=1116)

In [36]:
# Number of distinct coupon ids across `tr` and `te` combined. Only the
# `coupon_id` column is concatenated, so pandas never has to align the two
# frames' differing columns (the step that triggers the sort FutureWarning).
pd.concat([tr['coupon_id'], te['coupon_id']]).nunique()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


1116