In [None]:
import time
import datetime
import numpy as np
import pandas as pd
import tensorflow as tf
from ast import literal_eval
import random
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None
from utility.utility import ndcg, mAP_score, highlight, history_plot, generate_pairwise_dataset
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_colwidth', 9999)
pd.set_option('display.max_rows', 50)
%matplotlib inline

In [None]:
class Conv_stack(tf.keras.layers.Layer):
    """Bank of 2-D convolutions with kernel sizes 2..lg over a single input.

    Args:
        lg: Largest kernel size; one ``Conv2D`` is created for every size in
            ``2..lg`` (none when ``lg < 2``).
        nf: Number of filters of each ``Conv2D``.
    """

    def __init__(self, lg, nf):
        super().__init__(name='ConV_stack')
        self.lg = lg
        self.nf = nf
        # Keyed by kernel size so call() can fetch each layer by the same index.
        self.conv_dict = {}
        for kernel_size in range(2, self.lg + 1):
            self.conv_dict[kernel_size] = tf.keras.layers.Conv2D(
                self.nf, kernel_size, strides=(1, 1), padding='same')

    def call(self, inputs):
        """Concatenate the channel-expanded input with every convolution output.

        A trailing channel axis is appended to ``inputs`` first, so ``inputs``
        must have one dimension fewer than the rank-4 tensor ``Conv2D`` expects.
        The expanded input itself is kept as the first entry of the result.
        """
        expanded = tf.expand_dims(inputs, axis=-1)
        feature_maps = [expanded]
        for kernel_size in range(2, self.lg + 1):
            feature_maps.append(self.conv_dict[kernel_size](expanded))
        return tf.keras.layers.concatenate(feature_maps)

In [None]:
class Dim_wise_max_pooling(tf.keras.layers.Layer):
    """Max-pools groups of ``nf`` consecutive channels of a rank-4 tensor.

    Channel 0 is passed through unchanged. Channels ``1..nf``,
    ``nf+1..2*nf``, ... (``lg - 1`` groups in total) are each reduced to their
    maximum over the channel axis.

    Args:
        lg: One more than the number of ``nf``-wide channel groups to pool.
        nf: Number of channels in each pooled group.
    """

    def __init__(self, lg, nf):
        super().__init__(name='dim_wise_max_pooling')
        self.lg = lg
        self.nf = nf

    def call(self, inputs):
        """Return channel 0 and the per-group maxima, concatenated on the last axis."""
        pooled = [inputs[:, :, :, 0]]
        for group in range(1, self.lg):
            # Group `group` occupies channels [nf*(group-1)+1, nf*group+1).
            first = self.nf * (group - 1) + 1
            last = self.nf * group + 1
            pooled.append(tf.reduce_max(inputs[:, :, :, first:last], axis=-1))
        return tf.keras.layers.concatenate(pooled)

In [None]:
class Row_wise_max_pooling(tf.keras.layers.Layer):
    """Keeps the ``ns`` largest values of each ``firstk``-wide window of the last axis.

    Args:
        ns: Number of top values kept per window.
        lg: Number of consecutive windows read from the last axis.
        firstk: Width of every window.
    """

    def __init__(self, ns, lg, firstk):
        super().__init__(name='row_wise_max_pooling')
        self.ns = ns
        self.lg = lg
        self.firstk = firstk

    def call(self, inputs):
        """Return the top-``ns`` values of all ``lg`` windows, concatenated on the last axis."""
        strongest = []
        for window in range(self.lg):
            lo = self.firstk * window
            hi = self.firstk * (window + 1)
            # top_k yields (values, indices); only the values are kept.
            values, _ = tf.math.top_k(inputs[:, :, lo:hi], k=self.ns)
            strongest.append(values)
        return tf.keras.layers.concatenate(strongest)

In [None]:
class Idf_concat(tf.keras.layers.Layer):
    """Appends an ``idf`` tensor to ``inputs`` as one extra trailing feature."""

    def __init__(self):
        super().__init__(name='idf_concat')

    def call(self, inputs, idf):
        """Give ``idf`` a trailing axis of size 1 and concatenate it after ``inputs``."""
        idf_column = tf.expand_dims(idf, axis=-1)
        return tf.keras.layers.concatenate([inputs, idf_column])

In [None]:
class PACCR(tf.keras.Model):
    """Feature extractor: conv stack -> dim-wise pooling -> row-wise pooling -> idf.

    Args:
        firstk: Window width handed to ``Row_wise_max_pooling``.
        lq: Stored on the instance; not read by any layer in this class.
        lg: Largest kernel size / number of pooled windows.
        nf: Number of filters per convolution.
        ns: Number of top values kept per window.
    """

    def __init__(self, firstk, lq, lg, nf, ns):
        super().__init__(name='')
        self.firstk, self.lq = firstk, lq
        self.lg, self.nf, self.ns = lg, nf, ns

        # Sub-layers are created in the order they are applied in call().
        self.conv_stack = Conv_stack(lg=lg, nf=nf)
        self.dim_wise_max_pooling = Dim_wise_max_pooling(lg=lg, nf=nf)
        self.row_wise_max_pooling = Row_wise_max_pooling(lg=lg, ns=ns, firstk=firstk)
        self.idf_concat = Idf_concat()

    def call(self, inputs, idf):
        """Run ``inputs`` through the three stages, then append ``idf``."""
        conv_features = self.conv_stack(inputs)
        dim_pooled = self.dim_wise_max_pooling(conv_features)
        row_pooled = self.row_wise_max_pooling(dim_pooled)
        return self.idf_concat(row_pooled, idf)

In [None]:
class DRMM(tf.keras.Model):
    """Three stacked ReLU dense layers that reduce pooled features to one score.

    ``dense1`` (5 units) and ``dense2`` (1 unit) act on the last axis of the
    input; the resulting size-1 axis is dropped and ``dense3`` (1 unit) combines
    what is left into a single value per example. All kernels use He-normal
    initialisation.
    """

    def __init__(self):
        super(DRMM, self).__init__(name='DRMM')
        initializer = tf.keras.initializers.he_normal()
        self.dense1 = tf.keras.layers.Dense(5, activation='relu', kernel_initializer=initializer)
        self.dense2 = tf.keras.layers.Dense(1, activation='relu', kernel_initializer=initializer)
        self.dense3 = tf.keras.layers.Dense(1, activation='relu', kernel_initializer=initializer)

    def call(self, inputs):
        """Return one score per example, with a trailing axis of size 1."""
        x = self.dense1(inputs)
        x = self.dense2(x)
        # Drop only the unit axis produced by dense2. A bare tf.squeeze(x) would
        # also remove a batch dimension of size 1, leaving dense3 with a rank-1
        # input whenever a single example is scored.
        x = tf.squeeze(x, axis=-1)
        x = self.dense3(x)

        return x

In [None]:
class PACCR_DRMM(tf.keras.Model):
    """Chains the ``PACCR`` feature extractor with the ``DRMM`` dense scorer.

    The constructor arguments are stored on the instance and forwarded
    unchanged to ``PACCR``.
    """

    def __init__(self, firstk, lq, lg, nf, ns):
        super().__init__(name='')
        self.firstk, self.lq = firstk, lq
        self.lg, self.nf, self.ns = lg, nf, ns

        self.paccr = PACCR(firstk=firstk, lq=lq, lg=lg, nf=nf, ns=ns)
        self.drmm = DRMM()

    def call(self, inputs, idf):
        """Score ``inputs``: extract features together with ``idf``, then apply the scorer."""
        features = self.paccr(inputs, idf)
        return self.drmm(features)

In [None]:
class Pairwise_PACCR_DRMM(tf.keras.Model):
    """Scores a positive and a negative input with one shared ``PACCR_DRMM``."""

    def __init__(self, firstk, lq, lg, nf, ns):
        super().__init__(name='Pairwise_PACCR_DRMM')
        self.Paccr_Drmm = PACCR_DRMM(firstk, lq, lg, nf, ns)

    def call(self, inputs):
        """Return positive scores followed by negative scores along axis 0.

        ``inputs`` is a mapping with the keys ``'positive_sim_matrix'``,
        ``'negative_sim_matrix'`` and ``'idf_softmax'``; the same ``idf_softmax``
        is used for both sides of the pair.
        """
        idf = inputs['idf_softmax']
        pair_scores = [
            self.Paccr_Drmm(inputs[side], idf)
            for side in ('positive_sim_matrix', 'negative_sim_matrix')
        ]
        return tf.concat(pair_scores, axis=0)

In [None]:
# # bert
# test = pd.read_csv('./data/paccr_drmm_bert_test_all.csv', converters={"query_idf": literal_eval,
#                                                                       "idf_softmax": literal_eval,
#                                                                       "sim_matrix": literal_eval,
#                                                                       "query_token": literal_eval,
#                                                                       "product_title_token": literal_eval,
#                                                                       "token_ids": literal_eval,
#                                                                       "drmm_hist": literal_eval,
#                                                                       'token': literal_eval})

# test['binary_relevance'] = test['median_relevance'].apply(lambda x: 0 if x <= 2 else 1)

# df = generate_pairwise_dataset(test)
# df.reset_index(inplace=True, drop=True)

# dev_q = set(random.sample(list(df['query'].unique()), 45))
# train_q = set(df['query'].unique()) - dev_q

# train = pd.concat([df.groupby('query').get_group(name) for name in train_q]).sample(frac=1).reset_index(drop=True)
# dev = pd.concat([df.groupby('query').get_group(name) for name in dev_q]).sample(frac=1).reset_index(drop=True)

# metadata = {'negative_sim_matrix': tf.constant(train['sim_matrix_N'].tolist(), dtype=tf.float32), 
#             'positive_sim_matrix': tf.constant(train['sim_matrix_P'].tolist(), dtype=tf.float32),
#             'idf_softmax': tf.constant(train['idf_softmax'].tolist(), dtype=tf.float32)}

# metadata_dev ={'negative_sim_matrix': tf.constant(dev['sim_matrix_N'].tolist(), dtype=tf.float32), 
#             'positive_sim_matrix': tf.constant(dev['sim_matrix_P'].tolist(), dtype=tf.float32),
#             'idf_softmax': tf.constant(dev['idf_softmax'].tolist(), dtype=tf.float32)}

# firstk = 13
# lq = 8

In [None]:
# word2vec
SPLIT_SEED = 42       # fixes the query split and the row shuffles across kernel restarts
N_DEV_QUERIES = 45    # number of distinct queries held out for the dev set

df = pd.read_csv('./data/paccr_drmm_.csv', converters={"negative_sim_matrix": literal_eval,
                                                      "positive_sim_matrix": literal_eval,
                                                      "idf_softmax": literal_eval})
df = df[['query_preprocessed', 'negative_sim_matrix', 'positive_sim_matrix', 'idf_softmax']]

test = pd.read_csv('./data/paccr_drmm_test.csv', converters={"hist": literal_eval,
                                                             "query_idf": literal_eval,
                                                             "sim_matrix": literal_eval,
                                                             "idf_softmax": literal_eval})

# Split by query, not by row, so every pair of a query lands on the same side.
# A dedicated seeded generator keeps the split reproducible without touching the
# global `random` state.
all_queries = list(df['query_preprocessed'].unique())
dev_q = set(random.Random(SPLIT_SEED).sample(all_queries, N_DEV_QUERIES))
train_q = set(all_queries) - dev_q

# One boolean mask replaces rebuilding df.groupby(...) once per query.
is_dev = df['query_preprocessed'].isin(dev_q)
train = df[~is_dev].sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)
dev = df[is_dev].sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)


def _pairwise_tensors(frame):
    """Convert the list-valued pairwise columns of `frame` into float32 tensors."""
    return {'negative_sim_matrix': tf.constant(frame['negative_sim_matrix'].tolist(), dtype=tf.float32),
            'positive_sim_matrix': tf.constant(frame['positive_sim_matrix'].tolist(), dtype=tf.float32),
            'idf_softmax': tf.constant(frame['idf_softmax'].tolist(), dtype=tf.float32)}


metadata = _pairwise_tensors(train)
metadata_dev = _pairwise_tensors(dev)

# firstk is the window width Row_wise_max_pooling slices along the last axis, so
# it has to equal the last dimension of the similarity matrices.
firstk = 8
# lq is passed to the model constructors but not read by any layer in this notebook.
lq = 6

In [None]:
batchs = 128
# Shuffle with a buffer as large as the training frame, cut into batches, then
# repeat without a count: the dataset never ends, so the consumer must stop.
ds = (
    tf.data.Dataset.from_tensor_slices(metadata)
    .shuffle(buffer_size=len(train))
    .batch(batchs)
    .repeat()
)
# One batch kept aside to call the model once before training.
example_batch = next(iter(ds))

In [None]:
tf.keras.backend.clear_session()

# Hyperparameters come first: the model constructor below reads lg, nf and ns,
# so assigning them afterwards fails with NameError on a fresh kernel.
n = 20              # cutoff passed to ndcg(..., p=n) during evaluation
lg = 5              # largest convolution kernel size
nf = 32             # filters per convolution
ns = 2              # top values kept per window in row-wise pooling
learning_rate = 1
print_step = 10     # evaluate and log every print_step training steps

model = Pairwise_PACCR_DRMM(firstk, lq, lg, nf, ns)
optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)

# NOTE(review): Pairwise_ranking_loss is neither defined nor imported in the
# visible cells - confirm it is defined before this cell runs.
# Calling the model on one batch also creates its weights before training.
Pairwise_ranking_loss(y_true=None, y_pred=model(example_batch))

In [None]:
MAX_STEP = 200  # last training step executed (the dataset repeats forever)


def _mean_dev_ndcg(model, test, queries, cutoff):
    """Average nDCG over `queries`, ranking each query's rows of `test` by model score.

    Documents are scored with the shared single-document scorer
    (`model.Paccr_Drmm`); the pairwise wrapper's own `call` only accepts the
    positive/negative training keys and cannot score a plain `sim_matrix`.
    Queries with no rows in `test` are skipped and excluded from the average.
    """
    total = 0.0
    evaluated = 0
    for q in queries:
        query_docs = test[test['query_preprocessed'] == q]
        if query_docs.empty:
            continue
        # A single document needs no scoring to be ranked.
        if len(query_docs) > 1:
            sim_matrix = tf.constant(query_docs['sim_matrix'].tolist(), dtype=tf.float32)
            idf_softmax = tf.constant(query_docs['idf_softmax'].tolist(), dtype=tf.float32)
            scores = model.Paccr_Drmm(sim_matrix, idf_softmax)
            # assign() returns a new frame, so the slice of `test` is never mutated.
            query_docs = query_docs.assign(rel=tf.reshape(scores, [-1]).numpy())
            query_docs = query_docs.sort_values(by=['rel'], axis=0, ascending=False)
        rel_pred = list(query_docs['median_relevance'] - 1)
        total += ndcg(rel_pred, p=cutoff, form="exp")
        evaluated += 1
    return total / evaluated if evaluated else 0.0


loss_sum = 0.0
step_history = []
loss_history = []
loss_history_dev = []
ndcg_history = []

# NOTE(review): `start` is not read anywhere in this cell; kept in case a later
# cell reports elapsed time.
start = time.time()
for step, batch_train in enumerate(ds):

    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        logits = model(batch_train)
        loss_value = Pairwise_ranking_loss(y_true=None, y_pred=logits)

    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    # Accumulate as a plain float so the histories hold numbers, not tensors.
    loss_sum += float(loss_value)

    if step % print_step == 0:
        # The first window holds only the step-0 loss; later ones hold print_step losses.
        steps_in_window = 1 if step == 0 else print_step
        current_loss_average = loss_sum / steps_in_window

        logits_dev = model(metadata_dev)
        current_loss_average_dev = float(Pairwise_ranking_loss(y_true=None, y_pred=logits_dev))

        current_ndcg_average = _mean_dev_ndcg(model, test, dev_q, n)

        # Each history gets exactly one entry per logged step, so all four lists
        # stay the same length as step_history.
        step_history.append(step)
        loss_history.append(current_loss_average)
        loss_history_dev.append(current_loss_average_dev)
        ndcg_history.append(current_ndcg_average)

        print("Training loss at step %d: %.5f, dev_loss : %.5f, nDCG@%d : %.5f"% (step,
                                                                  current_loss_average,
                                                                  current_loss_average_dev,
                                                                  n,
                                                                  current_ndcg_average))
        print("Seen so far: %s train samples, learning rate: %.4f" % ((step + 1) * batchs, learning_rate))
        loss_sum = 0.0
        start = time.time()

    if step == MAX_STEP:
        break