In [1]:
import time
import datetime
import numpy as np
import pandas as pd
import tensorflow as tf
import ast
import random
from ast import literal_eval

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 50)

In [2]:
class Dense(tf.keras.layers.Layer):
    def __init__(self, units, input_dims=5, **kwargs):
        super(Dense, self).__init__(name='Linear', **kwargs)
        self._supports_ragged_inputs = True
        self.units = units
        self.input_dims = input_dims

    def build(self, input_shape):
        initializer = tf.keras.initializers.he_normal()
        self.w = self.add_weight(
            shape=(self.input_dims, self.units),
            initializer=initializer,
            trainable=True)
        
        self.b = self.add_weight(
            shape=(self.units,), 
            initializer=tf.zeros_initializer, 
            trainable=True)
        
    def call(self, inputs):
        return tf.ragged.map_flat_values(tf.matmul, inputs, self.w) + self.b

In [3]:
class Word_Matching_Network(tf.keras.Model):
    def __init__(self):
        super(Word_Matching_Network, self).__init__(name='Word_Matching_Network')
        self._supports_ragged_inputs = True        
        self.Layer1 = Dense(5, input_dims=30)
        self.Layer2 = Dense(5)
        self.Layer3 = Dense(1)

    def call(self, inputs):
        x = self.Layer1(inputs)
        x = tf.ragged.map_flat_values(tf.nn.relu, x)
        
        x = self.Layer2(x)
        x = tf.ragged.map_flat_values(tf.nn.relu, x)
        
        x = self.Layer3(x)
        x = tf.ragged.map_flat_values(tf.nn.relu, x)
        return x

In [4]:
class Gating_Network(tf.keras.layers.Layer):
    def __init__(self):
        super(Gating_Network, self).__init__()
        self._supports_ragged_inputs = True

    def build(self, input_shape):
        initializer = tf.keras.initializers.RandomUniform(minval=0, maxval=1)
        self.w = self.add_weight(
            shape=(1, 1),
            name='Gating_weight',
            initializer=initializer,
            trainable=True)

    def ragged_softmax(self, logits):
        numerator = tf.exp(logits)
        denominator = tf.reduce_sum(numerator, axis=1)
        softmax = tf.math.divide_no_nan(numerator, tf.reshape(denominator, shape=(logits.shape[0], -1)))
        return softmax
        
    def call(self, idf):
        g = tf.math.multiply(idf, self.w)
        softmax = self.ragged_softmax(g)
        return softmax

In [5]:
class Score_Aggregation(tf.keras.layers.Layer):
    def __init__(self):
        super(Score_Aggregation, self).__init__(name='Score_Aggregation')
        self._supports_ragged_inputs = True

    def call(self, Z, g):
        score = tf.ragged.map_flat_values(tf.reshape, Z, shape=(-1, ))
        gating = g
        s_g_sum = tf.math.multiply(gating, score)
        return tf.math.reduce_sum(s_g_sum, axis=1)

In [20]:
class DRMM(tf.keras.Model):
    def __init__(self):
        super(DRMM, self).__init__(name='DRMM')
        self._supports_ragged_inputs = True
        self.Word_Matching_Network = Word_Matching_Network()
        self.Gating_Network = Gating_Network()
        self.Score_Aggregation = Score_Aggregation()

    def call(self, inputs, idf):
        Z = self.Word_Matching_Network(inputs)
        G = self.Gating_Network(idf)
        score = self.Score_Aggregation(Z, G)
        return score

In [23]:
class Pairwise_DRMM(tf.keras.Model):
    def __init__(self):
        super(Pairwise_DRMM, self).__init__(name='Pairwise_DRMM')
        self.drmm = DRMM()
        
    def call(self, inputs):
        positive_hist = inputs['positive_hist']
        negative_hist = inputs['negative_hist']
        query_idf = inputs['query_idf']
        
        positive = self.drmm(positive_hist, query_idf)
        negative = self.drmm(negative_hist, query_idf)
        
        return tf.concat([positive, negative], axis=0) 
    
    def predict(self, inputs):
        hist = inputs['hist']
        query_idf = inputs['query_idf']
        score = self.drmm(hist, query_idf)
        return score

In [7]:
def Pairwise_ranking_loss(y_true, y_pred):
    '''
    ignore y_true
    '''
    positive_score = tf.keras.layers.Lambda(lambda x: x[:len(x)//2], output_shape= (1,))(y_pred)
    negative_score = tf.keras.layers.Lambda(lambda x: x[len(x)//2:], output_shape= (1,))(y_pred)

    return tf.keras.backend.mean(tf.math.maximum(0, 1 - positive_score + negative_score))

In [19]:
test = pd.read_csv('./data/paccr_drmm_test.csv', converters={"hist": literal_eval,
                                                             "query_idf": literal_eval})

In [18]:
def ndcg(rel_pred, p=None, form="linear"):
    if p==None:
        p = len(rel_pred)
    if p > len(rel_pred):
        rel_pred = np.append(rel_pred, [0]*(p - len(rel_pred)))
    
    rel_true = np.sort(rel_pred)[::-1]
    discount = 1 / (np.log2(np.arange(p) + 2))

    if form == "linear":
        idcg = np.sum(rel_true[:p] * discount)
        dcg = np.sum(rel_pred[:p] * discount)
    elif form == "exponential" or form == "exp":
        idcg = np.sum([2**x - 1 for x in rel_true[:p]] * discount)
        dcg = np.sum([2**x - 1 for x in rel_pred[:p]] * discount)
    else:
        raise ValueError("Only supported for two formula, 'linear' or 'exp'")
    
    return dcg / idcg

In [8]:
df = pd.read_csv('./data/paccr_drmm_train.csv', converters={"positive_hist": literal_eval, 
                                                            "negative_hist": literal_eval,
                                                            "query_idf": literal_eval})

df = df[['query_len', 'query_preprocessed', 'positive_hist', 'negative_hist', 'query_idf']]

In [9]:
train_q = set(random.sample(list(df['query_preprocessed'].unique()), int(len(df['query_preprocessed'].unique()) * 0.9)))
dev_q = set(df['query_preprocessed'].unique()) - train_q

In [10]:
train = pd.concat([df.groupby('query_preprocessed').get_group(name) for name in train_q])
dev = pd.concat([df.groupby('query_preprocessed').get_group(name) for name in dev_q])

In [11]:
train = train.sample(frac=1).reset_index(drop=True)
dev = dev.sample(frac=1).reset_index(drop=True)

In [12]:
del df

In [13]:
train_tensor_dict = {'query_idf': tf.RaggedTensor.from_row_lengths(values=[y for x in train.query_idf.tolist() for y in x], row_lengths=train.query_len.tolist()),
                     'positive_hist': tf.RaggedTensor.from_row_lengths(values=[y for x in train.positive_hist.tolist() for y in x], row_lengths=train.query_len.tolist()),
                     'negative_hist': tf.RaggedTensor.from_row_lengths(values=[y for x in train.negative_hist.tolist() for y in x], row_lengths=train.query_len.tolist())}

In [14]:
dev_tensor_dict = {'query_idf': tf.RaggedTensor.from_row_lengths(values=[y for x in dev.query_idf.tolist() for y in x], row_lengths=dev.query_len.tolist()),
                   'positive_hist': tf.RaggedTensor.from_row_lengths(values=[y for x in dev.positive_hist.tolist() for y in x], row_lengths=dev.query_len.tolist()), 
                   'negative_hist': tf.RaggedTensor.from_row_lengths(values=[y for x in dev.negative_hist.tolist() for y in x], row_lengths=dev.query_len.tolist())}

In [15]:
buffer_size = len(train)
del train, dev

In [16]:
ds = tf.data.Dataset.from_tensor_slices(train_tensor_dict)
ds = ds.shuffle(buffer_size=buffer_size)
batchs = 128
ds = ds.batch(batchs).repeat()
example_batch = next(iter(ds))

In [17]:
model = Pairwise_DRMM()
model(example_batch)
Pairwise_ranking_loss(y_true=None, y_pred=model(example_batch))

<tf.Tensor: shape=(), dtype=float32, numpy=1.0751271>

In [27]:
model = Pairwise_DRMM()
learning_rate = 1
print_step = 10
optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)

Pairwise_ranking_loss(y_true=None, y_pred=model(example_batch))

<tf.Tensor: shape=(), dtype=float32, numpy=0.98756063>

In [36]:
loss_sum = 0
ndcg_sum = 0
start = time.time()
for step, batch_train in enumerate(ds):
    
    with tf.GradientTape() as tape:
        logits = model(batch_train)
        loss_value = Pairwise_ranking_loss(y_true=None, y_pred=logits)
        loss_sum += loss_value 
        
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    if step % print_step == 0:
        current_loss_average = float(loss_sum)/print_step
        if step ==0:
            current_loss_average = loss_sum
        current_ndcg_average = float(ndcg_sum)/print_step
        
        logits_dev = model(dev_tensor_dict)
        current_loss_average_dev = Pairwise_ranking_loss(y_true=None, y_pred=logits_dev)
        loss_sum = 0
        
        for q in dev_q:
            try:
                ndcg_test = test[test['query_preprocessed'] == q].sample(20)
            except:
                continue

            metadata_ndcg = {'query_idf': tf.RaggedTensor.from_row_lengths(values=[y for x in ndcg_test.query_idf.tolist() for y in x], 
                                                                           row_lengths=ndcg_test.query_len.tolist()),
                             'hist': tf.RaggedTensor.from_row_lengths(values=[y for x in ndcg_test['hist'].tolist() for y in x], 
                                                                      row_lengths=ndcg_test.query_len.tolist())}
                     
            ndcg_test['rel'] = model.predict(metadata_ndcg).numpy()
            ndcg_sum += ndcg(list(ndcg_test.sort_values(by=['rel'], axis=0, ascending=False)['median_relevance']-1), p=20, form="exp")
        
        print("Training loss at step %d: %.5f, dev_loss : %.5f, nDCG@20 : %.5f"% (step, 
                                                                  current_loss_average, 
                                                                  current_loss_average_dev,
                                                                  ndcg_sum/23))
        
        print("Seen so far: %s train samples, learning rate: %.4f" % ((step + 1) * batchs, learning_rate))
        ndcg_sum = 0
        start = time.time()
    
    if step % 500 == 0:
        learning_rate *= .1
        optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)

Training loss at step 0: 1.00504, dev_loss : 0.99198, nDCG@20 : 0.81481
Seen so far: 128 train samples, learning rate: 0.1000
Training loss at step 10: 0.99364, dev_loss : 0.99138, nDCG@20 : 0.81982
Seen so far: 1408 train samples, learning rate: 0.0100
Training loss at step 20: 0.99365, dev_loss : 0.99080, nDCG@20 : 0.82481
Seen so far: 2688 train samples, learning rate: 0.0100
Training loss at step 30: 0.98336, dev_loss : 0.99006, nDCG@20 : 0.81909
Seen so far: 3968 train samples, learning rate: 0.0100
Training loss at step 40: 0.98599, dev_loss : 0.98985, nDCG@20 : 0.82428
Seen so far: 5248 train samples, learning rate: 0.0100
Training loss at step 50: 0.98437, dev_loss : 0.98959, nDCG@20 : 0.84412
Seen so far: 6528 train samples, learning rate: 0.0100
Training loss at step 60: 0.98602, dev_loss : 0.98915, nDCG@20 : 0.81865
Seen so far: 7808 train samples, learning rate: 0.0100
Training loss at step 70: 0.97347, dev_loss : 0.98861, nDCG@20 : 0.81373
Seen so far: 9088 train samples, 

KeyboardInterrupt: 

In [38]:
ndcg_test = test[test['query_preprocessed'] == 'nike flip flop']

metadata_ndcg = {'query_idf': tf.RaggedTensor.from_row_lengths(values=[y for x in ndcg_test.query_idf.tolist() for y in x], 
                                                               row_lengths=ndcg_test.query_len.tolist()),
                 'hist': tf.RaggedTensor.from_row_lengths(values=[y for x in ndcg_test['hist'].tolist() for y in x], 
                                                          row_lengths=ndcg_test.query_len.tolist())}

ndcg_test.insert(5, 'rel', model.predict(metadata_ndcg).numpy(), True)

ndcg_test.sort_values(by=['rel'], axis=0, ascending=False)[['query', 'product_title', 'median_relevance', 'rel']]

Unnamed: 0,query,product_title,median_relevance,rel
7694,nike flip flops,Girl's C9 by Champion Glory Memory Foam Sandal...,2,2.158058
4425,nike flip flops,Nike Aqua Motion Girls Flip Flops - Little Kid...,4,1.912583
841,nike flip flops,"Women's Nike 'SolarSoft 11' Flip Flop, Size 8 ...",4,1.844439
4361,nike flip flops,Nike Girls' Celso Girl Thong Sandals from Fini...,4,1.839383
8007,nike flip flops,Disney Frozen Girls Flip Flop Sandals - Blue,2,1.756189
1620,nike flip flops,"Women's Nike 'Solarsoft' Thong Sandal, Size 5 ...",3,1.737167
3572,nike flip flops,Girl's Circo Hailey Floral Print Flip Flop San...,2,1.716707
2601,nike flip flops,"Women's Nike 'SolarSoft 11' Flip Flop, Size 6 ...",4,1.695302
3989,nike flip flops,Nike Women's Celso Girl Thong Sandals from Fin...,4,1.664339
3096,nike flip flops,OP Girl's Woven Beach Flip Flop,3,1.663974
