In [1]:
import numpy as np
import multiprocessing as mp
import random,copy,string
from nltk.tokenize import word_tokenize
from scipy.stats import pearsonr
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Input, Convolution1D, MaxPooling1D, Flatten
from tensorflow.python.keras.layers import Lambda, multiply, concatenate, Dense
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.keras.callbacks import Callback
from tensorflow.python.keras.preprocessing.sequence import pad_sequences

class Embedder(object):
    """Word-vector lookup table loaded from a GloVe-style text file.

    Each non-blank line of the file is ``<token> <v1> ... <vN>`` separated by
    single spaces.  After construction:

    * ``wordtoindex`` maps a token to its row in ``indextovector``;
    * ``indextovector`` is a float32 array whose row 0 is all zeros and whose
      row ``n`` holds the vector of the n-th non-blank line (1-based).
    """

    def __init__(self, dictname, wordvectdim, processes=32, blocksize=1000):
        """Parse ``dictname`` in parallel and build the lookup structures.

        Args:
            dictname: path of the embedding text file.
            wordvectdim: number of vector components per token.
            processes: size of the worker pool used for parsing.
            blocksize: number of file lines handed to a worker at a time.
        """
        print('Loading GloVe...(This might take one or two minutes.)')
        # Read by _worker; must be set before the bound method is shipped to
        # the worker processes.
        self.wordvectdim = wordvectdim
        self.wordtoindex = dict()
        # Blank lines are dropped here (not in the workers) so that the
        # "line position -> row index" arithmetic in _worker stays valid.
        with open(dictname, 'r', encoding='utf-8') as handle:
            lines = [line for line in handle if line.strip()]
        blocks = [(lines[start:start + blocksize], start)
                  for start in range(0, len(lines), blocksize)]
        # The context manager tears the pool down once map() has returned,
        # so no worker processes are left behind.
        with mp.Pool(processes) as pool:
            r_list = pool.map(self._worker, blocks)
        vectors = [np.zeros(wordvectdim)]  # row 0: all-zero vector
        for block_wordtoindex, block_vectors in r_list:
            self.wordtoindex.update(block_wordtoindex)
            vectors.extend(block_vectors)
        self.indextovector = np.array(vectors, dtype='float32')

    def _worker(self, args):
        """Parse one block of lines.

        Args:
            args: ``(lines, offset)`` where ``offset`` is the 0-based position
                of the block's first line within the whole file.

        Returns:
            ``(wordtoindex, indextovector)`` for this block; indices are
            global rows (shifted by one for the zero row).

        Raises:
            ValueError: if a line does not carry ``wordvectdim`` components.
        """
        block_lines, offset = args
        dim = self.wordvectdim
        wordtoindex = dict()
        indextovector = []
        for row, line in enumerate(block_lines, start=offset + 1):
            # Split from the right: the last `dim` fields are the numbers,
            # everything before them is the token, which may itself contain
            # spaces (a plain split(' ') would mis-parse such lines).
            parts = line.rstrip().rsplit(' ', dim)
            word, values = parts[0], parts[1:]
            if len(values) != dim:
                raise ValueError(
                    'expected %d vector components, found %d in line %r'
                    % (dim, len(values), line[:60]))
            wordtoindex[word] = row
            indextovector.append(np.array(values).astype(float))
        return (wordtoindex, indextovector)

    def matrixize(self, sentencelist, sentencepad):
        """Turn tokenised sentences into an array of word vectors.

        Args:
            sentencelist: iterable of sentences, each a list of tokens.
            sentencepad: length every sentence is padded/truncated to
                (both at the end).

        Returns:
            ``indextovector`` indexed by the padded index matrix.
        """
        # NOTE(review): unknown words are mapped to row 1, i.e. the vector of
        # the first token in the embedding file, not to the zero row --
        # confirm this is intended.
        indexlist = [
            [self.wordtoindex.get(word.lower(), 1) for word in sentence]
            for sentence in sentencelist
        ]
        padded = pad_sequences(indexlist, maxlen=sentencepad,
                               truncating='post', padding='post')
        return self.indextovector[padded]

class STSTask():
    """Sentence-pair similarity task: data loading, CNN model, training, scoring.

    ``c`` is a configuration dict; the keys read by this class are
    ``wordvectdim``, ``sentencepad``, ``num_classes``, ``cnnfilters``,
    ``cnnactivate``, ``cnninitial``, ``densedimension``, ``denseactivate``,
    ``denseinitial``, ``optimizer``, ``batch_size``, ``num_batchs`` and
    ``num_epochs``.
    """

    def __init__(self, c):
        self.c = c

    def load_resc(self, dictname):
        """Load the word-embedding table from ``dictname`` into ``self.embed``."""
        self.embed = Embedder(dictname, self.c['wordvectdim'])

    def load_data(self, trainfile, validfile, testfile):
        """Load the three splits into ``traindata``/``validdata``/``testdata``."""
        self.traindata = self._load_data(trainfile)
        self.validdata = self._load_data(validfile)
        self.testdata = self._load_data(testfile)

    def _load_data(self, filename):
        """Read one tab-separated split.

        Field 5 of each line is the gold score, fields 6 and 7 are the two
        sentences; the first four fields are ignored.  Blank lines are
        skipped.

        Returns:
            dict with ``labels`` (floats), ``s0``/``s1`` (token lists),
            ``classes`` (soft class targets) and ``m0``/``m1`` (embedded
            sentences).
        """
        s0, s1, labels = [], [], []
        with open(filename, 'r', encoding='utf-8') as handle:
            lines = handle.read().splitlines()
        for line in lines:
            if not line.strip():
                continue
            _, _, _, _, label, s0x, s1x = line.rstrip().split('\t')[:7]
            labels.append(float(label))
            s0.append(self._tokenize(s0x))
            s1.append(self._tokenize(s1x))
        m0 = self.embed.matrixize(s0, self.c['sentencepad'])
        m1 = self.embed.matrixize(s1, self.c['sentencepad'])
        classes = self._encode_labels(labels)
        return {'labels': labels, 's0': s0, 's1': s1, 'classes': classes, 'm0': m0, 'm1': m1}

    def _tokenize(self, sentence):
        """Lower-cased tokens of ``sentence`` minus tokens found in string.punctuation."""
        return [word.lower() for word in word_tokenize(sentence)
                if word not in string.punctuation]

    def _encode_labels(self, labels):
        """Spread each real-valued score over its two neighbouring classes.

        A score ``s`` puts weight ``s - floor(s)`` on class ``floor(s) + 1``
        (when that class exists) and the remainder on class ``floor(s)``, so
        the class-index-weighted sum of a row equals the score.
        """
        num_classes = self.c['num_classes']
        classes = np.zeros((len(labels), num_classes))
        for i, label in enumerate(labels):
            lower = int(np.floor(label))
            if lower + 1 < num_classes:
                classes[i, lower + 1] = label - lower
            classes[i, lower] = lower - label + 1
        return classes

    def create_model(self):
        """Build and compile the siamese CNN; the result is stored in ``self.model``.

        Both inputs go through the same convolution / max-pool / flatten
        stacks (one stack per entry of ``c['cnnfilters']``); the two sentence
        vectors are combined as ``|a - b|`` and ``a * b``, passed through the
        dense layers and a softmax over ``num_classes``.
        """
        K.clear_session()
        input_shape = (self.c['sentencepad'], self.c['wordvectdim'])
        input0 = Input(shape=input_shape)
        input1 = Input(shape=input_shape)
        Convolt_Layer = []
        MaxPool_Layer = []
        Flatten_Layer = []
        for kernel_size, filters in self.c['cnnfilters'].items():
            Convolt_Layer.append(Convolution1D(filters=filters,
                                               kernel_size=kernel_size,
                                               padding='valid',
                                               activation=self.c['cnnactivate'],
                                               kernel_initializer=self.c['cnninitial']))
            # Pool over the whole convolution output length.
            MaxPool_Layer.append(MaxPooling1D(pool_size=int(self.c['sentencepad'] - kernel_size + 1)))
            Flatten_Layer.append(Flatten())
        num_channels = len(Convolt_Layer)

        # The shape printouts below use the tensors that are actually wired
        # into the model instead of calling each layer a second time, which
        # would only add unused nodes to the graph.
        Convolted_tensor0 = []
        Convolted_tensor1 = []
        for channel in range(num_channels):
            print("inside convo: ")
            print(input0.shape)
            print(input1.shape)
            Convolted_tensor0.append(Convolt_Layer[channel](input0))
            Convolted_tensor1.append(Convolt_Layer[channel](input1))
            print(Convolted_tensor0[channel].shape)
            print(Convolted_tensor1[channel].shape)

        MaxPooled_tensor0 = []
        MaxPooled_tensor1 = []
        for channel in range(num_channels):
            print("inside max")
            MaxPooled_tensor0.append(MaxPool_Layer[channel](Convolted_tensor0[channel]))
            MaxPooled_tensor1.append(MaxPool_Layer[channel](Convolted_tensor1[channel]))
            print(MaxPooled_tensor0[channel].shape)
            print(MaxPooled_tensor1[channel].shape)

        Flattened_tensor0 = []
        Flattened_tensor1 = []
        for channel in range(num_channels):
            Flattened_tensor0.append(Flatten_Layer[channel](MaxPooled_tensor0[channel]))
            Flattened_tensor1.append(Flatten_Layer[channel](MaxPooled_tensor1[channel]))
            print("inside flat")
            print(Flattened_tensor0[channel].shape)
            print(Flattened_tensor1[channel].shape)

        if num_channels > 1:
            Flattened_tensor0 = concatenate(Flattened_tensor0)
            Flattened_tensor1 = concatenate(Flattened_tensor1)
        else:
            Flattened_tensor0 = Flattened_tensor0[0]
            Flattened_tensor1 = Flattened_tensor1[0]

        absDifference = Lambda(lambda X: K.abs(X[0] - X[1]))([Flattened_tensor0, Flattened_tensor1])
        mulDifference = multiply([Flattened_tensor0, Flattened_tensor1])
        allDifference = concatenate([absDifference, mulDifference])
        print(mulDifference.shape)
        print(allDifference.shape)
        for densedimension in self.c['densedimension']:
            allDifference = Dense(units=int(densedimension),
                                  activation=self.c['denseactivate'],
                                  kernel_initializer=self.c['denseinitial'])(allDifference)
            print("inside dense")
            print(allDifference.shape)
        output = Dense(name='output',
                       units=self.c['num_classes'],
                       activation='softmax',
                       kernel_initializer=self.c['denseinitial'])(allDifference)
        print("kd")
        print(output.shape)
        self.model = Model(inputs=[input0, input1], outputs=output)
        self.model.compile(loss={'output': self._lossfunction}, optimizer=self.c['optimizer'])

    def _expected_score(self, y):
        """Class-index-weighted sum of each row of ``y`` (class 0 contributes nothing)."""
        return sum(k * y[:, k] for k in range(1, self.c['num_classes']))

    def _lossfunction(self, y_true, y_pred):
        """Negative Pearson correlation between expected true and predicted scores.

        The correlation is taken over the batch.  ``K.epsilon()`` is added
        under the square root so that a batch with constant targets or
        constant predictions yields a finite loss instead of 0/0.
        """
        ny_true = self._expected_score(y_true)
        ny_pred = self._expected_score(y_pred)
        dev_true = ny_true - K.mean(ny_true)
        dev_pred = ny_pred - K.mean(ny_pred)
        covariance = K.sum(dev_true * dev_pred, axis=-1)
        spread = K.sum(dev_true ** 2, axis=-1) * K.sum(dev_pred ** 2, axis=-1)
        return -covariance / K.sqrt(spread + K.epsilon())

    def _evaluate(self, data):
        """Score one split.

        Returns:
            ``(pearson, prediction)``: the Pearson correlation between the
            predicted scores and ``data['labels']`` rounded to 4 decimals,
            and the array of predicted scores.
        """
        predictionclasses = []
        # One pass, unshuffled, with the whole split as a single batch.
        for dataslice, _ in self._sample_pairs(data, len(data['classes']), shuffle=False, once=True):
            predictionclasses += list(self.model.predict(dataslice))
        # Expected score = sum over classes of (class index * probability).
        prediction = np.dot(np.array(predictionclasses), np.arange(self.c['num_classes']))
        result = pearsonr(prediction, data['labels'])[0]
        return round(result, 4), prediction

    def eval_model(self):
        """Print and return the ``(train, valid, test)`` Pearson correlations."""
        results = [self._evaluate(data)[0]
                   for data in (self.traindata, self.validdata, self.testdata)]
        print('[Train, Valid, Test]=', end='')
        print(results)
        return tuple(results)

    def eval_model1(self):
        """Score the test split; returns ``([pearson], prediction)``."""
        result, prediction = self._evaluate(self.testdata)
        results = [result]
        print('[Test]=', end='')
        print(results)
        return (results, prediction)

    def eval_model2(self):
        """Score the training split; returns ``([pearson], prediction)``."""
        result, prediction = self._evaluate(self.traindata)
        results = [result]
        # Label fixed: this method evaluates the training split, not the test split.
        print('[Train]=', end='')
        print(results)
        return (results, prediction)

    def fit_model(self, wfname):
        """Train ``self.model``, saving it to ``wfname`` whenever validation improves.

        Args:
            wfname: path the model is saved to after each epoch whose
                validation correlation beats the best seen so far.

        Returns:
            Whatever ``self.model.fit_generator`` returns.
        """
        class Evaluate(Callback):
            """Evaluates after every epoch and keeps the best model on disk."""

            def __init__(self, task, wfname):
                super().__init__()
                self.task = task
                # -inf so that the first finite validation score is always saved.
                self.bestresult = float('-inf')
                self.wfname = wfname

            def on_epoch_end(self, epoch, logs=None):
                print("Inside On epoch end")
                # eval_model returns (train, valid, test); only the
                # validation score drives model selection.
                validresult = self.task.eval_model()[1]
                if validresult > self.bestresult:
                    self.bestresult = validresult
                    self.task.model.save(self.wfname)

        return self.model.fit_generator(
            generator=self._sample_pairs(self.traindata, self.c['batch_size']),
            steps_per_epoch=self.c['num_batchs'],
            epochs=self.c['num_epochs'],
            callbacks=[Evaluate(self, wfname)],
            verbose=1)

    def _sample_pairs(self, data, batch_size, shuffle=True, once=False):
        """Generate ``([m0_batch, m1_batch], classes_batch)`` tuples.

        Args:
            data: dict holding at least ``m0``, ``m1`` and ``classes``.
            batch_size: maximum number of examples per batch (positive).
            shuffle: reshuffle the example order before every pass.
            once: stop after a single pass instead of looping forever.

        Raises:
            ValueError: if ``batch_size`` is not positive.

        Only the three yielded fields are reordered, with one indexing
        operation each, instead of deep-copying the whole dict on every pass.
        Without shuffling the batches are slices of the caller's data, so
        consumers must not modify them in place.
        """
        if batch_size <= 0:
            raise ValueError('batch_size must be positive, got %r' % (batch_size,))
        num = len(data['classes'])
        idN = -(-num // batch_size)  # ceil(num / batch_size)
        ids = list(range(num))
        fields = ('m0', 'm1', 'classes')
        c = 0
        while True:
            if shuffle:
                random.shuffle(ids)
            c = c + 1
            print("counter", c)
            if shuffle:
                ordered = {name: np.asarray(data[name])[ids] for name in fields}
            else:
                ordered = {name: data[name] for name in fields}
            for i in range(idN):
                sl = slice(i * batch_size, (i + 1) * batch_size)
                print("sl", sl)
                x = [ordered['m0'][sl], ordered['m1'][sl]]
                y = ordered['classes'][sl]
                yield (x, y)
            if once:
                break

# Experiment configuration consumed by STSTask.
c = dict()
# Training schedule.
c['num_runs']   = 3
c['num_epochs'] = 64
c['num_batchs'] = 2
c['batch_size'] = 3000
# Input / output dimensions.
c['wordvectdim']  = 300
c['sentencepad']  = 60
c['num_classes']  = 6
# Network architecture: cnnfilters maps kernel size -> number of filters.
c['cnnfilters']     = {1: 1800}
c['cnninitial']     = 'he_uniform'
c['cnnactivate']    = 'relu'
c['densedimension'] = list([1800])
c['denseinitial']   = 'he_uniform'
c['denseactivate']  = 'tanh'
c['optimizer']  = 'adam'

if __name__ == "__main__":
    # tsk and bestwfname are deliberately left as module globals: later
    # notebook cells reuse them.
    tsk = STSTask(c)
    tsk.load_resc('glove.840B.300d.txt')
    tsk.load_data('sts-train.csv', 'sts-dev.csv', 'sts-test.csv')
    # -inf so that any finite validation score selects a run.
    bestresult = float('-inf')
    bestwfname = None
    for i_run in range(tsk.c['num_runs']):
        print('RunID: %s' %i_run)
        tsk.create_model()
        print('Training')
        wfname = './weightfile'+str(i_run)
        tsk.fit_model(wfname)
        print('Prediction(best valid epoch)')
        tsk.model.load_weights(wfname)
        # eval_model returns (train, valid, test); pick the validation score
        # by position instead of unpacking into two names.
        validresult = tsk.eval_model()[1]
        if validresult>bestresult:
            bestresult = validresult
            bestwfname = wfname
    print('Prediction(best run)', bestwfname)
    if bestwfname is None:
        # Happens when there were no runs or every validation score was NaN.
        raise RuntimeError('no run produced a usable validation score')
    tsk.model.load_weights(bestwfname)
    tsk.eval_model()

Loading GloVe...(This might take one or two minutes.)
RunID: 0
inside convo: 
(?, 60, 300)
(?, 60, 300)
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
(?, 60, 1800)
(?, 60, 1800)
inside max
(?, 1, 1800)
(?, 1, 1800)
inside flat
(?, 1800)
(?, 1800)
(?, 1800)
(?, 3600)
inside dense
(?, 1800)
kd
(?, 6)
Training
Epoch 1/64
counter 1
sl slice(0, 3000, None)
sl slice(3000, 6000, None)
counter 2
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
sl slice(0, 3000, None)
sl slice(3000, 6000, None)
counter 3
counter 1
sl slice(0, 3000, None)
sl slice(3000, 6000, None)
counter 4
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
sl slice(0, 3000, None)
sl slice(3000, 6000, None)
counter 5
[Train, Valid]=[0.6144, 0.6836]
Epoch 2/64
sl slice(0, 3000, None)
sl slice(3000, 6000, None)
counter 6
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
sl slice(3000, 6000, None)
counter 7
counter 1
sl slice(0, 1500, No

counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.7988, 0.7847]
Epoch 23/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.8018, 0.7857]
Epoch 24/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.8198, 0.7844]
Epoch 25/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.832, 0.7977]
Epoch 26/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.8406, 0.8015]
Epoch 27/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.8445, 0.7911]
Epoch 28/64
sl slice

Epoch 48/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9693, 0.8087]
Epoch 49/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9638, 0.7835]
Epoch 50/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9748, 0.7876]
Epoch 51/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9752, 0.8123]
Epoch 52/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9767, 0.8034]
Epoch 53/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749

Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.6901, 0.6861]
Epoch 8/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.71, 0.7066]
Epoch 9/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.7195, 0.7259]
Epoch 10/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.7357, 0.7301]
Epoch 11/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.745, 0.7298]
Epoch 12/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl

Epoch 33/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.93, 0.8067]
Epoch 34/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9415, 0.813]
Epoch 35/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9399, 0.8095]
Epoch 36/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9528, 0.8154]
Epoch 37/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9473, 0.8055]
Epoch 38/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, N

sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9954, 0.8087]
Epoch 59/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9959, 0.8099]
Epoch 60/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9956, 0.8093]
Epoch 61/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9963, 0.8081]
Epoch 62/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.9959, 0.8108]
Epoch 63/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 5749, None)
sl slice(0, 3000, None)
counter 1
sl slice(0, 1500, None)
[Tr

Epoch 18/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.815, 0.7749]
Epoch 19/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.8326, 0.7796]
Epoch 20/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.8565, 0.7925]
Epoch 21/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.8693, 0.7987]
Epoch 22/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, None)
sl slice(0, 5749, None)
counter 1
sl slice(0, 1500, None)
[Train, Valid]=[0.872, 0.7959]
Epoch 23/64
sl slice(3000, 6000, None)
Inside On epoch end
counter 1
sl slice(0, 3000, 

In [2]:
# Reload the weight file recorded as the best run by the training cell
# (bestwfname), then score the test split.
tsk.model.load_weights(bestwfname)
# eval_model1 prints the test correlation and returns (results, prediction);
# as the last expression of the cell its return value is displayed.
tsk.eval_model1()

counter 1
sl slice(0, 1379, None)
[Test]=[0.7701]


(0.7701,)