In [5]:
import numpy as np
import tensorflow as tf
from tqdm import trange 
import logging
import os
import pandas as pd
%run "./utils.py"

# Hyper-parameters and dataset sizes shared by the model, training and test cells.
params = dict(
    max_query_words=12,            # word rows in each "query" feature matrix
    max_passage_words=50,          # word rows in each "passage" feature matrix
    emb_dim=50,                    # embedding width of every word row
    BATCH_SIZE=100,                # batch size of the train/eval pipelines
    TotalTrainingdata=4717692,     # used to derive the number of training steps per epoch
    TotalValidationdata=524188,
    EPOCHS=200,                    # total number of epochs to run
    num_classes=2,                 # length of the "label" feature
    save_summary_steps=100,        # summaries are written every this many training steps
    TEST_BATCH_SIZE=1000,          # batch size of the unlabelled test pipeline
    SHUFFLE_BATCH_SIZE=10000,
    WeightMultiplierPosClass=7.0,  # extra loss weight applied to class-1 examples
)

  from ._conv import register_converters as _register_converters


In [None]:
def cnn_network(queryfeatures, passagefeatures,reuse = False):
    """Build a three-layer 2-D CNN tower for the query and the passage, then merge them.

    NOTE(review): a later cell defines another `cnn_network` that takes an extra
    `is_training` argument. When the cells run in order that definition replaces
    this one, and `model_fn` / `modelTest_fn` call the four-argument form.

    Fixes relative to the previous revision of this function:
      * the third query convolution has 16 output channels, so its bias is
        16 wide (it was 32 and could not be broadcast onto the conv output);
      * flattened sizes are derived from the static tensor shapes instead of
        the hard-coded 2*8*2 / 8*16, which did not match the tensors they were
        applied to and would have mixed examples across the batch axis.

    Args:
        queryfeatures: 4-D float tensor passed directly to `tf.nn.conv2d`;
            presumably [batch, max_query_words, emb_dim, 1] (the commented-out
            reshape hints at ?,12,50,1) -- confirm against the caller.
        passagefeatures: 4-D float tensor, presumably
            [batch, max_passage_words, emb_dim, 1] -- confirm against the caller.
        reuse: forwarded as `reuse` to every inner 'shared' variable scope.

    Returns:
        Tensor of shape [batch, 2]: the ReLU-activated output of the last dense layer.

    Raises:
        ValueError: if a tensor to be flattened has an unknown non-batch
            dimension, or if the two towers end with different feature counts.
    """

    def conv2D(x,W,strides):
        return tf.nn.conv2d(x,W,strides=strides,padding='VALID',name="conv2D")

    def maxPooling(x,k):
        return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],padding='VALID',name = "maxPool")

    def getWeight(shape):
        return tf.get_variable(name = "weight",shape = shape, initializer=tf.initializers.truncated_normal(stddev= 0.1))

    def getBias(shape):
        return tf.get_variable(name = "bias",shape = shape, initializer=tf.initializers.constant(0.1))

    def flatten(x):
        # Collapse every non-batch axis; returns (2-D tensor, features per example).
        flat_dim = 1
        for dim in x.get_shape().as_list()[1:]:
            if dim is None:
                raise ValueError("cnn_network needs statically known feature dimensions, got %s" % x.get_shape())
            flat_dim *= dim
        return tf.reshape(x,[-1,flat_dim]), flat_dim

    with tf.variable_scope('query/convLayer1'):
        #------Filter: [filter_height, filter_width, in_channels, out_channels]
        with tf.variable_scope('shared',reuse=reuse):
            weight = getWeight([2,5,1,64])
            bias = getBias([64])
        #------Stride [batch,height,width,channel]
        convQuery1 = tf.nn.relu(conv2D(queryfeatures,weight,[1,1,1,1]) + bias)
        convQuery1_max = maxPooling(convQuery1,k = 2)
        print(convQuery1_max)

    with tf.variable_scope('query/convLayer2'):
        with tf.variable_scope('shared',reuse=reuse):
            weight = getWeight([2,5,64,32])
            bias = getBias([32])
        convQuery2 = tf.nn.relu(conv2D(convQuery1_max,weight,[1,1,1,1]) + bias)
        convQuery2_max = maxPooling(convQuery2,k = 2)
        print(convQuery2_max)

    with tf.variable_scope('query/convLayer3'):
        with tf.variable_scope('shared',reuse=reuse):
            weight = getWeight([1,5,32,16])
            # One bias per output channel of the filter above (16, not 32).
            bias = getBias([16])
        convQuery3 = tf.nn.relu(conv2D(convQuery2_max,weight,[1,1,1,1]) + bias)
        convQuery3_max = maxPooling(convQuery3,k = 2)
        print(convQuery3_max)

    with tf.variable_scope('query/denseLayer3'):
        # NOTE(review): this dense head is printed but never feeds the returned
        # output. It still reads the layer-2 activations, as before; only the
        # flatten/weight sizes were made consistent so the graph can be built.
        dense, denseDim = flatten(convQuery2_max)
        with tf.variable_scope('shared',reuse=reuse):
            weight = getWeight([denseDim,10])
            bias = getBias([10])
        denseQuery3 = tf.nn.relu(tf.matmul(dense,weight) + bias)
        print(denseQuery3)

    with tf.variable_scope('passage/convLayer1'):
        with tf.variable_scope('shared',reuse=reuse):
            weight = getWeight([2,5,1,64])
            bias = getBias([64])
        convPassage1 = tf.nn.relu(conv2D(passagefeatures,weight,[1,1,1,1]) + bias)
        convPassage1_max = maxPooling(convPassage1,k = 3)
        print(convPassage1_max)

    with tf.variable_scope('passage/convLayer2'):
        with tf.variable_scope('shared',reuse=reuse):
            weight = getWeight([2,5,64,32])
            bias = getBias([32])
        convPassage2 = tf.nn.relu(conv2D(convPassage1_max,weight,[1,1,1,1]) + bias)
        convPassage2_max = maxPooling(convPassage2,k = 3)
        print(convPassage2_max)

    with tf.variable_scope('passage/convLayer3'):
        with tf.variable_scope('shared',reuse=reuse):
            weight = getWeight([2,2,32,16])
            bias = getBias([16])
        convPassage3 = tf.nn.relu(conv2D(convPassage2_max,weight,[1,1,1,1]) + bias)
        # Average pooling over a 4-row window; the op keeps its historical name "maxPool".
        convPassage3_max =  tf.nn.avg_pool(convPassage3, ksize=[1, 4, 1, 1], strides=[1, 1, 1, 1],padding='VALID',name = "maxPool")
        print(convPassage3_max)

    with tf.variable_scope('mergeQueryPassage'):
        # For a ?,12,50,1 query and a ?,50,50,1 passage both towers end at
        # 1x2x16 = 32 features per example (assuming those input shapes).
        queryFlat, queryDim = flatten(convQuery3_max)
        passageFlat, passageDim = flatten(convPassage3_max)
        if queryDim != passageDim:
            raise ValueError("query tower yields %d features per example but passage tower yields %d; "
                             "they must match for the element-wise merge" % (queryDim, passageDim))
        mergeOp = tf.multiply(queryFlat,passageFlat, name = "merge")
        print(mergeOp)

        with tf.variable_scope('shared',reuse=reuse):
            weight = getWeight([queryDim,10])
            bias = getBias([10])

        mergeDense = tf.nn.relu(tf.matmul(mergeOp,weight) + bias)
        print(mergeDense)

    with tf.variable_scope('mergeQueryPassage/output'):
        with tf.variable_scope('shared',reuse=reuse):
            weight = getWeight([10,2])
            bias = getBias([2])

        # NOTE(review): the class scores pass through a ReLU, so they can never be
        # negative -- confirm this is intended before using them as softmax logits.
        output = tf.nn.relu(tf.matmul(mergeDense,weight) + bias)

    return output
    

In [51]:
#-----https://github.com/brightmart/text_classification/tree/master/a02_TextCNN
def cnn_network(queryfeatures, passagefeatures,is_training, reuse = False):
    """TextCNN-style scorer: encode query and passage separately, multiply, classify.

    Each input goes through three parallel conv -> batch-norm -> bias -> ReLU ->
    max-pool branches (kernel heights 2, 3 and 4, 128 filters each). The pooled
    branches are concatenated and projected to 100 units; the two 100-unit
    vectors are multiplied element-wise and mapped to 2 outputs.

    Args:
        queryfeatures: 4-D float tensor; the pooling window assumes 12 word rows
            and the kernels assume 50 embedding columns with 1 channel.
        passagefeatures: 4-D float tensor; the pooling window assumes 50 word
            rows and the kernels assume 50 embedding columns with 1 channel.
        is_training: forwarded to `tf.contrib.layers.batch_norm`.
        reuse: forwarded to every variable scope and dense layer.

    Returns:
        Tensor of shape [batch, 2] produced by the final ReLU-activated dense layer.
    """
    kernel_heights = [2,3,4]
    filters_per_kernel = 128
    embedding_width = 50
    # (scope prefix, input tensor, word rows) -- query is built first, then passage.
    towers = [("query", queryfeatures, 12), ("passage", passagefeatures, 50)]

    def conv_pool_branch(tower_name, features, num_words, kernel_height):
        # One branch: convolution spanning the full embedding width, then a
        # max-pool whose window covers every remaining row.
        scope_name = tower_name + '_cnn_multiple_layers' + "convolution-pooling-%s" % kernel_height
        with tf.variable_scope(scope_name,reuse = reuse):
            kernel = tf.get_variable(
                "filter-%s" % kernel_height,
                [kernel_height, embedding_width, 1, filters_per_kernel],
                initializer=tf.random_normal_initializer(stddev=0.1))
            conv_out = tf.nn.conv2d(features, kernel, strides=[1, 1, 1, 1],padding="VALID",name="conv")
            normed = tf.contrib.layers.batch_norm(conv_out, is_training=is_training, scope='cnn1')
            offset = tf.get_variable("b-%s" % kernel_height, [filters_per_kernel])
            activated = tf.nn.relu(tf.nn.bias_add(normed, offset),"relu")
            return tf.nn.max_pool(
                activated,
                ksize=[1,num_words - kernel_height + 1, 1, 1],
                strides=[1, 1, 1, 1], padding='VALID', name="pool")

    def encode_tower(tower_name, features, num_words):
        # Concatenate the branches on the channel axis and project to 100 units.
        pooled = [conv_pool_branch(tower_name, features, num_words, height)
                  for height in kernel_heights]
        stacked = tf.concat(pooled, axis=3)
        with tf.name_scope("dropout"):
            # keep_prob=1.0 keeps every unit.
            stacked = tf.nn.dropout(stacked,keep_prob=1.0)
        flat = tf.reshape(stacked,[-1,len(kernel_heights) * filters_per_kernel])
        return tf.layers.dense(flat, 100, activation=tf.nn.relu, use_bias=True,
                               reuse = reuse, name = tower_name + "_Reshape")

    query_vec, passage_vec = [encode_tower(*tower) for tower in towers]
    interaction = tf.multiply(query_vec, passage_vec, name = "merge")

    with tf.variable_scope("mergeLayer", reuse = reuse):
        # NOTE(review): the two class scores go through a ReLU, so they are never
        # negative -- confirm this is intended before treating them as logits.
        return tf.layers.dense(interaction,units = 2,use_bias = True,activation=tf.nn.relu,
                               reuse=reuse,name="predictions")

In [7]:
def modelTest_fn(mode,embeddingsFile,params):
    """Assemble the prediction graph for an unlabelled, ZLIB-compressed TFRecord file.

    Args:
        mode: compared with "train" to derive the `is_training` flag handed to
            `cnn_network`.
        embeddingsFile: path passed as `filenames` to `tf.data.TFRecordDataset`.
        params: dict providing "max_query_words", "max_passage_words", "emb_dim",
            "TEST_BATCH_SIZE", "EPOCHS" and "num_classes".

    Returns:
        dict with the batched input tensors ('queryfeatures', 'passagefeatures',
        "query_id", "passage_id"), 'iterator_init_op', 'variable_init_op',
        "predictions" and 'summary_op'.
    """
    wanted = ("max_query_words", "max_passage_words", "emb_dim",
              "TEST_BATCH_SIZE", "EPOCHS", "num_classes")
    # "EPOCHS" and "num_classes" are looked up but not used below; the lookups
    # are kept so a missing key still fails here.
    query_len, passage_len, emb_width, test_batch, _epochs, _num_classes = [
        params[key] for key in wanted]

    training_flag = mode == "train"

    def decode_example(serialized):
        # Fixed-size float matrices plus one int64 id each for query and passage.
        schema = {
            "query": tf.FixedLenFeature((query_len,emb_width,1), tf.float32),
            "passage": tf.FixedLenFeature((passage_len,emb_width,1), tf.float32),
            "query_id": tf.FixedLenFeature(1, tf.int64),
            "passage_id": tf.FixedLenFeature(1, tf.int64),
        }
        fields = tf.parse_single_example(serialized, schema)
        return tuple(fields[name] for name in ("query", "passage", "query_id", "passage_id"))

    # Pipeline: read records -> parse each example -> batch.
    records = tf.data.TFRecordDataset(filenames = embeddingsFile, compression_type="ZLIB")
    iterator = records.map(decode_example).batch(test_batch).make_initializable_iterator()

    queryfeatures,passagefeatures,query_id,passage_id = iterator.get_next()

    with tf.variable_scope('model'):
        scores = cnn_network(queryfeatures,passagefeatures,training_flag,reuse = False)

    init_all_variables = tf.group(tf.global_variables_initializer())

    # Everything the caller needs to initialise the pipeline and fetch predictions.
    return {
        'queryfeatures': queryfeatures,
        'passagefeatures': passagefeatures,
        'iterator_init_op': iterator.initializer,
        "query_id": query_id,
        "passage_id": passage_id,
        'variable_init_op': init_all_variables,
        "predictions": scores,
        'summary_op': tf.summary.merge_all(),
    }

In [8]:
def model_fn(mode,embeddingsFile,params):
    """Build the labelled train/eval graph and return its tensors and ops as a dict.

    Args:
        mode: "train" creates the model variables and the optimizer. Any other
            value builds a graph that reuses those variables, so the "train"
            graph has to be built first in the same default graph.
        embeddingsFile: folder whose entries are all read as ZLIB-compressed
            TFRecord files.
        params: dict providing "max_query_words", "max_passage_words", "emb_dim",
            "BATCH_SIZE", "SHUFFLE_BATCH_SIZE", "num_classes" and
            "WeightMultiplierPosClass".

    Returns:
        dict with 'queryfeatures', 'passagefeatures' and "y" (first example of the
        current batch), 'iterator_init_op', 'variable_init_op', "predictions",
        'loss', 'accuracy', 'metrics', 'metrics_init_op', 'update_metrics',
        'summary_op' and, in training mode only, 'train_op'.

    Raises:
        ValueError: if `embeddingsFile` contains no files.
    """
    #--------Hyper parameters:
    max_query_words = params["max_query_words"]
    max_passage_words = params["max_passage_words"]
    emb_dim = params["emb_dim"]
    BATCH_SIZE = params["BATCH_SIZE"]
    SHUFFLE_BATCH_SIZE = params["SHUFFLE_BATCH_SIZE"]
    num_classes = params["num_classes"]

    is_training = (mode == "train")

    def parseEachRecord(record):
        features = {"query": tf.FixedLenFeature((max_query_words,emb_dim,1), tf.float32),
                  "passage": tf.FixedLenFeature((max_passage_words,emb_dim,1), tf.float32),
                  "label": tf.FixedLenFeature((num_classes), tf.int64)}
        parsed_features = tf.parse_single_example(record, features)
        return parsed_features["query"], parsed_features["passage"],parsed_features["label"]

    def parser(fileName):
        dataset = tf.data.TFRecordDataset(filenames = fileName, compression_type="ZLIB")
        parsed_features = dataset.map(parseEachRecord)
        return parsed_features

    def getDatasetIterator(folderPath,batch_size,SHUFFLE_BATCH_SIZE,mode):
        files = [os.path.join(folderPath, f) for f in os.listdir(folderPath)]
        if not files:
            raise ValueError("No TFRecord files found in {}".format(folderPath))
        # Shuffle the file order on every iterator initialisation.
        dataset = tf.data.Dataset.from_tensor_slices(files).shuffle(buffer_size = len(files))
        #------Order: read+parse files -> shuffle records (train only) -> batch -> prefetch
        dataset = dataset.flat_map(parser)
        if mode == "train":
            # SHUFFLE_BATCH_SIZE used to be accepted but ignored, so records were
            # always consumed in on-disk order within each file.
            dataset = dataset.shuffle(buffer_size = SHUFFLE_BATCH_SIZE)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(1)
        iterator = dataset.make_initializable_iterator()
        return iterator

    iterator = getDatasetIterator(embeddingsFile,BATCH_SIZE,SHUFFLE_BATCH_SIZE,mode)

    queryfeatures,passagefeatures,y = iterator.get_next()

    model_spec =     {
        'queryfeatures': queryfeatures[0],
        'passagefeatures': passagefeatures[0],
        'iterator_init_op': iterator.initializer,
        "y":y[0]
    }

    with tf.variable_scope('model'):
        y_conv = cnn_network(queryfeatures,passagefeatures,is_training,reuse = not is_training)

    with tf.variable_scope('lossPerBatch'):
        tmpMultiplier = params["WeightMultiplierPosClass"]
        # Per-example weight: 1 for class 0, 1 + multiplier for class 1.
        weights = tf.multiply(tmpMultiplier, tf.cast(tf.argmax(y,-1),tf.float32)) + 1
        cross_entropy = tf.reduce_mean(tf.losses.softmax_cross_entropy(onehot_labels=y,logits=y_conv,weights = weights))
        tf.summary.scalar('lossPerBatch', cross_entropy)

    if is_training:
        with tf.name_scope('AdamOptim'):
            global_step = tf.train.get_or_create_global_step()
            # Batch-norm layers register their moving-average updates in UPDATE_OPS;
            # unless the train op depends on them the moving statistics never
            # change and inference-mode batch norm runs on its initial values.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_step = tf.train.AdamOptimizer(1e-4).minimize(loss = cross_entropy,global_step=global_step)

    correct_pred = tf.equal(tf.argmax(y_conv,1),tf.argmax(y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
    tf.summary.scalar('accuracyPerBatch', accuracy)

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # Metrics for evaluation using tf.metrics (average over whole dataset)
    with tf.variable_scope("metrics"):
        # AUC needs a score in [0, 1], not a hard 0/1 decision: with argmax
        # predictions the ROC curve collapses to a single operating point.
        positive_prob = tf.nn.softmax(y_conv)[:,1]
        metrics = {
            'accuracy': tf.metrics.accuracy(labels=tf.argmax(y,-1), predictions=tf.argmax(y_conv,-1)),
            'loss': tf.metrics.mean(cross_entropy),
            'auc':tf.metrics.auc(labels=tf.argmax(y,-1), predictions=positive_prob)
        }

    accuracyOverall,_ = metrics["accuracy"]
    lossOverall,_ = metrics["loss"]
    aucOVerall,_ = metrics["auc"]

    tf.summary.scalar('accuracy', accuracyOverall)
    tf.summary.scalar('loss',lossOverall )
    tf.summary.scalar('auc', aucOVerall)

    # Group the update ops for the tf.metrics
    update_metrics_op = tf.group(*[op for _, op in metrics.values()])

    # Get the op to reset the local variables used in tf.metrics
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metrics")
    metrics_init_op = tf.variables_initializer(metric_variables)

    # -----------------------------------------------------------
    # MODEL SPECIFICATION
    # Create the model specification and return it
    # It contains nodes or operations in the graph that will be used for training and evaluation
    variable_init_op = tf.group(*[tf.global_variables_initializer()])
    model_spec['variable_init_op'] = variable_init_op
    model_spec["predictions"] = y_conv
    model_spec['loss'] = cross_entropy
    model_spec['accuracy'] = accuracy
    model_spec['metrics_init_op'] = metrics_init_op
    model_spec['metrics'] = metrics
    model_spec['update_metrics'] = update_metrics_op
    model_spec['summary_op'] = tf.summary.merge_all()

    if is_training:
        model_spec['train_op'] = train_step

    return model_spec

In [9]:
def train_sess(sess, model_spec, num_steps, writer, params):
    """Run `num_steps` optimisation steps, updating the streaming metrics as it goes.

    Args:
        sess: (tf.Session) session in which the ops are run
        model_spec: (dict) must contain 'loss', 'train_op', 'update_metrics',
            'metrics', 'summary_op', "predictions", 'iterator_init_op' and
            'metrics_init_op'
        num_steps: (int) number of batches to train on
        writer: (tf.summary.FileWriter) receives a summary on selected steps
        params: (dict) must contain "save_summary_steps"
    """
    # 'metrics' and "predictions" are fetched from the spec but not run here.
    spec_keys = ('loss', 'train_op', 'update_metrics', 'metrics', 'summary_op', "predictions")
    loss_t, train_t, update_t, _metrics, summary_t, _predictions = [
        model_spec[key] for key in spec_keys]
    step_t = tf.train.get_global_step()

    # Restart the input pipeline and reset the metric accumulators.
    for init_key in ('iterator_init_op', 'metrics_init_op'):
        sess.run(model_spec[init_key])

    progress = trange(num_steps)
    for step in progress:
        core_fetches = [train_t, update_t, loss_t]
        if step % params["save_summary_steps"] == 0:
            # Summary step: same mini-batch update, plus the merged summary and
            # the global step it is logged under.
            results = sess.run(core_fetches + [summary_t, step_t])
            loss_val, summ, global_step_val = results[2:]
            writer.add_summary(summ, global_step_val)
        else:
            loss_val = sess.run(core_fetches)[2]
        # Show the latest batch loss next to the progress bar.
        progress.set_postfix(loss='{:05.3f}'.format(loss_val))

In [10]:
def evaluate_sess(sess, model_spec, num_steps, writer=None, params=None):
    """Accumulate the streaming metrics over `num_steps` batches and return them.

    Args:
        sess: (tf.Session) session in which the ops are run
        model_spec: (dict) must contain 'update_metrics', 'metrics',
            'iterator_init_op' and 'metrics_init_op'
        num_steps: (int) number of batches to evaluate on
        writer: (tf.summary.FileWriter) if given, each metric is also written as
            a summary at the current global step
        params: not used by this function

    Returns:
        dict mapping each metric name to its value after the last batch.
    """
    metric_updates = model_spec['update_metrics']
    metric_pairs = model_spec['metrics']
    step_t = tf.train.get_global_step()

    # Restart the input pipeline and reset the metric accumulators.
    sess.run(model_spec['iterator_init_op'])
    sess.run(model_spec['metrics_init_op'])

    remaining = num_steps
    while remaining > 0:
        sess.run(metric_updates)
        remaining -= 1

    # Each metric is a (value, update_op) pair; only the value tensor is read.
    value_tensors = {}
    for metric_name, pair in metric_pairs.items():
        value_tensors[metric_name] = pair[0]
    metrics_val = sess.run(value_tensors)

    formatted = ["{}: {:05.3f}".format(metric_name, value)
                 for metric_name, value in metrics_val.items()]
    logging.info("- Eval metrics: " + " ; ".join(formatted))

    if writer is None:
        return metrics_val

    # Summaries are built by hand so they carry the training global step.
    global_step_val = sess.run(step_t)
    for tag, val in metrics_val.items():
        entry = tf.Summary.Value(tag=tag, simple_value=val)
        writer.add_summary(tf.Summary(value=[entry]), global_step_val)

    return metrics_val

In [11]:
def evaluate(model_spec, model_dir, params, restore_from):
    """Restore a checkpoint and score every example produced by the test iterator.

    The iterator is drained until it raises `tf.errors.OutOfRangeError`, so the
    number of examples no longer has to be hard-coded and a dataset whose size
    is an exact multiple of the batch size does not fail on a final empty fetch.

    Args:
        model_spec: (dict) must contain 'iterator_init_op', "predictions",
            "query_id" and "passage_id"
        model_dir: (string) directory containing the checkpoints
        params: (dict) hyperparameters; not read here, kept so existing callers
            keep working
        restore_from: (string) checkpoint file, or directory whose latest
            checkpoint is used, relative to `model_dir`

    Returns:
        pandas.DataFrame with columns "query_id", "passage_id" and "predictions"
        (column 1 of the model output), one row per example. The index restarts
        at 0 for every batch. An empty DataFrame is returned if the iterator
        yields nothing.
    """
    # Initialize tf.Saver
    saver = tf.train.Saver()

    fetches = [model_spec["predictions"],model_spec["query_id"],model_spec["passage_id"]]
    batch_frames = []
    with tf.Session() as sess:
        # Variables come from the checkpoint, so no initializer is run.
        save_path = os.path.join(model_dir, restore_from)
        if os.path.isdir(save_path):
            save_path = tf.train.latest_checkpoint(save_path)
        saver.restore(sess, save_path)

        sess.run(model_spec['iterator_init_op'])

        while True:
            try:
                predictions,query_id,passage_id = sess.run(fetches)
            except tf.errors.OutOfRangeError:
                # Input exhausted: every batch has been scored.
                break
            batch_frames.append(pd.DataFrame({"query_id":query_id[:,0],
                                              "passage_id":passage_id[:,0],
                                              "predictions":predictions[:,1]}))

    if not batch_frames:
        return pd.DataFrame()
    # A single concat instead of growing the frame batch by batch.
    return pd.concat(batch_frames,axis = 0)

In [12]:
def train_and_evaluate(train_model_spec, eval_model_spec, model_dir, params):
    """Train for params["EPOCHS"] epochs, evaluating and checkpointing after each one.

    After every epoch the latest weights are saved under `<model_dir>/last_weights`.
    The model is then evaluated, and whenever the evaluation AUC is at least as
    good as the best seen so far the weights are also saved under
    `<model_dir>/best_weights`. Evaluation metrics are dumped to JSON with
    `save_dict_to_json`.

    Args:
        train_model_spec: (dict) contains the graph operations or nodes needed for training
        eval_model_spec: (dict) contains the graph operations or nodes needed for evaluation
        model_dir: (string) directory receiving weights, summaries and metric files
        params: (dict) must define "BATCH_SIZE", "EPOCHS", "TotalTrainingdata"
                and "save_summary_steps"; "TotalValidationdata" is used, when
                present, to size the evaluation pass
    """
    BATCH_SIZE = params["BATCH_SIZE"]
    EPOCHS = params["EPOCHS"]
    numSteps = params["TotalTrainingdata"] // BATCH_SIZE
    # Evaluate on the whole validation set rather than on a single batch, so the
    # "best" checkpoint is not chosen on BATCH_SIZE examples. Falls back to one
    # batch when the size is not configured.
    evalSteps = max(1, params.get("TotalValidationdata", 0) // BATCH_SIZE)

    # Initialize tf.Saver instances to save weights during training
    last_saver = tf.train.Saver() # will keep last 5 epochs
    best_saver = tf.train.Saver(max_to_keep=1)  # only keep 1 best checkpoint (best on eval)

    # Make sure the checkpoint directories exist before the first save.
    last_weights_dir = os.path.join(model_dir, 'last_weights')
    best_weights_dir = os.path.join(model_dir, 'best_weights')
    for weights_dir in (last_weights_dir, best_weights_dir):
        os.makedirs(weights_dir, exist_ok=True)

    with tf.Session() as sess:
        # Initialize model variables
        sess.run(train_model_spec['variable_init_op'])

        # For tensorboard (takes care of writing summaries to files)
        train_writer = tf.summary.FileWriter(os.path.join(model_dir, 'train_summaries'), sess.graph)
        eval_writer = tf.summary.FileWriter(os.path.join(model_dir, 'eval_summaries'), sess.graph)

        try:
            best_eval_auc = 0.0
            for epoch in range(EPOCHS):
                # Run one epoch
                logging.info("Epoch {}/{}".format(epoch + 1, EPOCHS))
                train_sess(sess, train_model_spec, numSteps, train_writer, params)

                # Save weights
                last_save_path = os.path.join(last_weights_dir, 'after-epoch')
                last_saver.save(sess, last_save_path, global_step=epoch + 1)

                metrics = evaluate_sess(sess, eval_model_spec, evalSteps, eval_writer)

                # Model selection is driven by the evaluation AUC.
                eval_auc = metrics['auc']
                if eval_auc >= best_eval_auc:
                    best_eval_auc = eval_auc
                    # Save weights
                    best_save_path = os.path.join(best_weights_dir, 'after-epoch')
                    best_save_path = best_saver.save(sess, best_save_path, global_step=epoch + 1)
                    logging.info("- Found new best AUC, saving in {}".format(best_save_path))
                    # Save best eval metrics in a json file in the model directory
                    best_json_path = os.path.join(model_dir, "metrics_eval_best_weights.json")
                    save_dict_to_json(metrics, best_json_path)

                # Save latest eval metrics in a json file in the model directory
                last_json_path = os.path.join(model_dir, "metrics_eval_last_weights.json")
                save_dict_to_json(metrics, last_json_path)
        finally:
            # Flush and close the event files even when training is interrupted.
            train_writer.close()
            eval_writer.close()

In [52]:
#-----------------Main function for training

# Clear the default graph, build the training graph and then the evaluation
# graph (in that order), and run the training loop.
tf.reset_default_graph()
train_model_spec, eval_model_spec = [
    model_fn(split_mode, data_folder, params)
    for split_mode, data_folder in (("train", "./TrainData"), ("eval", "./ValidationData"))
]
logging.info("Starting training for {} epoch(s)".format(params["EPOCHS"]))
train_and_evaluate(train_model_spec, eval_model_spec, model_dir="./ModelLogs", params=params)

100%|███████████████████████████████████████████████████████████████████| 47176/47176 [26:19<00:00, 30.00it/s, loss=1.033]
100%|███████████████████████████████████████████████████████████████████| 47176/47176 [25:41<00:00, 30.60it/s, loss=1.324]
100%|███████████████████████████████████████████████████████████████████| 47176/47176 [25:37<00:00, 30.69it/s, loss=1.178]
100%|███████████████████████████████████████████████████████████████████| 47176/47176 [25:12<00:00, 31.18it/s, loss=1.227]
100%|███████████████████████████████████████████████████████████████████| 47176/47176 [25:18<00:00, 31.08it/s, loss=1.130]
100%|███████████████████████████████████████████████████████████████████| 47176/47176 [25:26<00:00, 30.91it/s, loss=1.081]
100%|███████████████████████████████████████████████████████████████████| 47176/47176 [25:13<00:00, 31.16it/s, loss=1.324]
100%|███████████████████████████████████████████████████████████████████| 47176/47176 [25:41<00:00, 30.60it/s, loss=1.130]
100%|███████████

KeyboardInterrupt: 

In [53]:
#-----------------Main function for testing

# Clear the default graph, rebuild the model for prediction, restore the best
# checkpoint and score the unlabelled file.
tf.reset_default_graph()
eval_model_spec = modelTest_fn("test","./evalUnlabelledEmbeddings.tfrecords",params)
logging.info("Starting prediction on ./evalUnlabelledEmbeddings.tfrecords")
df = evaluate(eval_model_spec, "./ModelLogs", params,"best_weights")

# One row per query: its scores formatted to 2 decimals and joined with tabs.
tmp = (df.groupby('query_id')['predictions']
       .apply(lambda x: "\t".join([format(val, "0.2f") for val in x]))
       .reset_index())

# Written by hand rather than with to_csv: under QUOTE_NONE the csv writer puts
# the escape character in front of every tab inside the joined score string,
# which corrupts the tab-separated output (and quotechar="" is rejected by the
# csv module on Python 3.11+).
with open("./answer.tsv", "w", encoding="utf-8") as answer_file:
    for query_id, scores in zip(tmp["query_id"], tmp["predictions"]):
        answer_file.write("{}\t{}\n".format(query_id, scores))

INFO:tensorflow:Restoring parameters from ./ModelLogs\best_weights\after-epoch-10


INFO:tensorflow:Restoring parameters from ./ModelLogs\best_weights\after-epoch-10


In [None]:
#df.head(100)
#map(str,x)
# Rebuild the per-query table from `df` and export it (same export as the
# testing cell; requires `df` from a previous run of that cell).
tmp = (df.groupby('query_id')['predictions']
       .apply(lambda x: "\t".join([format(val, "0.2f") for val in x]))
       .reset_index())

# Written by hand rather than with to_csv: under QUOTE_NONE the csv writer puts
# the escape character in front of every tab inside the joined score string,
# which corrupts the tab-separated output (and quotechar="" is rejected by the
# csv module on Python 3.11+).
with open("./answer.tsv", "w", encoding="utf-8") as answer_file:
    for query_id, scores in zip(tmp["query_id"], tmp["predictions"]):
        answer_file.write("{}\t{}\n".format(query_id, scores))

In [None]:
# Tab-joined, 2-decimal scores of query 89. The previous version called
# "\t".join on each formatted number, which put a tab between its characters.
"\t".join(format(score, "0.2f") for score in df.loc[df["query_id"] == 89, "predictions"])

In [None]:
# Preview the first rows of the per-query score table `tmp` built in an earlier cell.
tmp.head()