In [1]:
import csv
import torch
from torch import optim
import random 
import sys
from pytorch_transformers.tokenization_distilbert import DistilBertTokenizer
from pytorch_transformers.modeling_distilbert import DistilBertModel
from scipy.special import softmax
import getopt

class Classifier(torch.nn.Module):
    def __init__(self, esz=1536, num_layers=5):
        super().__init__()
        self.num_layers = num_layers
        self.dense = [torch.nn.Linear(
            esz, int(esz),
        ).cuda() for i in range(num_layers)]
        
        self.relu = [torch.nn.ReLU().cuda() for i in range(num_layers)]
        self.out = torch.nn.Linear(
            int(esz), 2,
        ).cuda()
        
        self.bn = torch.nn.BatchNorm1d(1).cuda()
        
    def forward(self, input, hidden=None):
        for i in range(self.num_layers):
            input = self.relu[i](self.dense[i](input.cuda()))
        return self.bn(self.out(input))

class Attention(torch.nn.Module):    
    def __init__(self, esz=768, seq_len=20):
        super().__init__()
        self.dense = torch.nn.Linear(
            esz, 1
        ).cuda()
        self.relu = torch.nn.ReLU().cuda()
        self.sm = torch.nn.Softmax().cuda()
        self.attn = torch.nn.MultiheadAttention(esz, 8).cuda()
        #self.bn = torch.nn.BatchNorm1d(seq_len).cuda()
        
    def forward(self, input, hidden=None):
        #return self.sm(self.dense(input.cuda()).cuda()).cuda()
        out, _ = self.attn(input.cuda(), input.cuda(), input.cuda())
        #return self.bn(out)
        return out
    
class ModelDataset():
    
    def __init__(self):
        self.train_data = []
        self.test_data = []

        with open("inferences.csv") as csvDataFile:
            csvreader = csv.reader(csvDataFile, delimiter='\t')
            next(csvreader)
            for context1, context2, utterance1, utterance2, inf1, inf2, inf3 in csvreader:
                target = self.test_data if random.random() > 0.8 else self.train_data
                target.append({
                    "context1":context1,
                    "context2":context2,
                    "utterance1":utterance1,
                    "utterance2":utterance2,
                    "inf1":inf1,
                    "inf2":inf2,
                    "inf3":inf3,
                })

    def sample_train(self):
        positive = random.choice(self.train_data)
        while "inf_random" not in positive or positive["inf1"] == positive["inf_random"]:
            negative = random.choice(self.train_data)
            positive["inf_random"] = negative["inf1"]
        return positive

    def sample_test(self):
        positive = random.choice(self.test_data)
        while "inf_random" not in positive or positive["inf1"] == positive["inf_random"]:
            negative = random.choice(self.train_data)
            positive["inf_random"] = negative["inf1"]
        return positive
    
class Model():
    def __init__(self, tokenizer=None, encoder=None, lr=0.0005, esz=768, pad_len=20, model_name=None,path=None, bsz=10, save_steps=50):
        self.save_steps = save_steps
        self.model_name = model_name
        self.bsz = bsz
        self.dataset = ModelDataset()
        self.tokenizer = tokenizer
        self.encoder = encoder
        self.pad_len = pad_len
        self.classifier = Classifier(esz=esz*2) 
        self.context_attention = Attention(esz=esz, seq_len=pad_len)
        self.inference_attention = Attention(esz=esz, seq_len=pad_len)
        self.utterance_attention = Attention(esz=esz, seq_len=pad_len)
        self.optims = {
            'classifier': optim.Adam(self.classifier.parameters(), lr=lr),
            'context_attention': optim.Adam(self.context_attention.parameters(), lr=lr),
            'inference_attention': optim.Adam(self.inference_attention.parameters(), lr=lr),
            'utterance_attention': optim.Adam(self.utterance_attention.parameters(), lr=lr),
        }
        self.classifier_scheduler = torch.optim.lr_scheduler.StepLR(self.optims["classifier"], 0.9)
        self.context_attention_scheduler = torch.optim.lr_scheduler.StepLR(self.optims["context_attention"], 0.9)
        self.inference_attention_scheduler = torch.optim.lr_scheduler.StepLR(self.optims["inference_attention"], 0.9)
        self.utterance_attention_scheduler = torch.optim.lr_scheduler.StepLR(self.optims["utterance_attention"], 0.9)
        
        if path is not None:
            self.classifier.load_state_dict(torch.load(path + "/" + model_name + "_classifier.model"))
            self.context_attention.load_state_dict(torch.load(path + "/" + model_name + "_context_attention.model"))
            self.inference_attention.load_state_dict(torch.load(path + "/" + model_name + "_inference_attention.model"))
            self.utterance_attention.load_state_dict(torch.load(path + "/" + model_name + "_utterance_attention.model"))
        
        self.loss = 0
        self.step = 0
        self.c_loss = torch.nn.CrossEntropyLoss()
    
    def zero_grad(self):
        for optimizer in self.optims.values():
            optimizer.zero_grad()
            
    def update_params(self):
        for optimizer in self.optims.values():
            optimizer.step()
            
    def get_and_attend_context(self, embedded):
        context_attention_mask1 = self.context_attention(embedded["context1"].cuda())
        context_attention_mask2 = self.context_attention(embedded["context2"].cuda())
        context = torch.sum((context_attention_mask1 * embedded["context1"].cuda()) + (context_attention_mask2 * embedded["context2"].cuda()), 1)
        return context
    
    def attend_inference(self, inf):
        inference_attention_mask = self.inference_attention(inf.cuda())
        return torch.sum(inference_attention_mask * inf.cuda(), 1).cuda()
        #return inf.mean(1)

    def attend_utterance(self, utterance):
        utterance_attention_mask = self.utterance_attention(utterance.cuda())
        return torch.sum(utterance_attention_mask * utterance.cuda(), 1).cuda()
        #return utterance.mean(1)
    
    def merge_utterance_with_inference_and_context(self, context, utterance, inference):
        #return torch.cat([context,utterance,inference], 1)
        return torch.cat([utterance,inference], 1)
        
    def eval_step(self):
        self.classifier.eval()
        self.context_attention.eval()
        self.inference_attention.eval()
        
        accuracy = 0
        num_steps = 10
        for _ in range(num_steps):
            xs = self.dataset.sample_test()
            #xs = sample_train()
            print("Sample: %s" % xs["utterance1"])
            print("Positive Inference 1: %s" % xs["inf1"])
            embedded = self.embed_sample(xs)
            
            context = self.get_and_attend_context(embedded).cuda()
            inf1 = self.attend_inference(embedded["inf1"].cuda()).cuda()
            utterance1 = self.attend_utterance(embedded["utterance1"].cuda()).cuda()
            merged_positive = self.merge_utterance_with_inference_and_context(context, utterance1, inf1).cuda().unsqueeze(0)
            pred = self.classifier(merged_positive) .cuda()
            print(pred)
            pred = torch.argmax(pred[0], 1)
            #if pred.item() == 0:
                #print("Inference does not follow")
                #accuracy += .append(0)
            if pred.item() == 1:
                #print("Inference follows")
                accuracy += 1

            inf_random = self.attend_inference(embedded["inf_random"]).cuda()
            merged_negative = self.merge_utterance_with_inference_and_context(context, utterance1, inf_random).cuda().unsqueeze(0)

            print("Negative Inference 1: %s" % xs["inf_random"])
            pred = self.classifier(merged_negative).cuda() 
            print(pred)
            pred = torch.argmax(pred[0], 1)
            
            if pred.item() == 0:
            #    print("Inference does not follow")
                #accuracy.append(1)
                accuracy += 1
            #else:
                #accuracy.append(0)
            #    print("Inference follows")

            #context = self.get_and_attend_context(embedded)
            #inp = self.attend_inference_and_merge_context(embedded["inf1"], context)
        print("Accuracy : %f" % (accuracy / (num_steps * 2)))
        print("####################")
                
    def embed_sample(self, xs):
        embedded = {}
        for k in ["context1","context2","utterance1","utterance2","inf1","inf2","inf3","inf_random"]:
            if k in xs and len(xs[k]) > 0:
                tokens = self.tokenizer.encode(xs[k])
                padded = torch.full((1, self.pad_len), self.tokenizer.pad_token_id, dtype=torch.long)
                padded[0,:len(tokens)] = torch.LongTensor([tokens])
                embedded[k] = self.encoder(padded)[0]
        return embedded
        
    def train_step(self):
        loss = 0
        self.zero_grad()
        self.classifier.train()
        self.context_attention.train()
        self.inference_attention.train()
        batch_xs = []
        batch_ys = []
        for _ in range(self.bsz):
            xs = self.dataset.sample_train()
            #print(xs)
            embedded = self.embed_sample(xs)
            context = self.get_and_attend_context(embedded).cuda()
            inf1 = self.attend_inference(embedded["inf1"]).cuda()
            utterance1 = self.attend_utterance(embedded["utterance1"]).cuda()
            merged_positive = self.merge_utterance_with_inference_and_context(context, utterance1, inf1).cuda()
            inf_random = self.attend_inference(embedded["inf_random"]).cuda()
            merged_negative = self.merge_utterance_with_inference_and_context(context, utterance1, inf_random).cuda()
            batch_xs.append(merged_positive)
            batch_xs.append(merged_negative)
            batch_ys.append([1])
            batch_ys.append([0])
        inp = torch.cat(batch_xs, 0).unsqueeze(1).cuda()
        
            #if "inf2" in embedded:
            #    inf2 = self.attend_inference_and_merge_context(embedded["inf2"], context)
            #    inp = torch.cat([inp, inf2, inf_random], 0)
            #    outs += [1,0]
            #    if "inf3" in embedded:
            #        inf3 = self.attend_inference_and_merge_context(embedded["inf3"], context)
            #        inp = torch.cat([inp, inf3, inf_random], 0)
            #        outs += [1,0]
        
        outs = torch.LongTensor(batch_ys).cuda()
        pred = self.classifier(inp.cuda()).cuda()
        loss = self.c_loss(torch.transpose(pred, 1, 2), outs)

        self.step += 1
        self.loss += loss
        if self.step % self.save_steps == 0:
            print(self.step)
            print(self.loss / 50)
            self.loss = 0
            self.eval_step()
            self.classifier_scheduler.step()
            self.context_attention_scheduler.step()
            self.inference_attention_scheduler.step()
            self.utterance_attention_scheduler.step()
            torch.save(self.classifier.state_dict(), self.model_name + "_classifier.model")
            torch.save(self.context_attention.state_dict(), self.model_name + "_context_attention.model")
            torch.save(self.inference_attention.state_dict(), self.model_name + "_inference_attention.model")
            torch.save(self.utterance_attention.state_dict(), self.model_name + "_utterance_attention.model")

        loss.backward()
        self.update_params()
    
    def classify(self, utterance, condition, context):
        embedded = self.embed_sample({
            "context1":context,
            "context2":"NULL",
            "utterance1":utterance,
            "inf1":condition
        })
        
        context = self.get_and_attend_context(embedded).cuda()
        inf1 = self.attend_inference(embedded["inf1"]).cuda()
        utterance1 = self.attend_utterance(embedded["utterance1"]).cuda()
        inp = self.merge_utterance_with_inference_and_context(context, utterance1, inf1).cuda()
        
        pred = self.classifier(inp.cuda()).cuda()
        return pred
        
        
        
class SequenceModelDataset():
    def __init__(self):
        self.train_data = []
        self.test_data = []

        with open("sequences.csv") as csvDataFile:
            csvreader = csv.reader(csvDataFile, delimiter='\t')
            next(csvreader)
            for utterance, context, response, _, _, _, _ in csvreader:
                target = test_data if random.random() > 0.8 else train_data
                target.append({
                    "utterance":utterance,
                    "context":context,
                    "response":response,
                })

    def sample_train():
        positive = random.choice(self.train_data)
        while "response_random" not in positive or positive["response"] == positive["response_random"]:
            negative = random.choice(train_data)
            positive["response_random"] = negative["response"]
        return positive

    def sample_test():
        positive = random.choice(self.test_data)
        while "response_random" not in positive or positive["response"] == positive["response_random"]:
            negative = random.choice(train_data)
            positive["response_random"] = negative["response"]
        return positive

class SequenceModel(Model):
    def __init__(self, tokenizer=None, encoder=None, lr=0.0001, esz=768, pad_len=30,path=None, model_name=None, save_steps=50):
        super().__init__(tokenizer=tokenizer, encoder=encoder, lr=lr, esz=esz, pad_len=pad_len,path=path,model_name=model_name, save_steps=save_steps)
                    
    def get_and_attend_context(self, embedded):
        context_attention_mask = self.context_attention(embedded["context"])
        return context_attention_mask.cuda() * embedded["context"].cuda()
    
    def embed_sample(self, xs):
        embedded = {}
        for k in ["utterance","context","response",  "response_random"]:
            if k in xs and len(xs[k]) > 0:
                tokens = self.tokenizer.encode(xs[k])
                padded = torch.full((1, self.pad_len), self.tokenizer.pad_token_id, dtype=torch.long)
                padded[0,:len(tokens)] = torch.LongTensor([tokens])
                embedded[k] = self.encoder(padded)[0]
        return embedded
    
    def eval_step(self):
        self.classifier.eval()
        self.context_attention.eval()
        self.inference_attention.eval()
        
        accuracy = 0
        num_steps = 10
        for _ in range(num_steps):
            xs = sample_test()
            print("Utterance: %s" % xs["utterance"])
            print("Positive response: %s" % xs["response"])
            embedded = self.embed_sample(xs)
            
            context = SequenceModel.get_and_attend_context(self, embedded)
            utterance = self.attend_utterance(embedded["utterance"])
            response = self.attend_inference(embedded["response"])
            merged_positive = self.merge_utterance_with_inference_and_context(context, utterance, response)

            pred = self.classifier(merged_positive) 
            #print(pred)
            pred = torch.argmax(pred, 1)
            #if pred.item() == 0:
                #print("Inference does not follow")
                #accuracy += .append(0)
            if pred.item() == 1:
                #print("Inference follows")
                accuracy += 1

            response_random = self.attend_inference(embedded["response_random"])
            merged_negative = self.merge_utterance_with_inference_and_context(context, utterance, response_random)

            pred = self.classifier(merged_negative) 
            
            pred = torch.argmax(pred, 1)
            print("Negative Inference 1: %s" % xs["response_random"])
            if pred.item() == 0:
            #    print("Inference does not follow")
                #accuracy.append(1)
                accuracy += 1
            #else:
                #accuracy.append(0)
            #    print("Inference follows")
            #print(pred)
            #context = self.get_and_attend_context(embedded)
            #inp = self.attend_inference_and_merge_context(embedded["inf1"], context)
        print("Accuracy : %f" % (accuracy / (num_steps * 2)))
                        
    def train_step(self):
        loss = 0
        self.zero_grad()
        self.classifier.train()
        self.context_attention.train()
        self.inference_attention.train()

        xs = super().sample_train()
        
        embedded = self.embed_sample(xs)
        
        context = self.get_and_attend_context(embedded)
        response = self.attend_inference(embedded["response"])
        utterance = self.attend_inference(embedded["utterance"])
        merged_positive = self.merge_utterance_with_inference_and_context(context, utterance, response)

        response_random = self.attend_inference(embedded["response_random"])
        merged_negative = self.merge_utterance_with_inference_and_context(context, utterance, response_random)
        inp = torch.cat([merged_positive, merged_negative], 0).unsqueeze(1)
        outs = [[1],[0]]
        
        outs = torch.cuda.LongTensor(outs)

        pred = self.classifier(inp)

        loss = self.c_loss(torch.transpose(pred, 1, 2), outs)

        self.step += 1
        self.loss += loss
        if self.step % self.save_steps == 0:
            print(self.step)
            print(self.loss / 50)
            self.loss = 0
            self.eval_step()
            torch.save(self.classifier.state_dict(), "classifier_seq.model")
            torch.save(self.context_attention.state_dict(), "context_attention_seq.model")
            torch.save(self.inference_attention.state_dict(), "inference_attention_seq.model")
            torch.save(self.utterance_attention.state_dict(), "utterance_attention_seq.model")
            self.classifier_scheduler.step()
            self.context_attention_scheduler.step()
            self.inference_attention_scheduler.step()
            self.utterance_attention_scheduler.step()
        loss.backward()
        self.update_params()
    
    def classify(self, utterance, context, response):
        """Inference step to determine whether response is coherent given utterance"""
        self.classifier.eval()
        self.context_attention.eval()
        self.inference_attention.eval()
        
        embedded = self.embed_sample({"utterance":utterance, "response":response, "context":context})
        
        context = self.get_and_attend_context(embedded)
        response = self.attend_inference(embedded["response"])
        utterance = self.attend_inference(embedded["utterance"])
        merged = self.merge_utterance_with_inference_and_context(context, utterance, response)
        pred = self.classifier(merged)
        return softmax(pred[0].cpu().detach().numpy())[1].item()

class RuntimeModel():
    def __init__(self, classifier_model, seq_model):
        self.candidates = []
        self.history = []
        
        self.step = 0
        self.done = False
        self.classifier_model = classifier_model
        self.seq_model = seq_model
        self.conditions = []
        
        with open("runtime.tsv") as csvDataFile:
            csvreader = csv.reader(csvDataFile, delimiter='\t')
            next(csvreader)
            for candidate, condition, example1, example2, hint1, hint2 in csvreader:
                self.candidates.append({
                    "candidate":candidate,
                    "condition":condition,
                    "example1":example1,
                    "example2":example2,
                    "hint1":hint1,
                    "hint2":hint2,
                })
    
    def next(self):
        if self.step == 0:
            for index, x in enumerate(self.candidates):
                if x["condition"] == "NULL":
                    self.history.append(x["candidate"])
                    self.candidates.pop(index)
                    return x["candidate"]
            raise Exception()
        
        idx_max = -1
        c_max = -1
        for index, x in enumerate(self.candidates):
            classification = self.seq_model.classify(x["candidate"], x["condition"], self.history[-1])
            if classification > c_max:
                c_max = classification
                idx_max = index
                
        return self.candidates[idx_max].candidate
        
    def process(self, inp, threshold=0.5):
        max_coherence_score = -1
        max_coherence_index = -1
        for index, x in enumerate(self.candidates):
#                    def classify(self, utterance, condition, context):

            score = self.classifier_model.classify(inp, x["condition"], self.history[-1])
            score = softmax(score.cpu().detach().numpy())
            print(score)
            print("Condition Classification Score for %s : %f" % (x["condition"], score[0][1]))
            
            if score[0,1] > threshold:
                self.conditions.append(x["condition"])
                #def classify(self, utterance, context, response):
                coherence_score = self.seq_model.classify(inp, self.history[-1], x["candidate"])
                print("Coherence Score for %s : %f" % (x["candidate"], coherence_score))
                if coherence_score > max_coherence_score:
                    max_coherence_score = coherence_score
                    max_coherence_index = index
        if max_coherence_score == -1:
            raise Exception()
        return self.candidates[max_coherence_index]
        
def run_with_opts(opts):
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    special_tokens_dict = {'additional_special_tokens': ['<PLH>', '<s>','</s>']}
    tokenizer.add_special_tokens(special_tokens_dict)
    encoder = DistilBertModel.from_pretrained('distilbert-base-uncased')
    encoder.resize_token_embeddings(len(tokenizer))

    for param in encoder.parameters():
        param.requires_grad = False

    pad_token = tokenizer.pad_token
        
    if opts["train_classifier"]:
        model = Model(tokenizer=tokenizer, encoder=encoder,model_name="classifier")
        for i in range(1000):
            model.train_step()
    else:
        model = Model(path=opts["model_dir"], tokenizer=tokenizer, encoder=encoder)
    
    
    if opts["train_seq"]:
        seq_model = SequenceModel(tokenizer=tokenizer, encoder=encoder,model_name="sequence")
        for i in range(1000):
            seq_model.train_step()
    else:
        seq_model = SequenceModel(path=opts["model_dir"], tokenizer=tokenizer, encoder=encoder)
        
    runtime_model = RuntimeModel(model, seq_model)
    
    if opts["interactive"]:
        while runtime_model.done is False:
            print(runtime_model.next())
            inp = input("> ")
            runtime_model.process(inp)
    return runtime_model
    
def main():
    # parse command line options
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except Exception as msg:
        print(msg)
        print("for help use --help")
        sys.exit(2)
    
    opts = {
        "train_classifier":False,
        "classifier_path":None,
        "train_seq":False,
        "seq_path":None
    }
    
    # process options
    for o, a in opts:
        if o in ("-h", "--help"):
            print(__doc__)
            sys.exit(0)
            continue
        if o is "--train-classifier":
            opts["train_classifier"] = true
        if o is "--train-seq" :
            opts["train_seq"] = true
        if o is "--model-dir":
            opts["model_dir"] = a
        
    run_with_opts(opts)

#if __name__ == "__main__":
#    main()

In [None]:
model = run_with_opts({"model_dir":"./", "train_classifier":True, "train_seq":True, "interactive":False})



50
tensor(0.7496, device='cuda:0', grad_fn=<DivBackward0>)
Sample: Not really.
Positive Inference 1: You have seen some musicals.
tensor([[[-0.6694, -0.7833]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: When you were at university, you had at least 10 hours of classes.
tensor([[[-1.4414, -0.5743]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Sample: No, I don't know how to make curry.
Positive Inference 1: You don't know how to make curry.
tensor([[[-0.0513,  0.5419]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You read more than three books per year.
tensor([[[0.3805, 0.4403]]], device='cuda:0', grad_fn=<CudnnBatchNormBackward>)
Sample: Not really.
Positive Inference 1: You have seen some musicals.
tensor([[[-0.6694, -0.7833]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: When you were at university, you had at least 10 hours of classes.
tensor([[[-1.4414, -0.

tensor([[[-0.2815,  1.1170]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You read more than three books per year.
tensor([[[0.7718, 0.5094]]], device='cuda:0', grad_fn=<CudnnBatchNormBackward>)
Sample: No.
Positive Inference 1: You do not own a car.
tensor([[[-1.0231, -1.7520]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You don't know how to drive.
tensor([[[-1.4855, -1.5277]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Sample: No, I don't like it.
Positive Inference 1: You hate your job.
tensor([[[0.5006, 1.0025]]], device='cuda:0', grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You read around 10 books last year.
tensor([[[0.1727, 0.8923]]], device='cuda:0', grad_fn=<CudnnBatchNormBackward>)
Sample: Not really.
Positive Inference 1: You have seen some musicals.
tensor([[[-0.6680, -1.0727]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: When you were

tensor([[[-0.0183, -0.2061]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You work for an engineering company.
tensor([[[-0.2837,  0.0191]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Sample: I am a doctor.
Positive Inference 1: You work in health.
tensor([[[-0.0183, -0.2061]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You work for an engineering company.
tensor([[[-0.2837,  0.0191]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Sample: No.
Positive Inference 1: You do not own a car.
tensor([[[-0.9456, -1.6332]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You don't know how to drive.
tensor([[[-1.3950, -1.3990]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Sample: Winter.
Positive Inference 1: Your favourite season is winter.
tensor([[[-0.2373, -0.7409]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inferen

tensor([[[-0.6279, -0.9923]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: When you were at university, you had at least 10 hours of classes.
tensor([[[-2.5173, -0.4748]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Sample: I am a doctor.
Positive Inference 1: You work in health.
tensor([[[-0.0721, -0.2537]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You work for an engineering company.
tensor([[[-0.3288, -0.0359]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Accuracy : 0.300000
####################
450
tensor(0.3951, device='cuda:0', grad_fn=<DivBackward0>)
Sample: No, I don't like it.
Positive Inference 1: You hate your job.
tensor([[[0.5768, 1.1312]]], device='cuda:0', grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You read around 10 books last year.
tensor([[[0.2414, 1.0204]]], device='cuda:0', grad_fn=<CudnnBatchNormBackward>)
Sample: I play basketball
Positive Inferen

tensor([[[-1.0343, -1.7409]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You don't know how to drive.
tensor([[[-1.4961, -1.5002]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Sample: I play basketball
Positive Inference 1: You play basketball.
tensor([[[-0.3571, -0.4446]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You regularly read the newspaper on the weekend.
tensor([[[-0.5671, -0.9963]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Sample: Winter.
Positive Inference 1: Your favourite season is winter.
tensor([[[-0.3066, -0.8240]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Negative Inference 1: You play an offensive position in basketball.
tensor([[[-0.3291, -1.0736]]], device='cuda:0',
       grad_fn=<CudnnBatchNormBackward>)
Sample: Icecream.
Positive Inference 1: You like eating icecream.
tensor([[[-0.8508, -0.7592]]], device='cuda:0',
       grad_fn=<CudnnB

In [None]:
model = run_with_opts({"model_dir":"./", "train_classifier":False, "train_seq":False, "interactive":False})
print(model.next())
model.process("I play tennis")