# Report

# Statistical Methods for Natural Language Semantics

## Konstantin Todorov
## Student number: 12402559

### Repository link: https://github.com/ktodorov/uva-semantics-19


<b>Results:</b>

In [1]:
from nltk import word_tokenize
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchtext
import torchtext.data
import os
import io
import pickle

from encoders.encoding_helper import EncodingHelper

from helpers.cache_storage import CacheStorage
from helpers.data_storage import DataStorage

from inference_model import InferenceModel

In [8]:
def transform_sentence(sentence, w2i_dict, device):
    """Tokenize a sentence and encode it as a tensor of vocabulary indices.

    Args:
        sentence: Raw sentence text; it is split with NLTK's ``word_tokenize``.
        w2i_dict: Mapping from token string to integer index. Tokens are
            looked up with ``w2i_dict[token]``, so a plain ``dict`` raises
            ``KeyError`` for a token it does not contain.
        device: Device on which the returned tensors are created.

    Returns:
        A tuple ``(indexes, length)``. ``indexes`` is a 1-D ``int64`` tensor
        holding one index per token; ``length`` is a 1-element ``int64``
        tensor holding the number of tokens.
    """
    tokens = word_tokenize(sentence)

    # The explicit dtype keeps the result integer-typed even when there are no
    # tokens; torch.tensor([]) would otherwise default to a float tensor.
    indexes = torch.tensor(
        [w2i_dict[token] for token in tokens], dtype=torch.long, device=device)
    length = torch.tensor([len(tokens)], dtype=torch.long, device=device)

    return indexes, length


def calculate_inference(model, token_vocabulary, label_dictionary, device, premise, hypothesis):
    """Predict and print the relation between a premise and a hypothesis.

    Args:
        model: Trained model; its ``forward`` method is called with an
            ``InferenceModel`` built from the two encoded sentences.
        token_vocabulary: Vocabulary object whose ``stoi`` attribute maps
            tokens to indices.
        label_dictionary: Mapping from predicted class index to the phrase
            inserted into the printed sentence.
        device: Device on which the input tensors are created.
        premise: Raw premise text.
        hypothesis: Raw hypothesis text.

    Returns:
        None. The result is printed to standard output.
    """
    premise_indexes, premise_length = transform_sentence(
        premise, token_vocabulary.stoi, device)

    hypothesis_indexes, hypothesis_length = transform_sentence(
        hypothesis, token_vocabulary.stoi, device)

    # For a 1-D index tensor, expand + transpose yields a column of shape
    # (number_of_tokens, 1).
    inference_model = InferenceModel(
        premise_indexes.expand(1, -1).transpose(0, 1),
        premise_length,
        hypothesis_indexes.expand(1, -1).transpose(0, 1),
        hypothesis_length)

    # Pure inference: disable autograd so no computation graph is recorded.
    # NOTE(review): the model's train/eval mode is left untouched here —
    # confirm the loaded snapshot is already in evaluation mode.
    with torch.no_grad():
        model_prediction = model.forward(inference_model)

    predicted_label = label_dictionary[model_prediction.argmax().item()]
    print(
        f"The premise {predicted_label} the hypothesis")


def initialize_data():
    """Select the compute device and load the vocabulary and label mapping.

    Returns:
        A tuple ``(device, token_vocabulary, label_dictionary)``:
        ``device`` is the CUDA device when CUDA is available and the CPU
        otherwise; ``token_vocabulary`` is the first element returned by
        ``DataStorage.get_vocabulary()``; ``label_dictionary`` maps a
        predicted class index to a human-readable phrase.
    """
    # Fall back to the CPU so the notebook still runs on machines without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the vocabulary
    print('Loading data...', end='')

    data_storage = DataStorage()
    token_vocabulary, _ = data_storage.get_vocabulary()

    print('Loaded')

    label_dictionary = {
        0: "entails",
        1: "contradicts",
        2: "is neutral to"
    }

    return device, token_vocabulary, label_dictionary

def initialize_model(model_path, device):
    """Load a model snapshot from disk.

    Args:
        model_path: Path to the snapshot file.
        device: Not used by this function; kept so existing callers keep
            working.

    Returns:
        The object returned by ``CacheStorage.load_model_snapshot``.

    Raises:
        AssertionError: If ``model_path`` is not an existing file.
        Exception: If the snapshot loader returns a falsy value.
    """
    # Raise explicitly instead of using `assert`, which is stripped when
    # Python runs with -O and would silently skip this validation.
    if not os.path.isfile(model_path):
        raise AssertionError('Model path is not valid')

    cache_storage = CacheStorage()

    print('Loading model...', end='')
    model = cache_storage.load_model_snapshot(model_path)
    if not model:
        raise Exception('Model not found!')

    print('Loaded')

    return model

In [3]:
# Select the device and load the vocabulary and the class-index -> label
# mapping; the cells below reuse these three names.
device, token_vocabulary, label_dictionary = initialize_data()

Loading data...Loaded


In [13]:
# Snapshot file of the model to evaluate. The commented-out line is an
# alternative snapshot that can be swapped in instead.
MODEL_PATH = 'results/mean/best_snapshot_devacc_60.797077922077925_devloss_0.9943482875823975__iter_25752_model.pt'
# MODEL_PATH = 'results/uni-lstm/best_snapshot_devacc_34.52150974025974_devloss_1.0959851741790771__iter_25752_model.pt'

# Load the snapshot; this raises if the path is not a file or nothing is loaded.
model = initialize_model(MODEL_PATH, device)

Loading model...Loaded


In [19]:
# Sentence pair to classify. Swap in the commented-out hypothesis to try a
# different pair against the same premise.
premise = 'the boy is doing a test'
hypothesis = 'the boy is doing a test at the university'
# hypothesis = 'the boy is doing a test well'

# Prints the relation the model predicts between the premise and the hypothesis.
calculate_inference(model, token_vocabulary,
                    label_dictionary, device, premise, hypothesis)

The premise is neutral to the hypothesis
