In [28]:
import torch
import app
import os

In [29]:
import torch
import Models.pytorch_joy_and_anger.joy_and_anger_utils as model_utils

Load model

In [30]:
# Deserialize the exported TorchScript model stored under
# <app root>/Models/pytorch_joy_and_anger/.
model = torch.jit.load(
    os.path.join(
        app.root(),
        "Models",
        "pytorch_joy_and_anger",
        "pytorch_joy_and_anger_model_torchscript.pt",
    )
)

In [31]:
# Put the loaded module in evaluation mode. eval() returns the module itself,
# so the cell output shows its sub-module structure.
model.eval()

RecursiveScriptModule(
  original_name=HappyClassifierModel
  (embedding): RecursiveScriptModule(original_name=EmbeddingBag)
  (lstm): RecursiveScriptModule(original_name=LSTM)
  (linear1): RecursiveScriptModule(original_name=Linear)
  (fc1): RecursiveScriptModule(
    original_name=Sequential
    (0): RecursiveScriptModule(original_name=Linear)
    (1): RecursiveScriptModule(original_name=ReLU)
  )
  (linear2): RecursiveScriptModule(original_name=Linear)
  (fc2): RecursiveScriptModule(
    original_name=Sequential
    (0): RecursiveScriptModule(original_name=Linear)
  )
)

Load data

In [32]:
# Load the training split from train.txt. The constructor prints the item
# count, the label mapping and one sample.
# NOTE(review): the effect of probabilistic=True is defined in
# joy_and_anger_utils — confirm there.
train_ds = model_utils.HappyClassifierDataset("train.txt", probabilistic=True)

loaded 7520 items
{'joy': 0.0, 'anger': 1.0}
('im grabbing a minute to post i feel greedy wrong', 1)


Load Python pre-processor

In [33]:
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

In [34]:
# Fetch torchtext's 'basic_english' tokenizer (a plain function that takes one line of text).
tokenizer = get_tokenizer('basic_english')

In [35]:
# Tokenize every training sentence and build the vocabulary from the resulting
# token lists; "<unk>" is registered as a special token.
vocab = build_vocab_from_iterator(
    [tokenizer(sentence) for sentence, _label in train_ds.train_data],
    specials=["<unk>"],
)

In [36]:
# Tokens that are not in the vocabulary resolve to the "<unk>" index
# instead of raising an error on lookup.
vocab.set_default_index(vocab["<unk>"])

In [37]:
# Spot-check the token -> index mapping on a few hand-picked tokens.
vocab(['great', 'day', "we're", 'having'])

[353, 96, 0, 171]

In [38]:
# Display the tokenizer object to see which function get_tokenizer returned.
tokenizer

<function torchtext.data.utils._basic_english_normalize(line)>

In [39]:
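# idx 2 ("we're") came back as 0, i.e. the "<unk>" default index: the raw token
# "we're" is not in the vocabulary (presumably because the tokenizer splits
# contractions, so text should go through `tokenizer` before lookup).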

In [40]:
def text_pipeline(x):
    """Tokenize raw text and map the tokens to their vocabulary indices."""
    return vocab(tokenizer(x))


def label_pipeline(x):
    """Return the label unchanged; dataset labels are used as-is."""
    return x

Iterate over our dataset one sample at a time and see the results we get when each call passes only a single offset

In [41]:
def evaluate_accuracy(model, ds: list, pipeline=None):
    """Score ``model`` on ``ds`` one sample at a time and print running accuracy.

    Each text is converted to token ids, wrapped in an int64 tensor and passed
    to the model together with a single offset ``[0]``. The predicted class is
    the argmax of the model output and is compared against the sample label.
    Progress is printed roughly five times over the course of the run.

    Args:
        model: Callable taking ``(text_input, offset_input)`` tensors and
            returning a tensor of class scores.
        ds: Sequence of ``(text, label)`` pairs; must support ``len``.
        pipeline: Optional callable mapping a text to a list of token ids.
            Defaults to the notebook-level ``text_pipeline``.

    Returns:
        The fraction of correctly classified samples as a float, or ``0.0``
        when ``ds`` is empty.
    """
    if pipeline is None:
        pipeline = text_pipeline

    correct = 0
    total_count = 0

    # Report about every fifth of the dataset. Clamp to 1 so that datasets
    # with fewer than five samples do not cause a modulo-by-zero.
    report_every = max(1, len(ds) // 5)

    # The offset tensor is the same for every sample, so build it once.
    offset_input = torch.as_tensor([0], dtype=torch.int32)

    # Pure evaluation: no need for autograd to track these forward passes.
    with torch.no_grad():
        for i, (text, label) in enumerate(ds):
            text_input = torch.as_tensor(pipeline(text), dtype=torch.int64)
            pred = model(text_input, offset_input)

            correct += (torch.argmax(pred) == label).sum().item()
            total_count += 1

            if (i + 1) % report_every == 0:
                # The "# Correct" expression is kept as a float round-trip so
                # the log format matches previously recorded runs.
                print(f"Iteration {i} : Accuracy: {correct / total_count}, # Correct: {(correct / total_count) * total_count} / {total_count}")

    return correct / total_count if total_count else 0.0


In [42]:
# Accuracy of the loaded model on the training samples.
evaluate_accuracy(model, train_ds.train_data)

Iteration 1503 : Accuracy: 0.9335106382978723, # Correct: 1404.0 / 1504
Iteration 3007 : Accuracy: 0.9384973404255319, # Correct: 2823.0 / 3008
Iteration 4511 : Accuracy: 0.9388297872340425, # Correct: 4236.0 / 4512
Iteration 6015 : Accuracy: 0.9363364361702128, # Correct: 5633.0 / 6016
Iteration 7519 : Accuracy: 0.9344414893617021, # Correct: 7027.0 / 7520


In [43]:
# Load the test split from test.txt using the same dataset class and options
# as the training split.
test_ds = model_utils.HappyClassifierDataset("test.txt", probabilistic=True)

loaded 970 items
{'joy': 0.0, 'anger': 1.0}
('i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived', 0)


In [44]:
# Accuracy on the test.txt samples. Despite its name, `train_data` here is the
# sample list of test_ds, not the training split.
# NOTE(review): confirm the attribute naming in joy_and_anger_utils.
evaluate_accuracy(model, test_ds.train_data)

Iteration 193 : Accuracy: 0.9329896907216495, # Correct: 181.0 / 194
Iteration 387 : Accuracy: 0.9097938144329897, # Correct: 353.0 / 388
Iteration 581 : Accuracy: 0.9243986254295533, # Correct: 538.0 / 582
Iteration 775 : Accuracy: 0.9265463917525774, # Correct: 719.0 / 776
Iteration 969 : Accuracy: 0.9237113402061856, # Correct: 896.0 / 970
