In [2]:
import torch
import app
import os

In [5]:
import torch
import Models.pytorch_joy_and_anger.joy_and_anger_utils as model_utils

Load model

In [3]:
model = torch.jit.load(os.path.join(app.root(), "Models", "pytorch_joy_and_anger", "pytorch_joy_and_anger_model_torchscript.pt"))

In [4]:
model.eval()

RecursiveScriptModule(
  original_name=HappyClassifierModel
  (embedding): RecursiveScriptModule(original_name=EmbeddingBag)
  (lstm): RecursiveScriptModule(original_name=LSTM)
  (linear1): RecursiveScriptModule(original_name=Linear)
  (fc1): RecursiveScriptModule(
    original_name=Sequential
    (0): RecursiveScriptModule(original_name=Linear)
    (1): RecursiveScriptModule(original_name=ReLU)
  )
  (linear2): RecursiveScriptModule(original_name=Linear)
  (fc2): RecursiveScriptModule(
    original_name=Sequential
    (0): RecursiveScriptModule(original_name=Linear)
  )
)

Load data

In [6]:
train_ds = model_utils.HappyClassifierDataset("train.txt", probabilistic=True)

loaded 7520 items
{'joy': 0.0, 'anger': 1.0}
('im grabbing a minute to post i feel greedy wrong', 1)


load python pre-processor

In [7]:
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

In [8]:
tokenizer = get_tokenizer('basic_english')

In [9]:
vocab = build_vocab_from_iterator(list(map(lambda k: tokenizer(k), [txt for txt, label in train_ds.train_data])), specials=["<unk>"])

In [10]:
vocab.set_default_index(vocab["<unk>"])

In [11]:
vocab(['great', 'day', "we're", 'having'])

[353, 96, 0, 171]

In [12]:
tokenizer

<function torchtext.data.utils._basic_english_normalize(line)>

In [13]:
# idx 2 has issues

In [14]:
text_pipeline = lambda x: vocab(tokenizer(x))
label_pipeline = lambda x: x

Iterate our dataloader and see the results we get with only a single offset

In [22]:
def evaluate_accuracy(model, ds: list):
    accuracy = 0
    total_count = 0

    for i, (text, label) in enumerate(ds):
        processed_text = text_pipeline(text)
        text_input = torch.as_tensor(processed_text, dtype=torch.int64)
        offset_input = torch.as_tensor([0], dtype=torch.int32)
        pred = model(text_input, offset_input)
        print(pred, label)

        accuracy += (torch.argmax(pred) == label).sum().item()
        total_count += 1
        print(f"Iteration {i} : Accuracy: {accuracy / total_count}, # Correct: {(accuracy / total_count) * total_count} / {total_count}")


In [23]:
evaluate_accuracy(model, train_ds.train_data)

tensor([[-0.8688,  0.9009]], grad_fn=<DifferentiableGraphBackward>) 1
Iteration 0 : Accuracy: 1.0, # Correct: 1.0 / 1
tensor([[-2.6078,  2.9104]], grad_fn=<DifferentiableGraphBackward>) 1
Iteration 1 : Accuracy: 1.0, # Correct: 2.0 / 2
tensor([[ 0.5012, -0.7078]], grad_fn=<DifferentiableGraphBackward>) 0
Iteration 2 : Accuracy: 1.0, # Correct: 3.0 / 3
tensor([[ 1.9383, -2.3531]], grad_fn=<DifferentiableGraphBackward>) 0
Iteration 3 : Accuracy: 1.0, # Correct: 4.0 / 4
tensor([[-0.0424, -0.0848]], grad_fn=<DifferentiableGraphBackward>) 1
Iteration 4 : Accuracy: 0.8, # Correct: 4.0 / 5
tensor([[ 1.0332, -1.3709]], grad_fn=<DifferentiableGraphBackward>) 0
Iteration 5 : Accuracy: 0.8333333333333334, # Correct: 5.0 / 6
tensor([[ 1.3337, -1.6736]], grad_fn=<DifferentiableGraphBackward>) 0
Iteration 6 : Accuracy: 0.8571428571428571, # Correct: 6.0 / 7
tensor([[-2.2486,  2.5371]], grad_fn=<DifferentiableGraphBackward>) 1
Iteration 7 : Accuracy: 0.875, # Correct: 7.0 / 8
tensor([[ 1.9471, -2.394

In [24]:
test_ds = model_utils.HappyClassifierDataset("test.txt", probabilistic=True)

loaded 970 items
{'joy': 0.0, 'anger': 1.0}
('i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived', 0)


In [25]:
evaluate_accuracy(model, test_ds.train_data)

tensor([[ 1.1173, -1.4173]], grad_fn=<DifferentiableGraphBackward>) 0
Iteration 0 : Accuracy: 1.0, # Correct: 1.0 / 1
tensor([[-0.6727,  0.6982]], grad_fn=<DifferentiableGraphBackward>) 1
Iteration 1 : Accuracy: 1.0, # Correct: 2.0 / 2
tensor([[ 0.6787, -0.9320]], grad_fn=<DifferentiableGraphBackward>) 0
Iteration 2 : Accuracy: 1.0, # Correct: 3.0 / 3
tensor([[ 1.5835, -1.9704]], grad_fn=<DifferentiableGraphBackward>) 0
Iteration 3 : Accuracy: 1.0, # Correct: 4.0 / 4
tensor([[ 0.5843, -0.8203]], grad_fn=<DifferentiableGraphBackward>) 1
Iteration 4 : Accuracy: 0.8, # Correct: 4.0 / 5
tensor([[ 0.4314, -0.6609]], grad_fn=<DifferentiableGraphBackward>) 0
Iteration 5 : Accuracy: 0.8333333333333334, # Correct: 5.0 / 6
tensor([[ 2.2610, -2.7868]], grad_fn=<DifferentiableGraphBackward>) 0
Iteration 6 : Accuracy: 0.8571428571428571, # Correct: 6.0 / 7
tensor([[-0.3751,  0.2793]], grad_fn=<DifferentiableGraphBackward>) 1
Iteration 7 : Accuracy: 0.875, # Correct: 7.0 / 8
tensor([[ 1.4439, -1.838