In [66]:
import json
import sagemaker
import torch
import torch.nn as nn
from sagemaker import get_execution_role
from sagemaker.pytorch import PyTorch as PyTorchEstimator, PyTorchModel

In [2]:
session = sagemaker.Session()
role = get_execution_role()

## Download test model

In [3]:
!pip install -q transformers

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_latest_p36/bin/python -m pip install --upgrade pip' command.[0m


#### Context Encoder

In [4]:
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer

In [5]:
c_tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

In [6]:
c_model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')

Downloading:   0%|          | 0.00/492 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [7]:
input_ids = c_tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]

In [8]:
with torch.no_grad():
    embeddings = c_model(input_ids).pooler_output
embeddings.shape

torch.Size([1, 768])

In [9]:
c_tokenizer.save_pretrained('code/dpr-ctx_encoder-single-nq-base-tokenizer')
c_model.save_pretrained('code/dpr-ctx_encoder-single-nq-base-model')

#### Question Encoder

In [10]:
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer

In [11]:
q_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

In [12]:
q_model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')

Downloading:   0%|          | 0.00/493 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [13]:
input_ids = q_tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]

In [14]:
with torch.no_grad():
    embeddings = q_model(input_ids).pooler_output
embeddings.shape

torch.Size([1, 768])

In [15]:
q_tokenizer.save_pretrained('code/dpr-question_encoder-single-nq-base-tokenizer')
q_model.save_pretrained('code/dpr-question_encoder-single-nq-base-model')

#### Reader

In [16]:
from transformers import DPRReader, DPRReaderTokenizer

In [17]:
r_tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

In [18]:
r_model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base')

Downloading:   0%|          | 0.00/484 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [19]:
encoded_inputs = r_tokenizer(
        questions=["What is love ?"],
        titles=["Haddaway"],
        texts=["'What Is Love' is a song recorded by the artist Haddaway"],
        return_tensors='pt'
    )

In [20]:
with torch.no_grad():
    outputs = r_model(**encoded_inputs)

In [21]:
start_logits = outputs.start_logits
end_logits = outputs.end_logits
relevance_logits = outputs.relevance_logits

In [22]:
import torch
with torch.no_grad():
    print(torch.softmax(start_logits, dim=-1), torch.softmax(end_logits, dim=-1), relevance_logits)

tensor([[2.5585e-04, 2.2813e-04, 1.1821e-04, 1.0410e-04, 8.9758e-05, 4.9391e-04,
         3.3133e-04, 9.3138e-05, 7.6866e-05, 4.9471e-04, 2.4096e-04, 6.1555e-03,
         3.0341e-04, 5.4163e-04, 1.1801e-04, 2.0656e-03, 7.0882e-01, 2.0653e-03,
         1.3085e-03, 3.0284e-04, 5.9053e-03, 4.0717e-03, 2.6339e-01, 6.1443e-04,
         1.8134e-03]]) tensor([[0.0048, 0.0015, 0.0020, 0.0054, 0.0038, 0.1663, 0.0012, 0.0021, 0.0069,
         0.1666, 0.0012, 0.0020, 0.0018, 0.0424, 0.0026, 0.0013, 0.0055, 0.1844,
         0.0043, 0.0105, 0.0500, 0.1402, 0.0529, 0.0058, 0.1346]]) tensor([-1.2456])


In [23]:
r_tokenizer.save_pretrained('code/dpr-reader-single-nq-base-tokenizer')
r_model.save_pretrained('code/dpr-reader-single-nq-base-model')

#### Generate full model

In [74]:
import re

import torch
import torch.nn as nn

In [75]:
class Model(nn.Module):
    
    def __init__(self):
        super(Model, self).__init__()
        self.c_model = DPRContextEncoder.from_pretrained('code/dpr-ctx_encoder-single-nq-base-model')
        self.c_tokenizer = DPRContextEncoderTokenizer.from_pretrained('code/dpr-ctx_encoder-single-nq-base-tokenizer')
        self.q_model = DPRQuestionEncoder.from_pretrained('code/dpr-question_encoder-single-nq-base-model')
        self.q_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('code/dpr-question_encoder-single-nq-base-tokenizer')
        self.r_model = DPRReader.from_pretrained('code/dpr-reader-single-nq-base-model')
        self.r_tokenizer = DPRReaderTokenizer.from_pretrained('code/dpr-reader-single-nq-base-tokenizer')
        with open('code/pg1974.txt') as f:
            self.text = re.sub(r'\s+', ' ', f.read())
        self.contexts = re.findall(r'.{7,}?[.?!]', self.text)
        context_embeddings = []
        with torch.no_grad():
            for context in self.contexts:
                input_ids = c_tokenizer(context, return_tensors='pt')["input_ids"]
                output = self.c_model(input_ids)
                context_embeddings.append(output.pooler_output)
        self.context_embeddings = nn.Parameter(torch.cat(context_embeddings, dim=0))
    
    def forward(self, question):
        q_input_ids = self.q_tokenizer(question, return_tensors='pt')['input_ids']
        q_output = self.q_model(q_input_ids)
        q_embedding = q_output.pooler_output
        similarities = torch.cosine_similarity(q_embedding, self.context_embeddings)
        topk_similarities = torch.topk(similarities, k=10, dim=-1)
        contexts = [self.contexts[i] for i in topk_similarities.indices]
        encoded_inputs = r_tokenizer(
            questions=[question for _ in contexts],
#             titles=[],
            texts=contexts,
            return_tensors='pt',
            padding=True,
            truncation=True
        )
        r_output = self.r_model(**encoded_inputs)
        return r_output.start_logits, r_output.end_logits, encoded_inputs['input_ids'], r_output.relevance_logits

In [76]:
model = Model()

In [77]:
with torch.no_grad():
    print(model('What is comedy?'))

(tensor([[ -6.7622,  -8.6719,  -9.3351,  -9.5752,  -9.6634,  -9.3313,  -8.2687,
          -8.9506,  -8.5052,  -7.6946,  -9.4911,  -9.0801,  -9.4368,  -6.3643,
          -9.2788, -10.2195, -10.3676, -10.4112, -10.0516, -11.0452, -10.0121,
          -8.1517,  -8.9436,  -9.3258,  -9.2168,  -9.3015, -11.0162, -11.5700,
         -12.1127, -11.9149, -11.5115, -12.0661, -12.2538, -12.3589, -12.3723,
         -12.2063, -12.2309, -12.4014, -12.3581, -12.3731, -12.2477, -12.3694,
         -12.4034, -12.3199, -12.4176, -12.3798, -10.2013, -12.3394, -12.4206,
         -12.4058, -12.3400, -12.1302, -12.0025, -12.1467, -12.0612, -11.4663,
         -11.6981, -12.1386, -12.0845, -11.9194, -11.5851, -12.2261, -12.2780,
         -12.3107, -12.2934, -12.3199, -12.2477, -12.3278, -12.3605, -12.3724,
         -12.3932, -12.2460],
        [ -8.1278,  -9.1700,  -9.9306,  -9.9237, -10.6872,  -9.6525,  -8.8582,
          -8.4431,  -9.3546, -10.0778,  -9.5079,  -9.4735, -10.5986,  -9.8921,
         -10.6934,  -

In [78]:
from time import time
start_time = time()
with torch.no_grad():
    start_logits, end_logits, input_ids, relevance_logits = model('What is comedy?')
topk_relevant = torch.topk(relevance_logits, k=relevance_logits.shape[0] // 2, dim=-1)
starts = torch.argmax(start_logits, dim=-1)
ends = torch.argmax(end_logits, dim=-1)
for index in topk_relevant.indices:
    question_context = input_ids[index]
    start = starts[index]
    end = ends[index]
    print(start, end)
    print(r_tokenizer.decode(question_context[start:end + 1]))
    print(r_tokenizer.decode(question_context))
    print()
print('total time:', time() - start_time)

tensor(12) tensor(12)
tragedy
[CLS] what is comedy? [SEP] be that as it may, tragedy - - as also comedy - - was at first mere improvisation. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]

tensor(22) tensor(22)
worse
[CLS] what is comedy? [SEP] the same distinction marks off tragedy from comedy ; for comedy aims at representing men as worse, tragedy as better than in actual life. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]

tensor(32) tensor(32)
imitation
[CLS] what is comedy? [SEP] they differ, however, from one : another in three respects, - - the medium, the objects

In [79]:
torch.save(model.state_dict(), 'model.pth')

In [80]:
import tarfile
with tarfile.open('model.tar.gz', mode='w:gz') as archive:
    archive.add('model.pth', recursive=True)

In [81]:
artifact = session.upload_data(path='model.tar.gz', bucket='kbchatter', key_prefix='haystack')

In [82]:
artifact

's3://kbchatter/haystack/model.tar.gz'

## Now to deployment

In [83]:
pytorch_model = PyTorchModel(model_data=artifact,
                             role=role,
                             framework_version='1.6.0',
                             py_version='py3',
                             entry_point='code/predict.py',
                             source_dir='code',
                             dependencies=[
                                 'code/pg1974.txt',
                                 'code/dpr-ctx_encoder-single-nq-base-model',
                                 'code/dpr-ctx_encoder-single-nq-base-tokenizer',
                                 'code/dpr-question_encoder-single-nq-base-model',
                                 'code/dpr-question_encoder-single-nq-base-tokenizer',
                                 'code/dpr-reader-single-nq-base-model',
                                 'code/dpr-reader-single-nq-base-tokenizer'
                             ],
                             )

In [84]:
predictor = pytorch_model.deploy(instance_type='ml.c4.xlarge', initial_instance_count=1, wait=True)

-----------------------------!

In [85]:
predictor.endpoint  # for lambda

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


'pytorch-inference-2021-07-03-22-33-13-305'

In [86]:
predictor.serializer = sagemaker.serializers.JSONSerializer()
predictor.deserializer = sagemaker.deserializers.JSONDeserializer()

In [87]:
from time import time

In [94]:
start = time()
print(predictor.predict({'question': 'What is comedy?'}))  # need to convert output/input to json for lambda
print(time() - start)

['tragedy', 'worse', 'imitation', 'the ugly', 'greece']
0.992013692855835


In [None]:
# predictor.update_endpoint()

## Inspect the predictor and pytorch_model classes

In [None]:
import inspect

In [None]:
lines = inspect.getsource(predictor.__class__)
print(lines)

In [None]:
for cls in inspect.getmro(predictor.__class__):
    lines = inspect.getsource(cls)
    print(lines)

In [None]:
for cls in inspect.getmro(pytorch_model.__class__):
    lines = inspect.getsource(cls)
    print(lines)

## Remove model and endpoint

In [95]:
pytorch_model.delete_model()

In [96]:
predictor.delete_endpoint()