In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%config Completer.use_jedi = False

## Basic app based on bm25

### Define the application package

In [3]:
from vespa.package import ApplicationPackage, Field, FieldSet, RankProfile

# Application package named "cord19".
app_package = ApplicationPackage(name="cord19")

# Document fields: the paper id is kept as an attribute, the title is indexed
# with BM25 enabled; both are returned in the result summary.
app_package.schema.add_fields(
    Field(
        name="cord_uid",
        type="string",
        indexing=["attribute", "summary"],
    ),
    Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    ),
)

# Field set "default" contains only the title.
app_package.schema.add_field_set(FieldSet(name="default", fields=["title"]))

# Baseline rank profile: score documents by BM25 on the title.
app_package.schema.add_rank_profile(
    RankProfile(name="bm25", first_phase="bm25(title)")
)

### Deploy the application

Define the absolute disk path to store the application files.

In [4]:
import os

# Respect a WORK_DIR that is already set in the environment; otherwise fall
# back to the current working directory (an absolute path) instead of a
# directory that only exists on the original author's machine.
os.environ.setdefault("WORK_DIR", os.getcwd())

# Folder where the application files are stored when deploying.
disk_folder = os.path.join(os.environ["WORK_DIR"], "sample_application")

Deploy to a docker container

In [5]:
from vespa.package import VespaDocker

vespa_docker = VespaDocker(port=8089)

app = vespa_docker.deploy(
    application_package = app_package,
    disk_folder=disk_folder
)

### Feed data to the application

In [6]:
from pandas import read_csv

# Load the pre-parsed CORD-19 feed. The feeding loops later in this notebook
# read the `cord_uid` and `title` columns of this frame.
# NOTE(review): this is an absolute path on the author's machine; the cell
# fails anywhere the file is not available at exactly this location.
parsed_feed = read_csv("/Users/tmartins/projects/sw/blog/_notebooks/data/2021-01-18-cord19-deploy-bert-from-pyvespa/parsed_feed.csv")

In [7]:
parsed_feed = parsed_feed.head(100)

In [8]:
# Feed every row of the sample to the "cord19" schema, one document per row,
# using the paper's cord_uid as the document id.
for _, paper in parsed_feed.iterrows():
    doc_id = str(paper["cord_uid"])
    document = {
        "cord_uid": doc_id,
        "title": str(paper["title"]),
    }
    response = app.feed_data_point(schema="cord19", data_id=doc_id, fields=document)

### Make a query

In [9]:
from vespa.query import QueryModel, RankProfile as Ranking, OR

result = app.query(
    query="this is a test", 
    query_model=QueryModel(
        match_phase = OR(),
        rank_profile = Ranking(name="bm25")
    )
)

In [10]:
result.number_documents_retrieved

26

## Exporting a BERT model to ONNX

* Create a `BertModelConfig` (`from vespa.ml import BertModelConfig`) that contains all the information required to ensure training and serving compatibility. The instance can be used when including the model in the application package.

In [11]:
from transformers import BertForSequenceClassification, BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained("prajjwal1/bert-tiny")
model = BertForSequenceClassification.from_pretrained(
    "prajjwal1/bert-tiny")  # This could be any pytorch BERT model

Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

In [12]:
from vespa.ml import BertModelConfig

# Configuration shared by training and serving: the tokenizer plus the size
# budgets for the query and document inputs.
# The model_id is the name later used as the rank profile in the query cell
# (`Ranking(name="pretrained_bert_tiny")`).
bert_config = BertModelConfig(
    model_id="pretrained_bert_tiny",
    tokenizer=tokenizer,
    query_input_size=32,
    doc_input_size=96
)

In [13]:
from vespa.package import SecondPhaseRanking

# Add the BERT model to the application package: BM25 on the title in the
# first phase, then the top 10 hits are re-ranked with the "logit0" expression.
# NOTE(review): "logit0" is presumably defined by add_bert_ranking itself —
# confirm against the pyvespa documentation.
app_package.add_bert_ranking(
    model_config=bert_config,
    model=model,
    inherits="default",
    first_phase="bm25(title)",
    second_phase=SecondPhaseRanking(
        rerank_count=10, expression="logit0"
    ),
)

  position_ids = self.position_ids[:, past_key_values_length : seq_length + past_key_values_length]
  assert all(


### Redeploy the application

In [14]:
app = vespa_docker.deploy(
    application_package = app_package,
    disk_folder=disk_folder
)

### Feed data to the application

In [15]:
#bert_config.doc_tensor(text = str(row["title"]))

In [16]:
# Re-feed the sample, this time adding the fields produced by
# bert_config.doc_tensor for each title alongside the plain text fields.
for _, paper in parsed_feed.iterrows():
    doc_id = str(paper["cord_uid"])
    title = str(paper["title"])

    document = {"cord_uid": doc_id, "title": title}
    document.update(bert_config.doc_tensor(text=title))

    response = app.feed_data_point(schema="cord19", data_id=doc_id, fields=document)

### Make a query

In [17]:
from vespa.query import QueryModel, RankProfile as Ranking, OR, QueryRankingFeature

# Query using the BERT rank profile. The query token ids are sent as a query
# ranking feature whose name and mapping both come from bert_config, so they
# stay consistent with the model configuration.
result = app.query(
    query="this is a test", 
    query_model=QueryModel(
        query_properties=[
            QueryRankingFeature(
                name=bert_config.query_token_ids_name, 
                mapping=bert_config.query_tensor_mapping)
        ],
        match_phase = OR(),
        # Rank profile name equals the model_id given to BertModelConfig.
        rank_profile = Ranking(name="pretrained_bert_tiny")
    )
)

In [18]:
result.json

{'root': {'id': 'toplevel',
  'relevance': 1.0,
  'fields': {'totalCount': 26},
  'coverage': {'coverage': 100,
   'documents': 100,
   'full': True,
   'nodes': 1,
   'results': 1,
   'resultsFull': 1},
  'children': [{'id': 'id:cord19:cord19::69gftii4',
    'relevance': 0.3854098618030548,
    'source': 'cord19_content',
    'fields': {'sddocname': 'cord19',
     'documentid': 'id:cord19:cord19::69gftii4',
     'cord_uid': '69gftii4',
     'title': 'The gene of an archaeal α-l-fucosidase is expressed by translational frameshifting',
     'pretrained_bert_tiny_doc_token_ids': {'cells': [{'address': {'d0': '0'},
        'value': 1996.0},
       {'address': {'d0': '1'}, 'value': 4962.0},
       {'address': {'d0': '2'}, 'value': 1997.0},
       {'address': {'d0': '3'}, 'value': 2019.0},
       {'address': {'d0': '4'}, 'value': 7905.0},
       {'address': {'d0': '5'}, 'value': 21996.0},
       {'address': {'d0': '6'}, 'value': 2140.0},
       {'address': {'d0': '7'}, 'value': 1155.0},
   

In [None]:
result.number_documents_retrieved

Easy to encode queries sent to the Vespa application:

In [None]:
bert_config.query_input_ids(queries=["this is a query", "here is another query"])

Easy to encode document text to be fed to the Vespa application:

In [None]:
bert_config.doc_input_ids(docs=["this is a text", "another text"])

Easy to generate data to train a model that is compatible with the data used to serve the model on Vespa:

In [None]:
encodings = bert_config.create_encodings(
    queries=["this is a query", "here is another query"],
    docs=["this is a text", "another text"]
)

In [None]:
encodings.keys()

Validate model

In [None]:
from transformers import BertTokenizer, BertModel

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
#model = BertModel.from_pretrained('bert-base-uncased')

In [None]:
inputs = tokenizer("Hello, my dog is cute", "another one", return_tensors="pt")

In [None]:
inputs

In [None]:
outputs = model(**inputs)

In [None]:
outputs

In [None]:
dummy_input = bert_config._generate_dummy_inputs()

In [None]:
dummy_input

In [None]:
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained("prajjwal1/bert-tiny") # This could be any pytorch BERT model 


In [None]:
output = model(**dummy_input)

In [None]:
output

In [None]:
len(output.logits.shape)

In [None]:
type(model)

Much easier to export a pytorch model (no need to specify input and output names, generate dummy data):

In [None]:
from transformers import BertForSequenceClassification

isinstance(model, BertForSequenceClassification)


In [None]:
from transformers import BertForSequenceClassification

# Load the PyTorch model and export it to ONNX through bert_config.
# (A stray `isinstance(mod)` call was removed: `mod` is undefined and
# `isinstance` requires two arguments, so the cell could never run.)
model = BertForSequenceClassification.from_pretrained("prajjwal1/bert-tiny")  # This could be any pytorch BERT model
bert_config.export_to_onnx(model=model, output_path="bert_tiny.onnx")

### Behind the scenes

In [None]:
def create_bert_encodings(queries, docs, tokenizer, query_input_size, doc_input_size):
    """Build padded BERT inputs for (query, document) pairs.

    Each pair is laid out as ``[CLS] query [SEP] document [SEP]`` followed by
    padding up to ``query_input_size + doc_input_size`` positions.

    Args:
        queries: List of query strings.
        docs: List of document strings, paired positionally with ``queries``.
            If the lists differ in length, the extra items are ignored.
        tokenizer: Callable invoked as
            ``tokenizer(texts, truncation=True, max_length=..., add_special_tokens=False)``
            that returns a mapping whose ``"input_ids"`` entry holds one list
            of token ids per text.
        query_input_size: Positions reserved for the query segment, including
            its [CLS] and [SEP] tokens.
        doc_input_size: Positions reserved for the document segment, including
            its trailing [SEP] token.

    Returns:
        Dict with ``"input_ids"``, ``"token_type_ids"`` and
        ``"attention_mask"``, each a list with one padded list per pair.
    """
    # Leave room for the special tokens: [CLS] + [SEP] around the query and
    # one [SEP] after the document.
    queries_encodings = tokenizer(
        queries, truncation=True, max_length=query_input_size - 2, add_special_tokens=False
    )
    docs_encodings = tokenizer(
        docs, truncation=True, max_length=doc_input_size - 1, add_special_tokens=False
    )

    # NOTE(review): hard-coded ids; presumably they match the special tokens
    # of the tokenizer's vocabulary — confirm when using a different tokenizer.
    TOKEN_NONE = 0
    TOKEN_CLS = 101
    TOKEN_SEP = 102

    # Total padded length follows from the two budgets (32 + 96 = 128 for the
    # sizes used in this notebook) rather than being fixed at 128.
    input_size = query_input_size + doc_input_size

    input_ids = []
    token_type_ids = []
    attention_mask = []
    for query_input_ids, doc_input_ids in zip(queries_encodings["input_ids"], docs_encodings["input_ids"]):
        query_segment = [TOKEN_CLS] + query_input_ids + [TOKEN_SEP]
        doc_segment = doc_input_ids + [TOKEN_SEP]
        number_tokens = len(query_segment) + len(doc_segment)
        padding = [TOKEN_NONE] * max(input_size - number_tokens, 0)

        input_ids.append(query_segment + doc_segment + padding)
        # Segment 0 covers [CLS] query [SEP]; segment 1 covers document [SEP].
        token_type_ids.append([0] * len(query_segment) + [1] * len(doc_segment) + padding)
        # Attend to real tokens only; padding positions get 0.
        attention_mask.append([1] * number_tokens + padding)

    return {
        "input_ids": input_ids,
        "token_type_ids": token_type_ids,
        "attention_mask": attention_mask,
    }

In [None]:
from transformers import BertTokenizerFast

model_name = "prajjwal1/bert-tiny"
#model_name = "google/bert_uncased_L-4_H-512_A-8"
tokenizer = BertTokenizerFast.from_pretrained(model_name)

In [None]:
encodings = create_bert_encodings(
    queries=["dummy query 1"],
    docs=["dummy document 1"],
    tokenizer=tokenizer,
    query_input_size=32,
    doc_input_size=96
)

In [None]:
encodings

In [None]:
from torch import tensor

tensor(encodings["input_ids"]).unsqueeze(0).shape

In [None]:
tensor(encodings["input_ids"]).shape

In [None]:
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained(model_name)

In [None]:
from torch.onnx import export

# Manual ONNX export: the dummy inputs only fix the input signature, and are
# passed positionally in the same order as `input_names`.
model_onnx_path = "bert_tiny.onnx"
input_names = ["input_ids", "token_type_ids", "attention_mask"]
output_names = ["logits"]
dummy_input = tuple(tensor(encodings[input_name]) for input_name in input_names)

export(
    model,
    dummy_input,
    model_onnx_path,
    input_names=input_names,
    output_names=output_names,
    verbose=False,
    opset_version=11,
)

## Extending the application to deploy interaction BERT

In [None]:
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained("prajjwal1/bert-tiny") # This could be any pytorch BERT model 

In [None]:
from vespa.package import SecondPhaseRanking

# Attach the BERT model to the application package: BM25 in the first phase,
# then the top 10 hits are re-ranked with the "logit0" expression.
# NOTE(review): the earlier add_bert_ranking call in this notebook passes no
# `model_id` and uses first_phase="bm25(title)"; the schema defined at the top
# declares no `body` field. Confirm this call matches the installed pyvespa
# signature and the target schema before running it.
app_package.add_bert_ranking(
    model_id = "pretrained_bert_tiny", 
    model_config = bert_config, 
    model = model,
    inherits="default",
    first_phase="bm25(title) + bm25(body)",
    second_phase=SecondPhaseRanking(
        rerank_count=10, expression="logit0"
    ),
)

### Document token ids

* We need a field for the document token ids. 

* Ideally we should name this field according to a BERT model id in case we want to deploy multiple models on the same application. 

* We also need to specify the maximum size of the document vector.

In [None]:
app_package.schema.add_fields(
    Field(name = "doc_token_ids", type = "tensor<float>(d0[96])", indexing = ["attribute", "summary"]),
)

### Query vector type

* Each model deployed should use its own query vector to send the token ids.

* Similar to the document vector the name of the query vector should be influenced by the model used.

* We also need to specify the maximum size of the query vector.

In [None]:
from vespa.package import QueryTypeField

# Declare the query tensor that carries the query token ids (32 positions).
# The call sits at top level: the original indentation after the import made
# the cell fail with an IndentationError.
app_package.query_profile_type.add_fields(
    QueryTypeField(name="ranking.features.query(query_token_ids)", type="tensor<float>(d0[32])")
)

### ONNX model

* The model name here should be the string used to add to the name of the document and query vectors to ensure uniqueness. 

* Somehow we need to make sure the model `bert.onnx` ends up in the right location when deploying the app.

* Need to coordinate input and output names to make sure there is no clash between different models.

In [None]:
from vespa.package import OnnxModel

# Register the ONNX model with the schema. Each entry in `inputs` / `outputs`
# maps a name to an identically spelled name here.
# NOTE(review): presumably keys are the ONNX graph's input/output names and
# values are the rank-profile function names that feed them — confirm against
# the pyvespa OnnxModel documentation.
app_package.schema.add_model(
    OnnxModel(
        model_name="bert",
        file_path="files/bert.onnx",
        inputs={
            "input_ids": "input_ids",
            "token_type_ids": "token_type_ids",
            "attention_mask": "attention_mask",
        },
        outputs={"logits": "logits"},
    )
)

### BERT rank profile

In [None]:
from vespa.package import RankProfile, Function, SecondPhaseRanking

# Rank profile that re-ranks the top first-phase hits with the ONNX BERT model.
# The functions assemble the model inputs from the query and document
# token-id tensors, using the layout
#   [CLS] query tokens [SEP] document tokens [SEP] padding
# over 128 positions.
# NOTE(review): the first phase references bm25(body), but the schema built at
# the top of this notebook only declares `cord_uid` and `title` — confirm the
# target schema has a `body` field.
app_package.schema.add_rank_profile(
    RankProfile(
        name="bert",
        inherits="default",
        constants={"TOKEN_NONE": 0, "TOKEN_CLS": 101, "TOKEN_SEP": 102},
        functions=[
            # Number of non-padding (> 0) token ids in the query tensor.
            Function(
                name="question_length",
                expression="sum(map(query(query_token_ids), f(a)(a > 0)))",
            ),
            # Number of non-padding (> 0) token ids in the document tensor.
            Function(
                name="doc_length",
                expression="sum(map(attribute(doc_token_ids), f(a)(a > 0)))",
            ),
            # Position 0 is [CLS], then the query tokens, [SEP], the document
            # tokens, a final [SEP], and TOKEN_NONE for the rest.
            Function(
                name="input_ids",
                expression="tensor<float>(d0[1],d1[128])(\n"
                "    if (d1 == 0,\n"
                "        TOKEN_CLS,\n"
                "    if (d1 < question_length + 1,\n"
                "        query(query_token_ids){d0:(d1-1)},\n"
                "    if (d1 == question_length + 1,\n"
                "        TOKEN_SEP,\n"
                "    if (d1 < question_length + doc_length + 2,\n"
                "        attribute(doc_token_ids){d0:(d1-question_length-2)},\n"
                "    if (d1 == question_length + doc_length + 2,\n"
                "        TOKEN_SEP,\n"
                "        TOKEN_NONE\n"
                "    ))))))",
            ),
            # 1 wherever input_ids holds a real token, 0 on padding.
            Function(
                name="attention_mask",
                expression="map(input_ids, f(a)(a > 0))",
            ),
            # Segment ids must follow the input_ids layout above: segment 0
            # spans [CLS] + query + [SEP] (question_length + 2 positions) and
            # segment 1 spans document + [SEP] (doc_length + 1 positions).
            # The previous boundaries left out the special tokens.
            Function(
                name="token_type_ids",
                expression="tensor<float>(d0[1],d1[128])(\n"
                "    if (d1 < question_length + 2,\n"
                "        0,\n"
                "    if (d1 < question_length + doc_length + 3,\n"
                "        1,\n"
                "        TOKEN_NONE\n"
                "    )))",
            ),
            # Wrap the first logit of the model output in a mapped tensor so
            # the second phase can reduce it with sum().
            Function(
                name="eval",
                expression="tensor(x{}):{x1:onnxModel(bert).logits{d0:0,d1:0}}",
            ),
        ],
        first_phase="bm25(title) + bm25(body)",
        second_phase=SecondPhaseRanking(
            rerank_count=10, expression="sum(eval)"
        ),
        # Return the model output and assembled inputs with each hit.
        summary_features=[
            "onnxModel(bert).logits",
            "input_ids",
            "attention_mask",
            "token_type_ids",
        ],
    )
)

### Redeploy the application

In [None]:
app = vespa_docker.deploy(
    application_package = app_package,
    disk_folder=disk_folder
)

Steps required to deploy BERT:

* Add onnx-model in the sd.
* Create a models folder at the same level as the schemas folder
* Create a rank-profile that defines the inputs and uses the model to rank
* Add the input field tensors related to the docs
* Add query profile with the relevant tensors

## Deploy application from disk

In [None]:
vespa_docker.container = None

In [None]:
vespa_docker.container

In [None]:
# Deploy an application package that already exists on disk, giving the
# container 10G of memory.
# NOTE(review): disk_folder is an absolute path on the author's machine —
# adjust it before running elsewhere.
app = vespa_docker.deploy_from_disk(
    application_name="cord19", 
    disk_folder="/Users/tmartins/projects/vespa/pyvespa/docs/sphinx/source/use_cases/cord19/sample_application", 
    container_memory="10G"
)

In [None]:
app.deployment_message

In [None]:
import json

# Load the labelled test sets. The context manager closes the file handle as
# soon as parsing finishes; a bare open() call left it open.
with open("cord19/test_sets.json", "r") as f:
    test_sets = json.load(f)

In [None]:
test_sets

In [None]:
# Collect the documents to feed: for every labelled query in every test set,
# keep the id and title of up to 100 BM25 hits.
# QueryModel is used here because it is the query-model class already imported
# (together with OR and Ranking) by the earlier query cells; `Query` is only
# imported by a later cell, so this cell could not run top to bottom.
documents_to_feed = []
for test_set in test_sets:
    for query_point in test_sets[test_set]:
        query = query_point["query"]
        print(query)
        result = app.query(
            query=query,
            query_model=QueryModel(
                match_phase=OR(),
                rank_profile=Ranking(name="bm25")
            ),
            timeout="20s",
            hits=100
        )
        # Every labelled query is expected to match at least one document.
        assert len(result.hits) > 0
        for hit in result.hits:
            documents_to_feed.append(
                {"cord_uid": hit["fields"]["cord_uid"],
                 "title": hit["fields"]["title-full"]}
            )

In [None]:
import json

# Persist the collected documents as JSON so they can be reloaded later.
with open("cord19/documents_to_feed.json", "w") as out_file:
    json.dump(documents_to_feed, out_file)

In [None]:
import json

# Reload the documents that were previously saved as JSON.
with open("cord19/documents_to_feed.json", "r") as in_file:
    documents_to_feed = json.loads(in_file.read())

In [None]:
documents_to_feed[0:2]

In [None]:
# NOTE(review): `training_data_batch` is not defined in any cell visible in
# this notebook; this cell presumably relies on leftover kernel state and will
# fail on a fresh run — confirm or remove.
training_data_batch.head()

In [None]:
from transformers import BertTokenizerFast
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

In [None]:
from vespa.application import Vespa

app = Vespa(url = "http://localhost", port = 8080)

In [None]:
from vespa.query import Query, OR, RankProfile as Ranking

query = 'coronavirus origin'
result = app.query(
    query=query, 
    query_model=Query(
        match_phase = OR(),
        rank_profile = Ranking(name="default")),
    timeout="20s",    
)

In [None]:
result.json

In [None]:
from vespa.query import RankProfile as Ranking

# Query with the "bert_index_1" rank profile. The query token ids are
# passed as an extra request parameter, serialized with str().
# NOTE(review): "bert_index_1" is not defined in this notebook; presumably it
# comes from the application deployed from disk — confirm.
query = 'coronavirus origin'
result = app.query(
    query=query, 
    query_model=Query(
        match_phase = OR(),
        rank_profile = Ranking(name="bert_index_1")),
    timeout="20s",
    debug_request=False,
    # Tokenize without special tokens and pad/truncate to 64 ids.
    # NOTE(review): the query tensor declared earlier in this notebook has 32
    # positions (d0[32]) — confirm 64 matches the deployed schema.
    **{"ranking.features.query(query_token_ids)": str(tokenizer(
                str(query), 
                truncation=True, 
                padding="max_length",
                max_length=64, 
                add_special_tokens=False
            )["input_ids"])}
    
)

In [None]:
[hit["relevance"] for hit in result.hits]

In [None]:
result.request_body

In [None]:
result.json

### Define query models that we want to evaluate

In [None]:
# Import the query-side RankProfile under the alias `Ranking`, which is the
# name this cell actually uses. Importing it unaliased would rebind
# `RankProfile`, the name the schema cells use for vespa.package.RankProfile.
from vespa.query import Query, RankProfile as Ranking, OR

# Query models to compare. All use OR matching and differ only in the rank
# profile applied.
query_models = {
    "or_bm25": Query(
        match_phase=OR(),
        rank_profile=Ranking(name="bm25")
    ),
    "or_bm25_bert": Query(
        match_phase=OR(),
        rank_profile=Ranking(name="bert")
    ),
    "or_bm25_bert_index_1": Query(
        match_phase=OR(),
        rank_profile=Ranking(name="bert_index_1")
    ),
}
        

In [None]:
from vespa.evaluation import MatchRatio, Recall, ReciprocalRank, NormalizedDiscountedCumulativeGain

eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10), NormalizedDiscountedCumulativeGain(at=10)]

In [None]:
        # NOTE(review): this cell looks like a snippet pasted from inside a
        # method: it is indented, calls `self.evaluate_query`, and reads
        # `labelled_data`, `query_model`, `id_field`, `default_score` and
        # `kwargs`, none of which are defined in the visible cells. It will not
        # run as a standalone cell; the evaluation loop further down performs
        # the equivalent work through `app.evaluate_query`.
        evaluation = []
        for query_data in labelled_data:
            evaluation_query = self.evaluate_query(
                eval_metrics=eval_metrics,
                query_model=query_model,
                query_id=query_data["query_id"],
                query=query_data["query"],
                id_field=id_field,
                relevant_docs=query_data["relevant_docs"],
                default_score=default_score,
                **kwargs
            )
            evaluation.append(evaluation_query)
        # One row per evaluated query.
        evaluation = DataFrame.from_records(evaluation)


In [None]:
for test in test_sets:
    print(test)

In [None]:
query_data["relevant_docs"]

In [None]:
from pandas import DataFrame

# Evaluate every query model on every test set.
# Result layout: evaluations[test set name][query model name] -> DataFrame
# with one row per labelled query.
evaluations = {}
for set_name, labelled_queries in test_sets.items():
    per_model = evaluations.setdefault(set_name, {})
    for model_name, model_under_test in query_models.items():
        records = []
        for labelled in labelled_queries:
            print(labelled["query_id"])
            # Query token ids: no special tokens, padded/truncated to 64 ids,
            # sent as a string-serialized request parameter.
            token_ids = tokenizer(
                str(labelled["query"]),
                truncation=True,
                padding="max_length",
                max_length=64,
                add_special_tokens=False,
            )["input_ids"]
            extra_params = {"ranking.features.query(query_token_ids)": str(token_ids)}
            records.append(
                app.evaluate_query(
                    eval_metrics=eval_metrics,
                    query_model=model_under_test,
                    query_id=labelled["query_id"],
                    query=labelled["query"],
                    id_field="cord_uid",
                    relevant_docs=labelled["relevant_docs"],
                    hits=10,
                    timeout="100s",
                    **extra_params,
                )
            )
        per_model[model_name] = DataFrame.from_records(records)

In [None]:
evaluations

In [None]:
import pandas as pd

# Reshape the per-query evaluations into one long frame with the columns
# test_set, query_model, metric and value (one row per query and metric).
metric_frames = [
    pd.DataFrame(
        data={
            "test_set": set_name,
            "query_model": model_name,
            "metric": metric.name,
            "value": evaluations[set_name][model_name][metric.name + "_value"].to_list(),
        }
    )
    for set_name in test_sets
    for model_name in query_models
    for metric in eval_metrics
]
metric_values = pd.concat(metric_frames, ignore_index=True)

In [None]:
metric_values.head()

In [None]:
metric_values.metric.unique()

In [None]:
import plotly.express as px


fig = px.box(metric_values[metric_values.metric == "reciprocal_rank_10"], x="query_model", y="value", title = "RR @ 10")
fig.show()

In [None]:
# Median of each metric per query model, pooled over the test sets.
# Only the numeric `value` column is aggregated: taking the median of the
# string column `test_set` raises a TypeError in recent pandas versions.
metric_values.groupby(['query_model', 'metric'])[["value"]].median()