In [3]:
%config Completer.use_jedi = False

## Build the application

In [None]:
from vespa.package import Document, Field

# Document definition for the "context" schema.
context_document = Document(
    fields=[
        # Attribute fields: returned in summaries, no text index.
        Field(
            name="questions",
            type="array<int>",
            indexing=["summary", "attribute"],
        ),
        Field(
            name="dataset",
            type="string",
            indexing=["summary", "attribute"],
        ),
        Field(
            name="context_id",
            type="int",
            indexing=["summary", "attribute"],
        ),
        # The only text-indexed field; BM25 is enabled on its index.
        Field(
            name="text",
            type="string",
            indexing=["summary", "index"],
            index="enable-bm25",
        ),
    ]
)

In [None]:
from vespa.package import Schema, FieldSet, RankProfile

# Schema wrapping the context document. Both rank profiles inherit from
# "default" and score on the `text` field in their first phase.
context_schema = Schema(
    name="context",
    document=context_document,
    fieldsets=[
        FieldSet(name="default", fields=["text"]),
    ],
    rank_profiles=[
        RankProfile(
            name="bm25",
            inherits="default",
            first_phase="bm25(text)",
        ),
        RankProfile(
            name="nativeRank",
            inherits="default",
            first_phase="nativeRank(text)",
        ),
    ],
)

In [None]:
from vespa.package import HNSW

# Embedding field: a 512-dimensional float tensor, stored as an attribute and
# indexed with HNSW using euclidean distance.
sentence_embedding_field = Field(
    name="sentence_embedding",
    type="tensor<float>(x[512])",
    indexing=["attribute", "index"],
    ann=HNSW(
        distance_metric="euclidean",
        max_links_per_node=16,
        neighbors_to_explore_at_insert=500,
    ),
)

# The sentence document inherits from "context" and declares only the embedding field itself.
sentence_document = Document(inherits="context", fields=[sentence_embedding_field])

In [None]:
# Schema for sentence documents. Every rank profile inherits from "default";
# they differ only in their first-phase expression (embedding closeness,
# BM25 on `text`, or the sum of both).
sentence_schema = Schema(
    name="sentence",
    document=sentence_document,
    fieldsets=[FieldSet(name="default", fields=["text"])],
    rank_profiles=[
        RankProfile(name=profile_name, inherits="default", first_phase=expression)
        for profile_name, expression in [
            ("semantic-similarity", "closeness(sentence_embedding)"),
            ("bm25", "bm25(text)"),
            ("bm25-semantic-similarity", "bm25(text) + closeness(sentence_embedding)"),
        ]
    ],
)

In [None]:
from vespa.package import ApplicationPackage, QueryProfile, QueryProfileType, QueryTypeField

# Application package "qa" bundling both schemas.
app_package = ApplicationPackage(
    name="qa",
    schema=[context_schema, sentence_schema],
    query_profile=QueryProfile(),
    query_profile_type=QueryProfileType(
        fields=[
            # Declares the query-side tensor `query(query_embedding)` with the
            # same type string as the `sentence_embedding` document field.
            QueryTypeField(
                name="ranking.features.query(query_embedding)",
                type="tensor<float>(x[512])",
            ),
        ],
    ),
)

In [None]:
from vespa.package import VespaDocker

from pathlib import Path

# Host folder passed to VespaDocker as `disk_folder`; it must be an absolute path.
# It is derived from the current user's home directory rather than a hard-coded
# "/Users/<name>/..." path, so the notebook can run on machines other than the
# original author's. Path.home() always yields an absolute path.
qa_app_disk_folder = Path.home() / "qa_app"

vespa_docker = VespaDocker(
    port=8081,
    container_memory="8G",
    disk_folder=str(qa_app_disk_folder),  # requires absolute path
)
# Deploy the application package; the value returned by deploy() is bound to `app`.
app = vespa_docker.deploy(application_package=app_package)

## Feed sentence data

In [1]:
from vespa.application import Vespa

# Handle to the Vespa application at http://localhost:8081 (the port used by VespaDocker above).
app = Vespa(url="http://localhost", port=8081)

In [2]:
import requests, json

# Download the context records (qa_squad_context_data.json) fed in the cells below.
context_response = requests.get(
    "https://data.vespa.oath.cloud/blog/qa/qa_squad_context_data.json"
)
# Fail with a clear HTTP error instead of trying to parse an error page as JSON.
context_response.raise_for_status()
context_data = json.loads(context_response.text)

In [None]:
import time
# Baseline: feed the context records one request at a time through the
# synchronous API and print the elapsed time.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; unlike time.time() it is unaffected by system clock adjustments.
start_time = time.perf_counter()
for context in context_data:
    app.feed_data_point(schema="context", data_id=context["context_id"], fields=context)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

In [None]:
# Reshape every context record into the {"id", "fields"} form passed to feed_batch below.
batch = [
    {"id": record["context_id"], "fields": record}
    for record in context_data
]

In [None]:
import time
# Time a batch feed through the application-level feed_batch call.
# perf_counter() is monotonic, so the elapsed value cannot be skewed by
# system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
res = app.feed_batch(schema="context", batch=batch)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

In [None]:
# Display the value returned by the batch feed call.
res

In [None]:
from vespa.application import VespaAsync
import time

start_time = time.time()
async with VespaAsync(app) as async_app:
    await async_app.feed_batch(schema= "context", batch=batch)
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
import time
# Time a batch feed with asynchronous=True requested on the application-level call.
# NOTE(review): the result is not awaited here, while another cell in this
# notebook awaits the same call; confirm which form the installed pyvespa
# version expects, otherwise the timing may only cover creating the call.
# perf_counter() is monotonic, so the elapsed value cannot be skewed by
# system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
res = app.feed_batch(schema="context", batch=batch, asynchronous=True)
print("--- %s seconds ---" % (time.perf_counter() - start_time))
res

In [None]:
import time
start_time = time.time()
res = await app.feed_batch(schema = "context", batch=batch, asynchronous=True)
print("--- %s seconds ---" % (time.time() - start_time))

## Output equivalence 

Suggestion: create a unified object called `VespaResponse` containing `json`, `status_code`, `url` and `operation_type` (feed, get, update, delete).

### Feed

In [12]:
from vespa.application import VespaAsync

async with VespaAsync(app) as async_app:
    res_feed_async = await async_app.feed_data_point(schema="context", data_id=context_data[0]["context_id"], fields=context_data[0])

In [19]:
# On the async response, `json()` has to be awaited to obtain the body.
await res_feed_async.json()

{'pathId': '/document/v1/context/context/docid/0',
 'id': 'id:context:context::0'}

In [18]:
# The async response exposes the HTTP status code as `status`.
res_feed_async.status

200

In [22]:
# `url` is not a plain string on the async response (the `request_info` output
# shows it as `URL(...)`), so it is converted with str() for display.
str(res_feed_async.url)

'http://localhost:8081/document/v1/context/context/docid/0'

In [46]:
# Request metadata kept on the async response: URL, HTTP method and headers.
res_feed_async.request_info

RequestInfo(url=URL('http://localhost:8081/document/v1/context/context/docid/0'), method='POST', headers=<CIMultiDictProxy('Host': 'localhost:8081', 'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'User-Agent': 'Python/3.9 aiohttp/3.7.4.post0', 'Content-Length': '788', 'Content-Type': 'application/json')>, real_url=URL('http://localhost:8081/document/v1/context/context/docid/0'))

In [14]:
# Feed the same first context record through the synchronous API for comparison.
res_feed_sync = app.feed_data_point(
    schema="context",
    data_id=context_data[0]["context_id"],
    fields=context_data[0],
)

In [16]:
# On the sync response, `json()` is a plain method call (no await).
res_feed_sync.json()

{'pathId': '/document/v1/context/context/docid/0',
 'id': 'id:context:context::0'}

In [17]:
# The sync response exposes the HTTP status code as `status_code`.
res_feed_sync.status_code

200

In [20]:
# `url` on the sync response; the recorded output shows a plain string.
res_feed_sync.url

'http://localhost:8081/document/v1/context/context/docid/0'

### Get

In [26]:
# Fetch the document with data_id "0" from the "context" schema via the synchronous API.
res_get_sync = app.get_data(schema="context", data_id="0")

In [28]:
# Body of the sync get response; the recorded output includes the document's `fields`.
res_get_sync.json()

{'pathId': '/document/v1/context/context/docid/0',
 'id': 'id:context:context::0',
 'fields': {'text': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
  'dataset': 'squad',
  'context_id': 0,
  'questions': [0, 1, 2, 3, 4]}}

In [29]:
# HTTP status code of the sync get response.
res_get_sync.status_code

200

In [30]:
# URL of the sync get request.
res_get_sync.url

'http://localhost:8081/document/v1/context/context/docid/0'

In [35]:
# Same lookup through the async client obtained from `app.asyncio()`.
async with app.asyncio() as async_app:
    res_get_async = await async_app.get_data(schema="context", data_id="0")

In [36]:
# HTTP status code of the async get response (attribute name is `status`).
res_get_async.status

200

In [38]:
# URL of the async get request, converted with str() for display.
str(res_get_async.url)

'http://localhost:8081/document/v1/context/context/docid/0'

In [40]:
# Body of the async get response; `json()` has to be awaited.
await res_get_async.json()

{'pathId': '/document/v1/context/context/docid/0',
 'id': 'id:context:context::0',
 'fields': {'text': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
  'dataset': 'squad',
  'context_id': 0,
  'questions': [0, 1, 2, 3, 4]}}

### Update

In [41]:
# Update the document with data_id "0" via the synchronous API, sending the
# first context record as the fields.
res_update_sync = app.update_data(
    schema="context",
    data_id="0",
    fields=context_data[0],
)

In [42]:
# HTTP status code of the sync update response.
res_update_sync.status_code

200

In [43]:
# URL of the sync update request; the recorded output carries a `create=false` query parameter.
res_update_sync.url

'http://localhost:8081/document/v1/context/context/docid/0?create=false'

In [44]:
# Body of the sync update response.
res_update_sync.json()

{'pathId': '/document/v1/context/context/docid/0',
 'id': 'id:context:context::0'}

In [48]:
async with app.asyncio() as async_app:
    res_update_async = await async_app.update_data(schema="context", data_id="0", fields=context_data[0])

  res_update_async = await async_app.update_data(schema="context", data_id="0", fields=context_data[0])


In [49]:
# HTTP status code of the async update response (attribute name is `status`).
res_update_async.status

200

In [51]:
# URL of the async update request, converted with str() for display.
str(res_update_async.url)

'http://localhost:8081/document/v1/context/context/docid/0?create=false'

In [53]:
# Body of the async update response; `json()` has to be awaited.
await res_update_async.json()

{'pathId': '/document/v1/context/context/docid/0',
 'id': 'id:context:context::0'}

### Delete

In [54]:
# Delete the document with data_id "0" from the "context" schema via the synchronous API.
res_delete_sync = app.delete_data(schema="context", data_id="0")

In [55]:
# HTTP status code of the sync delete response.
res_delete_sync.status_code

200

In [56]:
# URL of the sync delete request.
res_delete_sync.url

'http://localhost:8081/document/v1/context/context/docid/0'

In [57]:
# Body of the sync delete response.
res_delete_sync.json()

{'pathId': '/document/v1/context/context/docid/0',
 'id': 'id:context:context::0'}

In [58]:
# Same delete issued through the async client obtained from `app.asyncio()`.
async with app.asyncio() as async_app:
    res_delete_async = await async_app.delete_data(schema="context", data_id="0")

In [59]:
# HTTP status code of the async delete response (attribute name is `status`).
res_delete_async.status

200

In [61]:
# URL of the async delete request, converted with str() for display.
str(res_delete_async.url)

'http://localhost:8081/document/v1/context/context/docid/0'

In [64]:
# Body of the async delete response; `json()` has to be awaited.
await res_delete_async.json()

{'pathId': '/document/v1/context/context/docid/0',
 'id': 'id:context:context::0'}

### Query

In [74]:
from vespa.query import QueryModel

# Run a query through the synchronous API with a default-constructed QueryModel.
res_query_sync = app.query(query="this is a test", query_model=QueryModel())

In [76]:
# The query result exposes `json` as an attribute (no call), unlike the
# document-operation responses in this notebook, where `json()` is a method.
res_query_sync.json

{'root': {'id': 'toplevel',
  'relevance': 1.0,
  'fields': {'totalCount': 62},
  'coverage': {'coverage': 100,
   'documents': 18895,
   'full': True,
   'nodes': 2,
   'results': 2,
   'resultsFull': 2},
  'children': [{'id': 'id:context:context::14450',
    'relevance': 0.2856235721334474,
    'source': 'qa_content',
    'fields': {'sddocname': 'context',
     'documentid': 'id:context:context::14450',
     'questions': [66541, 66542, 66543, 66544, 66545],
     'dataset': 'squad',
     'context_id': 14450,
     'text': 'Load testing is primarily concerned with testing that the system can continue to operate under a specific load, whether that be large quantities of data or a large number of users. This is generally referred to as software scalability. The related load testing activity of when performed as a non-functional activity is often referred to as endurance testing. Volume testing is a way to test software functions even when certain components (for example a file or database

In [77]:
# Same query issued through the async client obtained from `app.asyncio()`.
async with app.asyncio() as async_app:
    res_query_async = await async_app.query(query="this is a test", query_model=QueryModel())

In [78]:
# As with the sync query result, `json` is read as an attribute; no await is used here.
res_query_async.json

{'root': {'id': 'toplevel',
  'relevance': 1.0,
  'fields': {'totalCount': 62},
  'coverage': {'coverage': 100,
   'documents': 18895,
   'full': True,
   'nodes': 2,
   'results': 2,
   'resultsFull': 2},
  'children': [{'id': 'id:context:context::14450',
    'relevance': 0.2856235721334474,
    'source': 'qa_content',
    'fields': {'sddocname': 'context',
     'documentid': 'id:context:context::14450',
     'questions': [66541, 66542, 66543, 66544, 66545],
     'dataset': 'squad',
     'context_id': 14450,
     'text': 'Load testing is primarily concerned with testing that the system can continue to operate under a specific load, whether that be large quantities of data or a large number of users. This is generally referred to as software scalability. The related load testing activity of when performed as a non-functional activity is often referred to as endurance testing. Volume testing is a way to test software functions even when certain components (for example a file or database