In [1]:
# Turn off Jedi for the IPython completer in this session.
%config Completer.use_jedi = False

## Build the application

In [None]:
from vespa.package import Document, Field

# Document with four fields: `questions`, `dataset` and `context_id` are
# summary + attribute fields; `text` is summary + index with BM25 enabled.
context_document = Document(
    fields=[
        Field(
            name="questions",
            type="array<int>",
            indexing=["summary", "attribute"],
        ),
        Field(
            name="dataset",
            type="string",
            indexing=["summary", "attribute"],
        ),
        Field(
            name="context_id",
            type="int",
            indexing=["summary", "attribute"],
        ),
        Field(
            name="text",
            type="string",
            indexing=["summary", "index"],
            index="enable-bm25",
        ),
    ]
)

In [None]:
from vespa.package import Schema, FieldSet, RankProfile

# Both rank profiles inherit from "default" and differ only in the
# first-phase expression applied to the `text` field.
context_rank_profiles = [
    RankProfile(
        name="bm25",
        inherits="default",
        first_phase="bm25(text)",
    ),
    RankProfile(
        name="nativeRank",
        inherits="default",
        first_phase="nativeRank(text)",
    ),
]

# Schema named "context" wrapping `context_document`, with a single default
# fieldset over `text`.
context_schema = Schema(
    name="context",
    document=context_document,
    fieldsets=[FieldSet(name="default", fields=["text"])],
    rank_profiles=context_rank_profiles,
)

In [None]:
from vespa.package import HNSW

# HNSW settings passed as the `ann` configuration of the embedding field.
sentence_embedding_ann = HNSW(
    distance_metric="euclidean",
    max_links_per_node=16,
    neighbors_to_explore_at_insert=500,
)

# Document declared with inherits="context" plus one 512-dimensional float
# tensor field that is both an attribute and indexed.
sentence_document = Document(
    inherits="context",
    fields=[
        Field(
            name="sentence_embedding",
            type="tensor<float>(x[512])",
            indexing=["attribute", "index"],
            ann=sentence_embedding_ann,
        )
    ],
)

In [None]:
# Three rank profiles, all inheriting from "default":
#   - semantic-similarity:      closeness on the sentence embedding only
#   - bm25:                     BM25 on the text only
#   - bm25-semantic-similarity: sum of the two expressions above
sentence_rank_profiles = [
    RankProfile(
        name="semantic-similarity",
        inherits="default",
        first_phase="closeness(sentence_embedding)",
    ),
    RankProfile(
        name="bm25",
        inherits="default",
        first_phase="bm25(text)",
    ),
    RankProfile(
        name="bm25-semantic-similarity",
        inherits="default",
        first_phase="bm25(text) + closeness(sentence_embedding)",
    ),
]

# Schema named "sentence" wrapping `sentence_document`, with a single default
# fieldset over `text`.
sentence_schema = Schema(
    name="sentence",
    document=sentence_document,
    fieldsets=[FieldSet(name="default", fields=["text"])],
    rank_profiles=sentence_rank_profiles,
)

In [None]:
from vespa.package import ApplicationPackage, QueryProfile, QueryProfileType, QueryTypeField

# Query-side tensor declaration; its type string matches the one used for the
# `sentence_embedding` document field (tensor<float>(x[512])).
query_embedding_field = QueryTypeField(
    name="ranking.features.query(query_embedding)",
    type="tensor<float>(x[512])",
)

# Application package "qa" bundling both schemas, a default query profile and
# a query profile type holding the query embedding field.
app_package = ApplicationPackage(
    name="qa",
    schema=[context_schema, sentence_schema],
    query_profile=QueryProfile(),
    query_profile_type=QueryProfileType(fields=[query_embedding_field]),
)

In [None]:
from pathlib import Path

from vespa.package import VespaDocker

# `disk_folder` requires an absolute path. Build it from the current user's
# home directory instead of hard-coding one developer's machine, so the
# notebook runs unchanged for other users (Path.home() is always absolute).
qa_disk_folder = Path.home() / "qa_app"

vespa_docker = VespaDocker(
    port=8081,
    container_memory="8G",
    disk_folder=str(qa_disk_folder),  # requires absolute path
)
# Deploy the application package; `app` is the handle used by later cells.
app = vespa_docker.deploy(application_package=app_package)

## Feed sentence data

In [2]:
from vespa.application import Vespa

# Connect to the application on localhost:8081 (the same port that is passed
# to VespaDocker in the build section).
app = Vespa(url="http://localhost", port=8081)

In [3]:
import requests, json

# Download the context data file. Fail fast with an HTTP error if the request
# was not successful, instead of trying to parse an error page as JSON.
context_response = requests.get(
    "https://data.vespa.oath.cloud/blog/qa/qa_squad_context_data.json"
)
context_response.raise_for_status()
context_data = json.loads(context_response.text)

In [None]:
import time

# Feed every context one request at a time and report the elapsed wall time.
start_time = time.time()
for context in context_data:
    app.feed_data_point(
        schema="context",
        data_id=context["context_id"],
        fields=context,
    )
elapsed_seconds = time.time() - start_time
print("--- %s seconds ---" % elapsed_seconds)

In [4]:
# One {"id", "fields"} entry per context, in the shape passed to feed_batch.
batch = [
    {"id": context_doc["context_id"], "fields": context_doc}
    for context_doc in context_data
]

In [None]:
import time

# Feed the whole batch with a single feed_batch call and time it.
start_time = time.time()
res = app.feed_batch(schema="context", batch=batch)
elapsed_seconds = time.time() - start_time
print("--- %s seconds ---" % elapsed_seconds)

In [None]:
# Display the value currently bound to `res`.
res

In [None]:
from vespa.application import VespaAsync
import time

start_time = time.time()
async with VespaAsync(app) as async_app:
    await async_app.feed_batch(schema= "context", batch=batch)
print("--- %s seconds ---" % (time.time() - start_time))

In [5]:
import time

# With asynchronous=True the call is not awaited here; the recorded output
# shows that `res` is a coroutine object and that the call returns at once.
start_time = time.time()
res = app.feed_batch(schema="context", batch=batch, asynchronous=True)
elapsed_seconds = time.time() - start_time
print("--- %s seconds ---" % elapsed_seconds)
res

--- 8.797645568847656e-05 seconds ---


<coroutine object Vespa._feed_batch_async at 0x149a9a440>

In [7]:
import asyncio

# Get a handle on the event loop that is currently running.
loop = asyncio.get_running_loop()

In [18]:
# Schedule the coroutine held in `res` as a task on `loop`.
task = loop.create_task(res)

In [26]:
task.result()

RuntimeError: cannot reuse already awaited coroutine

In [None]:
import time
start_time = time.time()
res = await app.feed_batch(schema = "context", batch=batch, asynchronous=True)
print("--- %s seconds ---" % (time.time() - start_time))

## Output equivalence 

Suggestion: create a unified object called `VespaResponse` containing `json`, `status_code`, `url` and `operation_type` (feed, get, update, delete).

### Feed

In [None]:
from vespa.application import VespaAsync

async with VespaAsync(app) as async_app:
    res_feed_async = await async_app.feed_data_point(schema="context", data_id=context_data[0]["context_id"], fields=context_data[0])

In [None]:
# Body of the async feed response; json() is awaited on this object.
await res_feed_async.json()

In [None]:
# Status is read from `.status` here (the sync response uses `.status_code`).
res_feed_async.status

In [None]:
# `.url` is wrapped in str() for display.
str(res_feed_async.url)

In [None]:
# Request information attached to the async response; the sync response is
# not inspected for an equivalent attribute in this notebook.
res_feed_async.request_info

In [None]:
# Feed the first context through the synchronous API for comparison.
res_feed_sync = app.feed_data_point(
    schema="context",
    data_id=context_data[0]["context_id"],
    fields=context_data[0],
)

In [None]:
# Body of the sync feed response; json() is called directly, without await.
res_feed_sync.json()

In [None]:
# Status is read from `.status_code` here (the async response uses `.status`).
res_feed_sync.status_code

In [None]:
# `.url` is displayed as-is, without the str() wrapper used on the async response.
res_feed_sync.url

### Get

In [None]:
# Fetch the document with data_id "0" from the "context" schema (sync API).
res_get_sync = app.get_data(schema="context", data_id="0")

In [None]:
# Body of the sync get response; json() is called without await.
res_get_sync.json()

In [None]:
# Status is read from `.status_code` on the sync response.
res_get_sync.status_code

In [None]:
# `.url` is displayed as-is on the sync response.
res_get_sync.url

In [None]:
async with app.asyncio() as async_app:
    res_get_async = await async_app.get_data(schema="context", data_id="0")

In [None]:
# Status is read from `.status` on the async response.
res_get_async.status

In [None]:
# `.url` is wrapped in str() for display.
str(res_get_async.url)

In [None]:
# Body of the async get response; json() is awaited on this object.
await res_get_async.json()

### Update

In [None]:
# Update document "0" with the fields of the first context (sync API).
res_update_sync = app.update_data(
    schema="context",
    data_id="0",
    fields=context_data[0],
)

In [None]:
# Status is read from `.status_code` on the sync response.
res_update_sync.status_code

In [None]:
# `.url` is displayed as-is on the sync response.
res_update_sync.url

In [None]:
# Body of the sync update response; json() is called without await.
res_update_sync.json()

In [None]:
async with app.asyncio() as async_app:
    res_update_async = await async_app.update_data(schema="context", data_id="0", fields=context_data[0])

In [None]:
# Status is read from `.status` on the async response.
res_update_async.status

In [None]:
# `.url` is wrapped in str() for display.
str(res_update_async.url)

In [None]:
# Body of the async update response; json() is awaited on this object.
await res_update_async.json()

### Delete

In [None]:
# Delete the document with data_id "0" from the "context" schema (sync API).
res_delete_sync = app.delete_data(schema="context", data_id="0")

In [None]:
# Status is read from `.status_code` on the sync response.
res_delete_sync.status_code

In [None]:
# `.url` is displayed as-is on the sync response.
res_delete_sync.url

In [None]:
# Body of the sync delete response; json() is called without await.
res_delete_sync.json()

In [None]:
async with app.asyncio() as async_app:
    res_delete_async = await async_app.delete_data(schema="context", data_id="0")

In [None]:
# Status is read from `.status` on the async response.
res_delete_async.status

In [None]:
# `.url` is wrapped in str() for display.
str(res_delete_async.url)

In [None]:
# Body of the async delete response; json() is awaited on this object.
await res_delete_async.json()

### Query

In [None]:
from vespa.query import QueryModel

# Run a test query through the sync API with a default-constructed QueryModel.
res_query_sync = app.query(
    query="this is a test",
    query_model=QueryModel(),
)

In [None]:
# On the query result, `json` is read as an attribute (no call), unlike the
# json() calls used on the document-operation responses in this notebook.
res_query_sync.json

In [None]:
async with app.asyncio() as async_app:
    res_query_async = await async_app.query(query="this is a test", query_model=QueryModel())

In [None]:
# As with the sync query result, `json` is read as an attribute here: no call
# and no await.
res_query_async.json