# Evaluate text search experiments from Python with Vespa

> Define and compare Query Models with the pyvespa API.

- toc: true 
- badges: false
- comments: true
- categories: [vespa, pyvespa, cord19, evaluation]

In [None]:
from vespa.package import ApplicationPackage, Field, FieldSet, RankProfile

# Document id field: stored as an attribute and returned in summaries.
cord_uid_field = Field(
    name="cord_uid",
    type="string",
    indexing=["attribute", "summary"],
)

# "title" and "abstract" share one configuration: indexed, returned in
# summaries, and set up with the "enable-bm25" index option.
text_fields = [
    Field(
        name=field_name,
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    for field_name in ("title", "abstract")
]

# Assemble the "cord19" application package from the pieces above.
app_package = ApplicationPackage(name="cord19")
schema = app_package.schema
schema.add_fields(cord_uid_field, *text_fields)

# The "default" field set groups the two text fields.
default_field_set = FieldSet(name="default", fields=["title", "abstract"])
schema.add_field_set(default_field_set)

# Rank profile "bm25": the first phase sums the bm25 scores of both text fields.
bm25_profile = RankProfile(
    name="bm25",
    first_phase="bm25(title) + bm25(abstract)",
)
schema.add_rank_profile(bm25_profile)



In [None]:
from pathlib import Path

from vespa.package import VespaDocker

# Folder handed to VespaDocker as `disk_folder`. It is derived from the
# current user's home directory instead of one specific user's absolute path,
# so the cell is not tied to a single machine.
disk_folder = str(Path.home() / "sample_app")

# Deploy the application package to a Vespa Docker container on port 8080;
# `app` is the handle used by the cells below to feed and query.
vespa_docker = VespaDocker(port=8080, disk_folder=disk_folder)
app = vespa_docker.deploy(
    application_package=app_package,
)

In [None]:
from pandas import read_csv

# URL of the CSV file that holds the documents to feed.
feed_url = "https://thigm85.github.io/data/cord19/parsed_feed_100.csv"

# Load the CSV into a DataFrame.
parsed_feed = read_csv(feed_url)

In [None]:
# Columns copied from each CSV row into the document that is fed.
feed_columns = ("cord_uid", "title", "abstract")

# Feed one document per row to the "cord19" schema, using the row's cord_uid
# (as a string) as the document id.
for idx, row in parsed_feed.iterrows():
    # Every value is converted to str before feeding.
    # NOTE(review): a missing value would be fed as the string "nan" -- confirm
    # the CSV has no empty cells in these columns.
    fields = {column: str(row[column]) for column in feed_columns}
    response = app.feed_data_point(
        schema="cord19",
        data_id=fields["cord_uid"],
        fields=fields,
    )

In [None]:
# Query text used for this example search.
query_text = "What is the role of endothelin-1"

# Raw request body passed as-is to app.query: a YQL statement built around
# userQuery(), the "bm25" ranking, query type "any", timing information in the
# presentation, and a limit of 3 hits.
query = {
    "yql": "select * from sources * where userQuery();",
    "query": query_text,
    "ranking": "bm25",
    "type": "any",
    "presentation.timing": True,
    "hits": 3,
}
res = app.query(body=query)

# Show the first hit of the result (displayed as the cell output).
res.hits[0]

In [None]:
from vespa.query import QueryModel, RankProfile as Ranking, OR

# Query model combining an OR match phase with the "bm25" rank profile.
or_bm25_model = QueryModel(
    match_phase=OR(),
    rank_profile=Ranking(name="bm25"),
)

# Run the query text through the query model instead of a raw request body.
res = app.query(
    query="What is the role of endothelin-1",
    query_model=or_bm25_model,
)

# Show the first hit of the result (displayed as the cell output).
res.hits[0]

------

In this post we will show how to use the [pyvespa](https://pyvespa.readthedocs.io/en/latest/index.html) API to define and compare search engine experiments based on the text search app we built in [the first part](https://blog.vespa.ai/build-basic-text-search-app-from-python-with-vespa/) of this tutorial series. We will continue to use the [CORD19 sample data](https://ir.nist.gov/covidSubmit/data.html) that were used to feed the search app in the first tutorial. IT IS ASSUMED THAT ...

-----