In [2]:
!pip install pandas

Collecting pandas
  Using cached pandas-2.2.3-cp311-cp311-win_amd64.whl.metadata (19 kB)
Collecting numpy>=1.23.2 (from pandas)
  Downloading numpy-2.1.3-cp311-cp311-win_amd64.whl.metadata (60 kB)
     ---------------------------------------- 0.0/60.8 kB ? eta -:--:--
     ------------------- ------------------ 30.7/60.8 kB 660.6 kB/s eta 0:00:01
     -------------------------------------- 60.8/60.8 kB 816.2 kB/s eta 0:00:00
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.2.3-cp311-cp311-win_amd64.whl (11.6 MB)
Downloading numpy-2.1.3-cp311-cp311-win_amd64.whl (12.9 MB)
   ---------------------------------------- 0.0/12.9 MB ? eta -:--:--
   ---------------------------------------- 0.1/12.9 MB 4.2 MB/s eta 0:00:04
   ---- ----------------------------------- 1.3/12.9 MB 16.8 MB/s eta 0:00:01
   ----------


[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
from vespa.application import Vespa
from vespa.io import VespaResponse, VespaQueryResponse


In [6]:
def display_hits_as_df(response: "VespaQueryResponse", fields) -> pd.DataFrame:
    """Collect selected fields from the hits of a query response into a DataFrame.

    Args:
        response: Query response exposing ``hits``, an iterable of hit dicts
            whose ``"fields"`` entry maps field names to values.
        fields: Iterable of field names to extract; they become the columns,
            in the given order.

    Returns:
        A DataFrame with one row per hit and one column per requested field.
        A hit that lacks a requested field (or has no ``"fields"`` entry at
        all) contributes ``None`` for it instead of raising ``KeyError``.
        When there are no hits the frame is empty but still carries the
        requested columns.
    """
    # Materialise once: the names are reused for every hit and for the column
    # list, which a one-shot iterator would not survive.
    columns = list(fields)
    records = [
        {name: hit.get("fields", {}).get(name) for name in columns}
        for hit in response.hits
    ]
    # Passing columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(records, columns=columns)


def keyword_search(app, search_query):
    """Run a text query against ``app`` and tabulate the matching documents.

    The request uses ``userQuery()`` in YQL with ``limit 5`` and sets
    ``ranking`` to ``"bm25"``.

    Args:
        app: Client object exposing ``query(body)``.
        search_query: Text sent as the ``query`` request parameter.

    Returns:
        Whatever ``display_hits_as_df`` builds from the response for the
        ``doc_id`` and ``title`` fields.
    """
    request_body = dict(
        yql="select * from sources * where userQuery() limit 5",
        query=search_query,
        ranking="bm25",
    )
    shown_fields = ["doc_id", "title"]
    return display_hits_as_df(app.query(request_body), shown_fields)


def semantic_search(app, query):
    """Run a nearest-neighbour query against ``app`` and tabulate the hits.

    The request searches the ``embedding`` field with ``targetHits:100`` and
    ``limit 5``, sets ``ranking`` to ``"semantic"``, and supplies the query
    tensor ``e`` as ``embed(@query)``.

    Args:
        app: Client object exposing ``query(body)``.
        query: Text sent as the ``query`` request parameter.

    Returns:
        Whatever ``display_hits_as_df`` builds from the response for the
        ``doc_id`` and ``title`` fields.
    """
    yql = "select * from sources * where ({targetHits:100}nearestNeighbor(embedding,e)) limit 5"
    # A separate name for the body keeps the ``query`` parameter un-shadowed.
    request_body = {
        "yql": yql,
        "query": query,
        "ranking": "semantic",
        "input.query(e)": "embed(@query)",
    }
    hits_response = app.query(request_body)
    return display_hits_as_df(hits_response, ["doc_id", "title"])


def get_embedding(doc_id, vespa_app=None):
    """Fetch the stored document, including its embedding, for one ``doc_id``.

    Args:
        doc_id: Identifier matched against the ``doc_id`` field of
            ``content.doc``. It is converted to ``str`` and escaped before
            being placed in the YQL string literal.
        vespa_app: Optional client exposing ``query(body)``. When omitted, the
            notebook-level ``app`` is used, so existing one-argument calls
            keep working.

    Returns:
        The first hit of the response, or ``None`` when there are no hits.
    """
    client = app if vespa_app is None else vespa_app
    # Escape backslashes first, then single quotes, so the id cannot close
    # the quoted YQL literal and change the query.
    safe_id = str(doc_id).replace("\\", "\\\\").replace("'", "\\'")
    query = {
        "yql": f"select doc_id, title, text, embedding from content.doc where doc_id contains '{safe_id}'",
        "hits": 1,
    }
    result = client.query(query)
    return result.hits[0] if result.hits else None


def query_movies_by_embedding(embedding_vector, hits=5, vespa_app=None):
    """Find the documents in ``content.doc`` nearest to an embedding.

    Args:
        embedding_vector: Embedding to search with. It is sent as its ``str()``
            form in the ``ranking.features.query(user_embedding)`` parameter.
        hits: Number of results to request. The same value is used for the
            ``hits`` parameter and the ``targetHits`` annotation. Defaults to
            5, which reproduces the previously hard-coded request.
        vespa_app: Optional client exposing ``query(body)``. When omitted, the
            notebook-level ``app`` is used, so existing one-argument calls
            keep working.

    Returns:
        The response object returned by the client's ``query`` call.
    """
    client = app if vespa_app is None else vespa_app
    # Coerce to int so only a number can end up inside the YQL annotation.
    hits = int(hits)
    query = {
        'hits': hits,
        'yql': f'select * from content.doc where ({{targetHits:{hits}}}nearestNeighbor(embedding, user_embedding))',
        'ranking.features.query(user_embedding)': str(embedding_vector),
        'ranking.profile': 'recommendation',
    }
    return client.query(query)

In [11]:
# Replace with the host and port of your local Vespa instance
app = Vespa(url="http://localhost", port=8080)

query = "Harry Potter and the Half-Blood Prince"

# Keyword search (the "bm25" ranking) for the title. Leaving the frame as the
# cell's last expression lets Jupyter render it as a rich table.
df = keyword_search(app, query)
df.head()

  doc_id                                     title
0    767    Harry Potter and the Half-Blood Prince
1    671  Harry Potter and the Philosopher's Stone
2    674       Harry Potter and the Goblet of Fire
3    673  Harry Potter and the Prisoner of Azkaban
4  13967                               Miss Potter


In [9]:

# Same `app` and `query` as the keyword-search cell, now through the
# nearest-neighbour search. The bare last expression gives rich table output.
df = semantic_search(app, query)
df.head()

   doc_id                                      title
0     767     Harry Potter and the Half-Blood Prince
1     675  Harry Potter and the Order of the Phoenix
2     672    Harry Potter and the Chamber of Secrets
3     674        Harry Potter and the Goblet of Fire
4  168705                                 BloodRayne


In [8]:
seed_doc_id = "767"

emb = get_embedding(seed_doc_id)
if emb is None:
    # get_embedding returns None when nothing matches; stop with a clear
    # message rather than a TypeError on the subscript below.
    raise LookupError(f"No document found for doc_id {seed_doc_id!r}")

# Use the seed document's own embedding to look up its nearest neighbours.
results = query_movies_by_embedding(emb["fields"]["embedding"])
df = display_hits_as_df(results, ["doc_id", "title", "text"])
df.head()

  doc_id                                      title  \
0    767     Harry Potter and the Half-Blood Prince   
1    675  Harry Potter and the Order of the Phoenix   
2    672    Harry Potter and the Chamber of Secrets   
3    671   Harry Potter and the Philosopher's Stone   
4    674        Harry Potter and the Goblet of Fire   

                                                text  
0  As Harry begins his sixth year at Hogwarts, he...  
1  Returning for his fifth year of study at Hogwa...  
2  Ignoring threats to his life, Harry returns to...  
3  Harry Potter has lived under the stairs at his...  
4  Harry starts his fourth year at Hogwarts, comp...  
