# 05. Fine-tuned search

We now have a new model, a fine-tuned version of the original one. It's high time to determine whether we were able to achieve better results. We'll still be using the top-k accuracy metric, so we can compare the new model's performance with the original's.

In [None]:
from quaterion_models.model import SimilarityModel

In [None]:
# Load the fine-tuned similarity model from the "tweets_qa_servable" path
# (presumably written by the earlier fine-tuning step — confirm the location).
tweets_qa_model = SimilarityModel.load("tweets_qa_servable")

In [None]:
import qdrant_client

In [None]:
# Client for the Qdrant instance expected at localhost:6333.
# NOTE(review): `timeout=30` is presumably a per-request timeout in seconds —
# confirm against the qdrant-client documentation.
client = qdrant_client.QdrantClient(
    host="localhost", port=6333, timeout=30
)

In [None]:
from datasets import load_dataset

import pandas as pd

In [None]:
# Load the "tweet_qa" dataset and turn its "train" split into a DataFrame.
# The cells below read the "Tweet", "Question" and "qid" columns from it.
tweet_qa_dataset = load_dataset("tweet_qa")
train_df = pd.DataFrame(tweet_qa_dataset["train"])

In [None]:
from qdrant_client.http import models as rest

In [None]:
# The collection's vector size has to match what the model's encoders produce,
# so it is taken from the loaded model rather than hard-coded.
vector_size = SimilarityModel.get_encoders_output_size(tweets_qa_model.encoders)
# (Re)create the "tweets-qa" collection with cosine distance.
# NOTE(review): `recreate_collection` presumably drops an existing collection
# of the same name before creating it — confirm before pointing at real data.
client.recreate_collection(
    collection_name="tweets-qa",
    vectors_config=rest.VectorParams(
        size=vector_size,
        distance=rest.Distance.COSINE,
    ),
)

In [None]:
# Embed every tweet (the text that holds the answer) with the fine-tuned model.
answer_embeddings = tweets_qa_model.encode(train_df["Tweet"].tolist())
# Upload the vectors to "tweets-qa". Each point carries its row's "qid" in the
# payload so that search hits can later be matched back to the question.
client.upload_collection(
    collection_name="tweets-qa",
    vectors=answer_embeddings.tolist(),
    payload=[{"qid": qid} for qid in train_df["qid"]],
)

In [None]:
# Fetch the "tweets-qa" collection info; as the last expression of the cell it
# is shown as the cell output.
client.get_collection("tweets-qa")

In [None]:
# Embed every question with the same model; `top_k_accuracy` uses these
# vectors as search queries, in the same row order as `train_df`.
question_embeddings = tweets_qa_model.encode(train_df["Question"].tolist())

In [None]:
from typing import List


def top_k_accuracy(k: int) -> float:
    """Return the share of questions whose own tweet is among the top ``k`` hits.

    For every row of the notebook-level ``train_df``, the matching entry of
    ``question_embeddings`` is used to query the "tweets-qa" collection through
    the notebook-level ``client``. A question counts as found when one of the
    ``k`` returned points carries the question's ``qid`` in its payload.

    Args:
        k: Number of nearest points requested from the collection per question.

    Returns:
        Number of questions found divided by the number of rows in
        ``train_df``; ``0.0`` when ``train_df`` has no rows.
    """
    total = train_df.shape[0]
    if total == 0:
        # Nothing to evaluate; avoid a ZeroDivisionError on an empty dataset.
        return 0.0

    found_in_top = 0
    # `question_embeddings` was built from `train_df["Question"]`, so zipping
    # pairs each query vector with the qid of the same row.
    for target_qid, question_embedding in zip(train_df["qid"], question_embeddings):
        response = client.search(
            collection_name="tweets-qa",
            query_vector=question_embedding,
            limit=k,
            with_payload=True,
        )
        # Stop scanning the hits as soon as the expected qid shows up.
        if any(point.payload.get("qid") == target_qid for point in response):
            found_in_top += 1
    return found_in_top / total

In [None]:
# Share of questions whose tweet is among the 10 nearest points.
top_k_accuracy(10)

In [None]:
# Same metric with a looser cut-off: the 100 nearest points.
top_k_accuracy(100)