In [1]:
import numpy as np
import pandas as pd
from pymilvus import MilvusClient
from sentence_transformers import SentenceTransformer

#### Data loading

In [2]:
# Read the question/answer CSV into a DataFrame and preview its first three rows.
data = pd.read_csv(filepath_or_buffer="./data/question_answer.csv")
data.head(n=3)

Unnamed: 0,id,question,answer
0,0,Is Disability Insurance Required By Law?,Not generally. There are five states that requ...
1,1,Can Creditors Take Life Insurance After ...,If the person who passed away was the one with...
2,2,Does Travelers Insurance Have Renters Ins...,One of the insurance carriers I represent is T...


In [3]:
# Display the (rows, columns) dimensions of the loaded frame.
data.shape

(1000, 3)

In [4]:
# Load the sentence-embedding model stored in the local directory below.
# SentenceTransformer is imported in the notebook's top import cell so that
# every dependency is declared in one place.
model_path = "./models/paraphrase-multilingual-mpnet-base-v2"
model = SentenceTransformer(model_path)

2024-06-05 12:40:43.458838: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-05 12:40:43.509680: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# Embed every question in a single encode() call; the trailing expression
# displays the shape of the resulting embedding array.
text = data["question"].tolist()
embeddings = model.encode(text)
embeddings.shape

(1000, 768)

In [6]:
# Store one embedding per row: iterating the 2-D array yields its rows, so each
# cell of the new "vector" column holds a single 1-D array.
data["vector"] = [row_vector for row_vector in embeddings]

In [7]:
# Preview the frame again to check the newly added "vector" column.
data.head(3)

Unnamed: 0,id,question,answer,vector
0,0,Is Disability Insurance Required By Law?,Not generally. There are five states that requ...,"[-0.042642176, 0.2422998, -0.0092916805, -0.07..."
1,1,Can Creditors Take Life Insurance After ...,If the person who passed away was the one with...,"[0.011983045, 0.2737187, -0.0107803, -0.036528..."
2,2,Does Travelers Insurance Have Renters Ins...,One of the insurance carriers I represent is T...,"[-0.054962646, 0.26135075, -0.0066415817, -0.0..."


#### Connect to Milvus Lite

In [8]:
# Create the Milvus client against a local database file (Milvus Lite).
client = MilvusClient(uri="./data/milvus_lite_question_answer.db")

#### Create Collection

In [9]:
# Start from an empty collection: the database lives in a file on disk, so
# without this a re-run of the notebook would insert into a collection that an
# earlier run already populated.
if client.has_collection(collection_name="question_answer"):
    client.drop_collection(collection_name="question_answer")

# Size the vector field from the embeddings themselves rather than hard-coding
# 768, so the collection stays consistent if the embedding model is swapped.
client.create_collection(
    collection_name="question_answer",
    dimension=embeddings.shape[1],
)

In [10]:
# List the collections in the database to confirm the new one is present.
client.list_collections()

['question_answer']

#### Data insert

In [11]:
# Convert the frame to a list of per-row dicts (column name -> value), which is
# what gets passed to the insert call.
dict_records = data.to_dict("records")

In [13]:
# Insert all row dicts into the collection; the client's response is kept in
# `res` for inspection.
res = client.insert(
    data=dict_records,
    collection_name="question_answer",
)

#### Search

In [14]:
# Take the question at row position 950 as the query text.
test_question = data["question"].iloc[950]
print("test data :", test_question)

# Embed the query with the same model used for the stored questions, then
# report the embedding's shape and dtype.
test_emb = model.encode(test_question)
print(test_emb.shape, test_emb.dtype)

test data : Is  Car  Insurance  Federal  Law?
(768,) float32


In [15]:
# Retrieve the 5 nearest stored questions for the single query embedding,
# returning the original question and answer text with each hit.
op = client.search(
    collection_name="question_answer",
    data=[test_emb],
    output_fields=["question", "answer"],
    limit=5,
)

In [16]:
# Show the hits for the first (and only) query vector passed to search.
op[0]

[{'id': 950,
  'distance': 0.9999998211860657,
  'entity': {'question': 'Is  Car  Insurance  Federal  Law?',
   'answer': 'The are 3 basic tax deferred annuity crediting methods: interest rate crediting, indice crediting and separate accounts using equity and bond investments. The annuity company makes money in their portfolio spread between what earn and what they credit to a policy as well as mortality costs. They also charge fees that generate income for the company.'}},
 {'id': 132,
  'distance': 0.8885902166366577,
  'entity': {'question': 'Is  Auto  Insurance  A  Federal  Requirement?',
   'answer': 'Auto Insurance is not a requirement mandated by Federal law, but instead is required by most if not all states as a condition of registering a car for use on public access roads and highways. Property and Casualty insurance, including car insurance is regulated by state authority as opposed to the Federal Government.'}},
 {'id': 364,
  'distance': 0.8341759443283081,
  'entity': {'qu