### 06.01 Setting up Milvus for RAG

In [1]:
#Create the Connection and database for RAG
from pymilvus import connections
from pymilvus import db,Collection

from pymilvus import utility

connections.add_connection(
    rag_conn={
        "host": "localhost",
        "port": "19530",
        "username" : "username",
        "password" : "password"
    })

conn_name="rag_conn"
db_name="rag_db"

connections.connect(conn_name)
connections.list_connections()

current_dbs=db.list_database(using=conn_name)
print("Databases: ", current_dbs)

if ( db_name not in current_dbs):
    print("Creating database :", db_name)
    resume_db = db.create_database(db_name, using=conn_name) 

#Switch to the new database
db.using_database(db_name, using=conn_name)



Databases:  ['default', 'cache_db']
Creating database : rag_db


In [2]:
#Create a new collection for RAG
from pymilvus import CollectionSchema, FieldSchema, DataType, Collection
import json

chunk_id_field = FieldSchema(
    name="chunk_id",
    dtype=DataType.INT64,
    is_primary=True,
    max_length=32)

rag_text_field= FieldSchema(
    name="rag_text",
    dtype=DataType.VARCHAR,
    max_length=2048)

rag_embedding_field = FieldSchema(
    name="rag_embedding",
    dtype=DataType.FLOAT_VECTOR,
    dim=1536 #Define based on embedding used
)

rag_schema=CollectionSchema(
    fields=[chunk_id_field, rag_text_field, rag_embedding_field],
    description="RAG Schema",
    enable_dynamic_field=True
)

collection_name="rag_collection"

rag_collection=Collection(
    name=collection_name,
    schema=rag_schema,
    using=conn_name,
    shard_num=2
)

from pymilvus import utility
print("Collections: ", utility.list_collections(using=conn_name))

r_collection=Collection(collection_name, using=conn_name)
print("\n Schema :", r_collection.schema)

Collections:  ['rag_collection']

 Schema : {'auto_id': False, 'description': 'RAG Schema', 'fields': [{'name': 'chunk_id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'rag_text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 2048}}, {'name': 'rag_embedding', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 1536}}], 'enable_dynamic_field': True}


### 06.02. Preparing data for Knowledge Base

In [3]:
#Load up the PDF document
from langchain.document_loaders import PDFMinerLoader

loader = PDFMinerLoader("Large Language Models.pdf")
pdf_docs = loader.load()


In [4]:
#Split document into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter   =   RecursiveCharacterTextSplitter(
    chunk_size=512, # Specify the character chunk size
    chunk_overlap=32, # "Allowed" Overlap across chunks
    length_function=len # Function used to evaluate the chunk size (here in terms of characters)
)

pdf_docs    =   text_splitter.split_documents(pdf_docs)

#Create a list of chunks
rag_text =[]
for i in pdf_docs:
    rag_text.append(i.page_content)
    
print("Total chunks :", len(rag_text))
print("Sample chunk text: ", rag_text[1])

Total chunks : 23
Sample chunk text:  As autoregressive language models, they work by taking an input text and repeatedly predicting the 
next token or word. Up to 2020, fine tuning was the only way a model could be adapted to be able to 
accomplish specific tasks. Larger sized models, such as GPT-3, however, can be prompt-engineered to 
achieve similar results. They are thought to acquire knowledge about syntax, semantics and 
"ontology" inherent in human language corpora, but also inaccuracies and biases present in the


In [5]:
#create embeddings
from langchain.embeddings import OpenAIEmbeddings
import os

openai_api_key=""
os.environ["OPENAI_API_KEY"] = openai_api_key

embeddings_model = OpenAIEmbeddings()

rag_embedding=[embeddings_model.embed_query(i) 
                  for i in rag_text]

#Create chunk IDs 
record_ids=[i for i in range(len(rag_text))]

### 06.03. Populating the Milvus database

In [6]:
insert_data=[record_ids, rag_text, rag_embedding]

i_collection = Collection(collection_name, using=conn_name)

#Insert the records
mr=i_collection.insert(insert_data)
#Flush the inserted records
i_collection.flush()

#Build an index on the embedding field
index_params = {
    "metric_type":"L2",
    "index_type":"IVF_FLAT",
    "params" :{"nlist":1024}
}

i_collection.create_index(
    field_name="rag_embedding",
    index_params=index_params
)

utility.index_building_progress(collection_name, using=conn_name)

{'total_rows': 23, 'indexed_rows': 23, 'pending_index_rows': 0}

### 06.04 Answering questions with RAG

In [8]:
#The retrieval process
search_params = {
    "metric_type": "L2", 
    "offset": 0, 
    "ignore_growing": False, 
    "params": {"nprobe": 20, "radius":0.5}
}

query = "What is gender bias?"
search_embed=embeddings_model.embed_query(query)
#print(search_embed)

q_collection = Collection(collection_name, using=conn_name)
q_collection.load()

results=q_collection.search(
    data=[search_embed],
    anns_field="rag_embedding",
    param=search_params,
    limit=3, #Get top 3 results only
    expr=None,
    output_fields=["rag_text"],
    consistency_level="Strong"
)

print("Top result :", results[0][0])

Top result : id: 8, distance: 0.20583292841911316, entity: {'rag_text': 'Gender bias refers to the tendency of these models to produce outputs that are unfairly prejudiced \ntowards one gender over another. This bias typically arises from the data on which these models are \ntrained. For example, large language models often assign roles and characteristics based on \ntraditional gender norms; it might associate nurses or secretaries predominantly with women and \nengineers or CEOs with men.'}


In [9]:
#Prepare prompt for LLM

context=[]

#Append all returned chunks
for i in range(len(results[0])):
    context.append(results[0][i].entity.get("rag_text"))

#Create a prompt
prompt= ("Based on only the context provided, answer the query below: "
        + " Context: " + str(context)
        + "\n\n Query: " + query)
        
print(prompt)

Based on only the context provided, answer the query below:  Context: ['Gender bias refers to the tendency of these models to produce outputs that are unfairly prejudiced \ntowards one gender over another. This bias typically arises from the data on which these models are \ntrained. For example, large language models often assign roles and characteristics based on \ntraditional gender norms; it might associate nurses or secretaries predominantly with women and \nengineers or CEOs with men.', 'Language bias refers a type of statistical sampling bias tied to the language of a query that leads to \n"a systematic deviation in sampling information that prevents it from accurately representing the \ntrue coverage of topics and views available in their repository." Luo et al. show that current large \nlanguage models, as they are predominately trained on English-language data, often present the \nAnglo-American views as truth, while systematically downplaying non-English perspectives as', 'Be

In [10]:
#Generate with LLM

from langchain.llms import OpenAI

llm= OpenAI(temperature=0., model="text-davinci-003")

completion=llm(prompt)
print(completion)



Gender bias is the tendency of models to produce outputs that are unfairly prejudiced towards one gender over another. This bias typically arises from the data on which these models are trained, and can lead to outputs that unfairly generalize or caricature groups of people, sometimes in harmful or derogatory ways.
