A simple pipeline that:
- reads all the .txt files from a folder
- chunks the data
- applies an embedding model
- saves the embeddings in PGVector and Qdrant (preprod)
- retrieves data from PGVector
- answers a question using Mistral


In [1]:
import requests
import json
import os
from langchain.vectorstores.pgvector import DistanceStrategy
from langchain_postgres import PGVector
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Qdrant
from langchain_postgres import PGVector
from langchain_postgres.vectorstores import PGVector
from dotenv import load_dotenv


In [2]:
# Load settings from a local .env file into the process environment.
load_dotenv()

# Input folder, vector collection name and embedding model name.
local_path = os.getenv("LOCAL_PATH")
collection_name = os.getenv("COLLECTION_NAME")
embedding_model = os.getenv("EMBEDDING_MODEL_NAME")

# The chunking parameters are required integers. Indexing os.environ makes a
# missing variable fail with a KeyError that names it, instead of the opaque
# TypeError raised by int(None).
chunk_size = int(os.environ["CHUNK_SIZE"])
chunk_overlap = int(os.environ["CHUNK_OVERLAP"])

# Connection settings for the two vector stores (Qdrant values default to "").
pgddisconnection = os.getenv("PGDDISCONNECTION")
qdrant_url = os.getenv("QDRANT_URL", "")
qdrant_api_key = os.getenv("QDRANT_API_KEY", "")



In [3]:
# Question used for the retrieval and generation cells below.
question = (
    "What is Retrieval-Augmented Generation (RAG), "
    "and why is it useful?"
)

In [4]:
# Hugging Face embedding model, selected by `embedding_model` and run on CPU.
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model,
    model_kwargs={"device": "cpu"},
)


  from tqdm.autonotebook import tqdm, trange


In [5]:
# Sanity check: show the input folder read from LOCAL_PATH.
print(local_path)

./data/mcl_test


In [6]:
# Load every file matching ./*.txt under the configured input folder.
loader = DirectoryLoader(
    f"{local_path}",
    glob="./*.txt",
)
documents = loader.load()

In [7]:
# Split the loaded documents on the '##' marker, using the chunk size and
# overlap read from the environment.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["##"],
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
texts = text_splitter.split_documents(documents)

In [8]:
# Inspect the first loaded document (metadata and raw page content).
documents[0]

Document(metadata={'source': 'data/mcl_test/mcl_test.txt'}, page_content='# Building a Retrieval-Augmented Generation (RAG) System with LangChain\n\n## Introduction to RAG\n\nRetrieval-Augmented Generation (RAG) is a powerful method that combines retrieval of information with generative models. This approach is particularly effective in scenarios where the available data is too vast to be memorized by the model. Instead of relying solely on the model\'s pre-trained knowledge, RAG leverages external data sources to provide more accurate and contextually relevant responses.\n\nLangChain is an excellent framework for building RAG systems. It provides tools to integrate various language models with external data sources, enabling the creation of dynamic and responsive applications.\n\n## Step-by-Step Guide to Building a RAG System with LangChain\n\n### 1. Setting Up LangChain\n\nTo begin, you\'ll need to install LangChain and its dependencies. This can be done via pip:\n\n```bash\n\npip in

In [9]:
# Number of documents the loader returned.
len(documents)

1

In [10]:
# Preview the first three chunks produced by the splitter.
texts[0:3]

[Document(metadata={'source': 'data/mcl_test/mcl_test.txt'}, page_content="# Building a Retrieval-Augmented Generation (RAG) System with LangChain\n\n## Introduction to RAG\n\nRetrieval-Augmented Generation (RAG) is a powerful method that combines retrieval of information with generative models. This approach is particularly effective in scenarios where the available data is too vast to be memorized by the model. Instead of relying solely on the model's pre-trained knowledge, RAG leverages external data sources to provide more accurate and contextually relevant responses.\n\nLangChain is an excellent framework for building RAG systems. It provides tools to integrate various language models with external data sources, enabling the creation of dynamic and responsive applications.\n\n## Step-by-Step Guide to Building a RAG System with LangChain"),
 Document(metadata={'source': 'data/mcl_test/mcl_test.txt'}, page_content="### 1. Setting Up LangChain\n\nTo begin, you'll need to install Lang

In [11]:
# Embed the chunks and write them to the pgvector collection.
# pre_delete_collection=True: the existing collection is replaced, not appended to.
db = PGVector.from_documents(
    documents=texts,
    embedding=embeddings,
    connection=pgddisconnection,
    collection_name=collection_name,
    distance_strategy=DistanceStrategy.COSINE,
    pre_delete_collection=True,
)

In [12]:

# Embed the same chunks and write them to the Qdrant collection.
# force_recreate=True: the collection is rebuilt on every run.
Qdrant.from_documents(
    documents=texts,
    embedding=embeddings,
    url=qdrant_url,
    port=None,
    api_key=qdrant_api_key,
    collection_name=collection_name,
    force_recreate=True,
)

<langchain_community.vectorstores.qdrant.Qdrant at 0x7f6d0cc1ec90>

In [None]:
# Open the pgvector collection through a separate PGVector handle,
# using the same connection, collection name and embedding model.
vector_store = PGVector(
    embeddings=embeddings,
    collection_name=collection_name,
    connection=pgddisconnection,
)


In [None]:
# Retrieve the two chunks closest to the question through the new handle.
test = vector_store.similarity_search(query=question, k=2)
test

In [None]:
# `question` is the query for which we want to find semantically similar documents.


# Fetch the single most similar chunk (k=1) through the `db` store.
docs =  db.similarity_search(question, k=1)

In [None]:
# Display the retrieved chunk(s) that will be used as context.
docs

In [None]:
# Build the retrieval context. Each chunk is followed by the file it came
# from, because the instructions below ask the model to cite its source.
context = '\n'.join(
    f"{doc.page_content}\nsource: {doc.metadata.get('source', 'unknown')}"
    for doc in docs
)

# Mistral-instruct style prompt: the whole instruction, including the context
# and the question, sits between [INST] and [/INST]. The closing tag takes a
# forward slash; a backslash there would also be an invalid string escape.
prompt = f"""[INST]You are a helpful chatbot that can answer questions based on the provided context. 
You need not make use of the entire context provided to you.
Try to interpret the question. If it is a general question asking for definitions, you can rephrase the content without changing the meaning of it.
If the asked question demands steps or process or procedure, do not change the content and stick to the original form as possible. Also if context has Red Hat specific knowledge add that in answer.
Also provide the source from which you took the answer under source: tag

Context: {context}
Question: {question} [/INST]"""

In [None]:
# Send the prompt to the Mistral chat-completions endpoint (non-streaming).
url = 'https://ddis-mistral-7b.apps.int.stc.ai.preprod.us-east-1.aws.paas.redhat.com/v1/chat/completions'
headers = {
    'accept': 'application/json',
    'Content-Type': 'application/json',
}
data = {
    "messages": [
        {
            "role": "user",
            "content": prompt
        }
    ],
    "model": "mistral-7b",
    "stream": False
}

try:
    # NOTE(review): verify=False disables TLS certificate verification.
    # Prefer passing the CA bundle path via `verify=` once one is available.
    response = requests.post(url, headers=headers, json=data, verify=False, timeout=30)
    response.raise_for_status()  # Check for HTTP errors

except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")
    # Re-raise so later cells do not run against a missing or stale `response`.
    raise

In [None]:
# Print the text of the first choice from the chat-completions JSON response.
print(response.json()['choices'][0]['message']['content']) 

In [None]:
# Ask the server which models it serves and keep the id of the first one.
try:
    # The timeout keeps the notebook from hanging if the endpoint is unreachable.
    response = requests.get(
        "https://ddis-mistral-7b.apps.int.stc.ai.preprod.us-east-1.aws.paas.redhat.com/v1/models",
        timeout=30,
    )
    response.raise_for_status()
    model = response.json()["data"][0]["id"]
    print(f"Model ID: {model}")
except requests.RequestException as e:
    print(f"Failed to fetch model information: {e}")
    raise
model

In [None]:
from openai import OpenAI


# Client for the OpenAI-compatible API of the same server; the api_key is a
# placeholder value.
client = OpenAI(
    api_key="EMPTY",
    base_url="https://ddis-mistral-7b.apps.int.stc.ai.preprod.us-east-1.aws.paas.redhat.com/v1",
)

# Request a streamed answer to the prompt from the model id fetched earlier.
stream = client.chat.completions.create(
    model=model,
    messages=[{'role': 'user', 'content': prompt}],
    stream=True,
    user='user_identifier',
)

# Print each piece as it arrives and collect the pieces for the full answer.
pieces = []
for chunk in stream:
    # A streamed chunk can carry an empty `choices` list; skip it instead of
    # failing with IndexError.
    if not chunk.choices:
        continue
    token = chunk.choices[0].delta.content
    if token is not None:
        print(token, end="")
        pieces.append(token)
response = "".join(pieces)
response
