In [1]:
from langchain.document_loaders import DirectoryLoader

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
import chromadb
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms.ollama import Ollama

In [2]:
loader = DirectoryLoader('Data', glob="**/*.txt")
docs = loader.load()

In [3]:
embeddings = OllamaEmbeddings(model="llama3.1")

In [4]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,)

In [5]:
document = text_splitter.split_documents(docs)

In [6]:
document

[Document(metadata={'source': 'Data\\data1.txt'}, page_content='Step into our Tuscan-inspired sanctuary, where the golden hues of sun-kissed landscapes and the earthy aromas of olive groves and vineyards set the stage for a memorable dining experience. Tuscany,'),
 Document(metadata={'source': 'Data\\data1.txt'}, page_content='Tuscany, renowned for its breathtaking countryside and rich culinary heritage, offers a bounty of flavors and traditions to explore.'),
 Document(metadata={'source': 'Data\\data1.txt'}, page_content='Begin your journey with an antipasto platter adorned with cured meats, aged cheeses, and marinated vegetables, reminiscent of leisurely gatherings under the Tuscan sun. Each bite tells a story of'),
 Document(metadata={'source': 'Data\\data1.txt'}, page_content='tells a story of generations past, of farmers and artisans who have perfected their craft over centuries.'),
 Document(metadata={'source': 'Data\\data1.txt'}, page_content='Indulge in classics like Ribollita,

In [None]:
global_embeddings = [ embeddings.embed_query(txt) for txt in document]

In [9]:
# running chromadb locally
client = chromadb.HttpClient(host= "localhost", port= 8000)

In [None]:
collection = client.create_collection(name="rag_dataset_cosine",metadata={"hnsw:space": "cosine"})

In [10]:
# for already existing vector database
update_db = client.get_collection(name="rag_dataset_cosine")

In [11]:
client.list_collections()

[Collection(id=26d6926f-4f80-496b-9d25-7c89a8267861, name=rag_dataset_cosine),
 Collection(id=b8b75fe8-edd7-4520-913e-fd47db225a94, name=rag_dataset)]

In [None]:
client.get_collection('rag_dataset')

In [None]:
# for existing database
collection = client.get_collection('rag_dataset')

In [None]:
def creating_unique_chunk_ids():
    
    last_index = 0
    last_page_id = None
    for doc in document:
        source = doc.metadata.get('source')
        page = doc.metadata.get('page')
        current_page_id = f"{source}:{page}"
        if current_page_id == last_page_id:
            index += 1

        else:
            index = 0
        
        last_page_id = current_page_id
        chunk_id = f"{source}:{index}"
        doc.metadata["id"] = chunk_id
    return chunk_id

In [None]:
creating_unique_chunk_ids()

In [None]:
new_chunk_id = [chunk.metadata['id'] for chunk in document]

In [22]:
page_content = [page.page_content for page in document]
page_content

['Step into our Tuscan-inspired sanctuary, where the golden hues of sun-kissed landscapes and the earthy aromas of olive groves and vineyards set the stage for a memorable dining experience. Tuscany,',
 'Tuscany, renowned for its breathtaking countryside and rich culinary heritage, offers a bounty of flavors and traditions to explore.',
 'Begin your journey with an antipasto platter adorned with cured meats, aged cheeses, and marinated vegetables, reminiscent of leisurely gatherings under the Tuscan sun. Each bite tells a story of',
 'tells a story of generations past, of farmers and artisans who have perfected their craft over centuries.',
 'Indulge in classics like Ribollita, a hearty vegetable soup enriched with cannellini beans and Tuscan kale. This comforting dish, born out of frugality and resourcefulness, warms both body and soul,',
 'both body and soul, offering a taste of Tuscan tradition with every spoonful.',
 'Alternatively, savor the simplicity of Cacciucco, a rustic seafo

In [None]:
collection.add(documents= page_content, ids= new_chunk_id,embeddings = global_embeddings)

In [None]:
collection.peek()

In [None]:
embedded_querry = embeddings.embed_query("who crowned with plump shrimp and tender calamari")

In [None]:
querry_text = "who crowned with plump shrimp and tender calamari"

In [None]:
res = collection.query(embedded_querry,n_results=5,
            include=['distances','embeddings', 'documents', 'metadatas'])

In [None]:
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""

In [None]:
context_text = "\n\n---\n\n".join([doc for doc in res['documents'][0]])

In [None]:
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=context_text, question=querry_text)

In [None]:
print(prompt)

In [None]:
model = Ollama(model="llama3.1")
response_text = model.invoke(prompt)

In [None]:
response_text

## Building chatbot


In [12]:
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [20]:
def chatfunction(text_box,history):
    querry_text = text_box
    embedded_querry = embeddings.embed_query(text_box)
    collection = client.get_collection(name="rag_dataset_cosine")
    res = collection.query(embedded_querry,n_results=5,
            include=['distances','embeddings', 'documents', 'metadatas'])
    PROMPT_TEMPLATE = """
    Answer the question based only on the following context:

    {context}

    ---

    Answer the question based on the above context: {question}
    """
    context_text = "\n\n---\n\n".join([doc for doc in res['documents'][0]])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt_ = prompt_template.format(context=context_text, question=querry_text)
    model = Ollama(model="llama3.1")
    response_text = model.invoke(prompt_)

    return response_text
    
    
    

In [21]:
gr.ChatInterface(fn =chatfunction, textbox= gr.Textbox(placeholder= " enter message here"),
                 chatbot= gr.Chatbot()).launch()

Running on local URL:  http://127.0.0.1:7873

To create a public link, set `share=True` in `launch()`.


