In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.document_loaders import JSONLoader , DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

PydanticUserError: If you use `@root_validator` with pre=False (the default) you MUST specify `skip_on_failure=True`. Note that `@root_validator` is deprecated and should be replaced with `@model_validator`.

For further information visit https://errors.pydantic.dev/2.10/u/root-validator-pre-skip

In [2]:
import json
import os

class JSONLoader:
    def __init__(self, file_path):
        self.file_path = file_path

    def load(self):
        with open(self.file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        return data

class DirectoryLoader:
    def __init__(self, directory_path):
        self.directory_path = directory_path

    def load_all(self):
        json_data = {}
        for file_name in os.listdir(self.directory_path):
            if file_name.endswith('.json'):
                loader = JSONLoader(os.path.join(self.directory_path, file_name))
                json_data[file_name] = loader.load()
        return json_data

if __name__ == "__main__":
    # Load a specific JSON file using the full path
    jewel_loader = JSONLoader(r'C:\Ml Project\Smart-Chatbot-LLM-VectorDB\data\jewel_changi_content.json')
    jewel_data = jewel_loader.load()
    print("Loaded jewel_changi_content.json:")
    print(jewel_data)

Loaded jewel_changi_content.json:
['[Advisory] There will be no Light & Music Showcase from 3 to 9 January due to the annual maintenance works.', '[Advisory] The following attractions will be closed from 3 to 9 January 2025:', 'Other attractions continue to remain open and tickets can be purchased here.', 'Attractions', 'Getting to/from Jewel', "Traveller's Information", 'Amenities & Services', 'Changi Lounge', 'Tax Refund at Jewel', 'Itinerary at Jewel', 'Plants at Jewel', 'Jewel Privileges Programme / Tourist Perks', 'Jewel Vouchers', 'Changi Rewards e-Voucher Flexi', 'eCapitaVoucher', 'Jewel Guided Tour', 'Jewel Beleafers Volunteer Programme', 'Shopping & Dining Promotions', 'Canopy Park Promotions', 'Weddings at Jewel', 'Venue Hire', 'Jewel-rassic Quest', 'Walking Net\nWalking Net - Jewel Changi Airport', 'Bouncing Net\nBouncing Net - Jewel Changi Airport', 'Mirror Maze\nMirror Maze - Jewel Changi Airport', 'Hedge Maze\nHedge Maze - Jewel Changi Airport', 'Mastercard® Canopy Bridge

In [3]:
from langchain.docstore.document import Document

# Convert list of strings to a list of Document objects
documents = [Document(page_content=item) for item in jewel_data]

# Verify the results
for doc in documents:
    print(doc.page_content)


[Advisory] There will be no Light & Music Showcase from 3 to 9 January due to the annual maintenance works.
[Advisory] The following attractions will be closed from 3 to 9 January 2025:
Other attractions continue to remain open and tickets can be purchased here.
Attractions
Getting to/from Jewel
Traveller's Information
Amenities & Services
Changi Lounge
Tax Refund at Jewel
Itinerary at Jewel
Plants at Jewel
Jewel Privileges Programme / Tourist Perks
Jewel Vouchers
Changi Rewards e-Voucher Flexi
eCapitaVoucher
Jewel Guided Tour
Jewel Beleafers Volunteer Programme
Shopping & Dining Promotions
Canopy Park Promotions
Weddings at Jewel
Venue Hire
Jewel-rassic Quest
Walking Net
Walking Net - Jewel Changi Airport
Bouncing Net
Bouncing Net - Jewel Changi Airport
Mirror Maze
Mirror Maze - Jewel Changi Airport
Hedge Maze
Hedge Maze - Jewel Changi Airport
Mastercard® Canopy Bridge
Mastercard® Canopy Bridge - Jewel Changi Airport
Discovery Slides
Discovery Slides - Jewel Changi Airport
Changi Expe

In [4]:
# Define text_split function
def text_split(documents):
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
        text_chunks = text_splitter.split_documents(documents)
        return text_chunks

    # Apply RecursiveCharacterTextSplitter
text_chunks = text_split(documents)

In [5]:
# Output results
print(f"Length of chunks: {len(text_chunks)}")
for i, chunk in enumerate(text_chunks):
    print(f"Chunk {i+1}: {chunk.page_content}")

Length of chunks: 75
Chunk 1: [Advisory] There will be no Light & Music Showcase from 3 to 9 January due to the annual maintenance works.
Chunk 2: [Advisory] The following attractions will be closed from 3 to 9 January 2025:
Chunk 3: Other attractions continue to remain open and tickets can be purchased here.
Chunk 4: Attractions
Chunk 5: Getting to/from Jewel
Chunk 6: Traveller's Information
Chunk 7: Amenities & Services
Chunk 8: Changi Lounge
Chunk 9: Tax Refund at Jewel
Chunk 10: Itinerary at Jewel
Chunk 11: Plants at Jewel
Chunk 12: Jewel Privileges Programme / Tourist Perks
Chunk 13: Jewel Vouchers
Chunk 14: Changi Rewards e-Voucher Flexi
Chunk 15: eCapitaVoucher
Chunk 16: Jewel Guided Tour
Chunk 17: Jewel Beleafers Volunteer Programme
Chunk 18: Shopping & Dining Promotions
Chunk 19: Canopy Park Promotions
Chunk 20: Weddings at Jewel
Chunk 21: Venue Hire
Chunk 22: Jewel-rassic Quest
Chunk 23: Walking Net
Walking Net - Jewel Changi Airport
Chunk 24: Bouncing Net
Bouncing Net - Jewe

In [14]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [16]:
embeddings = download_hugging_face_embeddings()

In [17]:
print(embeddings)

client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
) model_name='sentence-transformers/all-MiniLM-L6-v2' cache_folder=None model_kwargs={} encode_kwargs={}


In [18]:
query_result = embeddings.embed_query("Hello Everyone")
print("Length", len(query_result))

Length 384


In [19]:
query_result

[-0.08075389266014099,
 0.023716680705547333,
 0.06925341486930847,
 0.03541908040642738,
 -0.04857468232512474,
 -0.08920742571353912,
 0.04558877274394035,
 0.0375569723546505,
 -0.02888455055654049,
 0.0018666101386770606,
 0.009728995151817799,
 0.03500308841466904,
 -0.023960722610354424,
 -0.0029713772237300873,
 -0.00012323154078330845,
 0.03204181045293808,
 -0.01193520613014698,
 -0.10722782462835312,
 -0.10799280554056168,
 0.05302891135215759,
 -0.03382653743028641,
 0.05754011869430542,
 -0.026684366166591644,
 0.0054557970724999905,
 -0.05876166746020317,
 0.008353602141141891,
 0.05532684177160263,
 0.020789533853530884,
 -0.02305750362575054,
 -0.06735552847385406,
 -0.026168981567025185,
 0.016779854893684387,
 0.11763430386781693,
 0.018279602751135826,
 0.004240034148097038,
 0.09002988040447235,
 -0.054087020456790924,
 -0.034506767988204956,
 -0.01146402582526207,
 0.020212002098560333,
 -0.013799658045172691,
 -0.014243573881685734,
 -0.049998752772808075,
 -0.0029

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http import models

# Replace with your Qdrant Cloud API key and endpoint
API_KEY = "QNP0zO-TLdZ3OrM28dYYu1psk5nGYsfQi3EHjraIGy9g-q2oH_jSNA"
ENDPOINT = "https://3607b9cc-3e8d-4a76-ada5-843a14f7b744.europe-west3-0.gcp.cloud.qdrant.io"

# Initialize Qdrant client
client = QdrantClient(
    url=ENDPOINT,
    api_key=API_KEY
)

# Create or reset collection in Qdrant
collection_name = "my_collection"
vector_size = 384  # Adjust this based on your embedding model's output size

# Define vectors configuration
vectors_config = models.VectorParams(
    size=vector_size,         # Dimension of the vectors
    distance=models.Distance.COSINE  # Similarity metric
)

client.recreate_collection(
    collection_name=collection_name,
    vectors_config=vectors_config
)

print(f"Collection '{collection_name}' created successfully.")


  client.recreate_collection(


Collection 'my_collection' created successfully.


In [21]:
import uuid

# Generate embeddings for each chunk of text
for i, document in enumerate(text_chunks):
    text = document.page_content  # Extract the text from the Document object
    vector = embeddings.embed_query(text)  # Generate embedding for each text
    
    # Generate a unique UUID for each point
    point_id = str(uuid.uuid4())

    client.upsert(
        collection_name=collection_name,
        points=[
            {
                'id': point_id,
                'vector': vector,
                'payload': {'text': text}
            }
        ]
    )

print(f"Embeddings for {len(text_chunks)} text chunks stored in Qdrant successfully.")


Embeddings for 75 text chunks stored in Qdrant successfully.


In [22]:
# Assuming embeddings and client are already initialized and collection exists

query = "What are the name of Changi Airport Lounge"
query_vector = embeddings.embed_query(query)  # Generate embedding for the query

# Perform similarity search in Qdrant
docs = client.search(
    collection_name=collection_name,
    query_vector=query_vector,
    limit=3
)

# Print the results
for result in docs:
    print(result.payload["text"])  # Assuming the text is in the payload

Changi Lounge
Canopy Park
Canopy Park - Jewel Changi Airport
Hedge Maze
Hedge Maze - Jewel Changi Airport


In [23]:
prompt_template="""

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [24]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [43]:
llm = CTransformers(model="C:/Ml Project/Smart-Chatbot-LLM-VectorDB/model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                   model_type="llama",
                   config={'max_new_tokens': 512, 'temperature': 0.8})


In [5]:
from langchain.chains import RetrievalQA
from langchain.vectorstores import Qdrant
from langchain.embeddings import HuggingFaceEmbeddings
from qdrant_client import QdrantClient
from langchain.llms import CTransformers

# Initialize Qdrant client
client = QdrantClient(url="https://3607b9cc-3e8d-4a76-ada5-843a14f7b744.europe-west3-0.gcp.cloud.qdrant.io")

# Set the collection name
collection_name = "my_collection"

# Create Qdrant retriever
retriever = Qdrant(client=client, collection_name=collection_name, embedding=embeddings)


# Use RetrievalQA with the retriever
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # Use "stuff" for combining documents
    retriever=retriever.as_retriever(),  # Convert retriever to the format expected by RetrievalQA
    return_source_documents=True
)

# Query and get the result
query = "What are the names of Changi Airport Lounges?"
result = qa.run(query)

# Print result
print(result)


PydanticUserError: If you use `@root_validator` with pre=False (the default) you MUST specify `skip_on_failure=True`. Note that `@root_validator` is deprecated and should be replaced with `@model_validator`.

For further information visit https://errors.pydantic.dev/2.10/u/root-validator-pre-skip

In [None]:
from langchain_qdrant import RetrievalMode
from qdrant_client import QdrantClient
from transformers import CTransformers

# Initialize Qdrant client and retriever
client = QdrantClient(url="https://3607b9cc-3e8d-4a76-ada5-843a14f7b744.europe-west3-0.gcp.cloud.qdrant.io")
retriever = RetrievalMode(client=client, collection_name="my_collection", embedding="embeddings")

# Initialize your LLM
llm = CTransformers(model="C:/Ml Project/Smart-Chatbot-LLM-VectorDB/model/llama-2-7b-chat.ggmlv3.q4_0.bin", model_type="llama")

# Simple Query and Retrieval
query = "What are the names of Changi Airport Lounges?"
results = retriever.similer_search(query, k=2)  # Get top 2 relevant results

# Generate response from LLM using retrieved results
response = llm(results)

# Output the result
print(response)




ImportError: cannot import name 'CTransformers' from 'transformers' (c:\Users\newth\miniconda3\envs\chatbot\lib\site-packages\transformers\__init__.py)

In [None]:
from langchain.chains import RetrievalQA
from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient

# Assuming client, collection_name, and embeddings are initialized correctly

# Initialize Qdrant client (ensure it points to your Qdrant server)
client = QdrantClient(url="https://3607b9cc-3e8d-4a76-ada5-843a14f7b744.europe-west3-0.gcp.cloud.qdrant.io") 

# Initialize the retriever with Qdrant
retriever = Qdrant(client=client, collection_name=collection_name, embedding=embeddings)

# Set up the RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,  # Pre-initialized `llm` here
    chain_type="stuff",  # Choose chain type
    retriever=retriever,  # Use `retriever` directly
    return_source_documents=True
)

# Query
query = "What are the name of Changi Airport Lounge?"
result = qa.run(query)

# Print result
print(result)


TypeError: __init__() got an unexpected keyword argument 'embedding'

In [47]:
from langchain.chains import RetrievalQA
from langchain_qdrant import RetrievalMode
# Assuming `llm` and `client` are already initialized

# The retriever setup for Qdrant (with corrected retrieve method)
retriever = client.RetrievalMode(search_kwargs={'k': 2})

# Set up the RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,  # Pre-initialized `llm` here
    chain_type="stuff",  # Choose chain type
    retriever=retriever,  # Use `retriever` directly without needing any extra method
    return_source_documents=True
)

# Query
query = "What are the name of Changi Airport Lounge?"
result = qa.run(query)

# Print result
print(result)



AttributeError: 'QdrantClient' object has no attribute 'RetrievalMode'

In [None]:
from langchain.chains import RetrievalQA

# Assuming `llm` and `client` (QdrantClient) are already initialized

# Convert Qdrant client to retriever, fetching top 2 documents
retriever = client.as_retriever(search_kwargs={'k': 2})

# Optionally, define chain type kwargs if needed
chain_type_kwargs = {}

# Initialize the RetrievalQA chain with the Qdrant retriever
qa = RetrievalQA.from_chain_type(
    llm=llm,  # Pre-initialized language model
    chain_type="stuff",  # You can use "stuff", "map_reduce", etc.
    retriever=retriever,  # Use the retriever set up from Qdrant client
    return_source_documents=True,  # This will return the source documents as well
    chain_type_kwargs=chain_type_kwargs  # Any additional kwargs
)

# Query for information
query = "What is the Changi Lounge?"

# Run the query
result = qa.run(query)

# Print the result, including the answer and source documents
print(result)

In [88]:
from langchain.chains import RetrievalQA

# Assuming `llm` and `client` are already initialized

# The retriever setup for Qdrant
retriever = client.as_retriever(search_kwargs={'k': 2})

# Define chain type kwargs if needed, otherwise remove or adjust accordingly
chain_type_kwargs = {}  # If you have any specific kwargs for the chain, set them here

# Initialize RetrievalQA with Qdrant retriever
qa = RetrievalQA.from_chain_type(
    llm=llm,  # Use the pre-initialized `llm` here
    chain_type="stuff",  # Can be "stuff", "map_reduce", or other supported types
    retriever=retriever,
    return_source_documents=True,  # This will return the source documents in the response
    chain_type_kwargs=chain_type_kwargs  # Additional arguments for the chain type
)


AttributeError: 'QdrantClient' object has no attribute 'as_retriever'

In [24]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response : ", result["result"])

Response :  Acne is a common skin disease characterized by pimples on the face, chest, and back that occur when the pores of the skin become clogged with oil, dead skin cells, and bacteria.


KeyboardInterrupt: 