#### Import Necessary Library

In [1]:
# %% Imports
import os
#from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

import pymongo
import uuid
from datetime import datetime
from dotenv import load_dotenv

#### Data Ingestion- Documentloaders

In [2]:
## Reading a data from mangodb database
load_dotenv()

# MongoDB connection
MONGO_URI = os.getenv("MONGO_URI", "mongodb+srv://shubhs016:Customer016@cluster1.bnavxbj.mongodb.net/Customer-db?retryWrites=true&w=majority&appName=Cluster1")
DB_NAME = os.getenv("DB_NAME", "Customer_Support-db")

clients = pymongo.MongoClient(MONGO_URI)

In [3]:
# Lets check the database
db_name = clients.list_database_names()
print(db_name)


['Customer-db', 'Customer_Support-db', 'sample_mflix', 'admin', 'local']


In [4]:
# Lets Check collection in our database @Customer_Support-db

collection = clients['Customer_Support-db']

print(collection.list_collection_names())

['chat_history', 'user_purchase_history', 'Orders', 'kb', 'users', 'user_login_history', 'user_logs_history']


In [5]:
# users
sample_doc = collection["users"].find_one()
print("Sample document:", sample_doc)

Sample document: {'_id': ObjectId('68bf111e871cf5ec0649272e'), 'username': 'alice', 'password': '1234', 'user_id': 'user_123'}


In [13]:
# kb
sample_doc = collection["kb"].find_one()
print("Sample document:", sample_doc)

Sample document: {'_id': ObjectId('68bf111e871cf5ec06492730'), 'question': 'How do I reset my password?', 'answer': "Click on 'Forgot Password' on the login page and follow the steps."}


In [15]:
# kb
sample_doc = collection["kb"].find_one()
print([sample_doc])

[{'_id': ObjectId('68bf111e871cf5ec06492730'), 'question': 'How do I reset my password?', 'answer': "Click on 'Forgot Password' on the login page and follow the steps."}]


In [23]:
# Fetch all documents from the kb collection
docs_cursor = collection["kb"].find({})

# Convert to a list
docs = list(docs_cursor)

for d in docs[:5]:
    print(d)

{'_id': ObjectId('68bf111e871cf5ec06492730'), 'question': 'How do I reset my password?', 'answer': "Click on 'Forgot Password' on the login page and follow the steps."}
{'_id': ObjectId('68bf111e871cf5ec06492731'), 'question': 'What should I do if my laptop doesn’t turn on?', 'answer': 'Please check the power connection and try holding the power button for 10 seconds.'}
{'_id': ObjectId('68bf111e871cf5ec06492732'), 'question': 'My payment failed but money deducted.', 'answer': 'Please provide your transaction ID. We will check and process a refund if applicable.'}


In [24]:
docs

[{'_id': ObjectId('68bf111e871cf5ec06492730'),
  'question': 'How do I reset my password?',
  'answer': "Click on 'Forgot Password' on the login page and follow the steps."},
 {'_id': ObjectId('68bf111e871cf5ec06492731'),
  'question': 'What should I do if my laptop doesn’t turn on?',
  'answer': 'Please check the power connection and try holding the power button for 10 seconds.'},
 {'_id': ObjectId('68bf111e871cf5ec06492732'),
  'question': 'My payment failed but money deducted.',
  'answer': 'Please provide your transaction ID. We will check and process a refund if applicable.'}]

In [25]:
# Convert into text for embeddings
text_data = [f"Q: {d['question']}\nA: {d['answer']}" for d in docs]
text_data

["Q: How do I reset my password?\nA: Click on 'Forgot Password' on the login page and follow the steps.",
 'Q: What should I do if my laptop doesn’t turn on?\nA: Please check the power connection and try holding the power button for 10 seconds.',
 'Q: My payment failed but money deducted.\nA: Please provide your transaction ID. We will check and process a refund if applicable.']

##### Text Splitting from Documents- RecursiveCharacter Text Splitters
final_documents = text_splitter.split_documents(text_data)

##### Text Splitting from textdata- RecursiveCharacter Text Splitters

In [31]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)

final_texts = text_splitter.split_text("\n".join(text_data))
final_texts

["Q: How do I reset my password?\nA: Click on 'Forgot Password' on the login page and follow the steps.\nQ: What should I do if my laptop doesn’t turn on?\nA: Please check the power connection and try holding the power button for 10 seconds.\nQ: My payment failed but money deducted.\nA: Please provide your transaction ID. We will check and process a refund if applicable."]

#### Embedding Techniques

Converting text into vectors

In [43]:
from dotenv import load_dotenv

#load all the environment variables
load_dotenv()

True

In [44]:
#os.environ["OPENAI_API_KEY"]=os.getenv("OPENAI_API_KEY") - thisis used for openai
os.environ['HF_TOKEN']=os.getenv("HF_TOKEN")

In [45]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
embeddings

  from .autonotebook import tqdm as notebook_tqdm


HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

#### VectorStore

1) Faiss
2) chromaDB
3) AstraDB

Here we are using chromaDB vectorStore

#from langchain_community.vectorstores import FAISS
#from langchain_community.vectorstores import FAISS
#db=FAISS.from_documents(final_documents,embeddings)


#SAVE DB
##db.save_local("faiss_index")

In [47]:
## Vector Embedding And Vector StoreDB
#from langchain_community.vectorstores import Chroma
#db=Chroma.from_documents(final_documents,embeddings)
#from_documents - it is used for document
# from_texts  - it is ussed for text

from langchain_community.vectorstores import FAISS
db=FAISS.from_texts(final_texts,embeddings)
db


<langchain_community.vectorstores.faiss.FAISS at 0x21785a35280>

In [48]:
### Retrieve the results from query vectorstore db
query="Can you integrate with Salesforce?"
retrieved_results=db.similarity_search(query)
print(retrieved_results)

[Document(metadata={}, page_content="Q: How do I reset my password?\nA: Click on 'Forgot Password' on the login page and follow the steps.\nQ: What should I do if my laptop doesn’t turn on?\nA: Please check the power connection and try holding the power button for 10 seconds.\nQ: My payment failed but money deducted.\nA: Please provide your transaction ID. We will check and process a refund if applicable.")]


In [50]:
## similarity Search With Score
query="Can you integrate with Salesforce?"
docs = db.similarity_search_with_score(query)
docs

[(Document(metadata={}, page_content="Q: How do I reset my password?\nA: Click on 'Forgot Password' on the login page and follow the steps.\nQ: What should I do if my laptop doesn’t turn on?\nA: Please check the power connection and try holding the power button for 10 seconds.\nQ: My payment failed but money deducted.\nA: Please provide your transaction ID. We will check and process a refund if applicable."),
  1.8364381)]

In [None]:
### Saving And Loading
## db.save_local("faiss_index")

## Saving to the disk = chrom db
from langchain_community.vectorstores import Chroma
db=Chroma.from_texts(final_texts, embedding=embeddings, persist_directory="./chroma_db")
db

<langchain_community.vectorstores.chroma.Chroma at 0x21788d72120>

In [54]:
# load from disk
#new_db=FAISS.load_local("faiss_index",embeddings,allow_dangerous_deserialization=True)
#docs=new_db.similarity_search(query)


# load from disk
from langchain_chroma import Chroma
db2 = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
docs=db2.similarity_search(query)
print(docs[0].page_content)

Q: How do I reset my password?
A: Click on 'Forgot Password' on the login page and follow the steps.
Q: What should I do if my laptop doesn’t turn on?
A: Please check the power connection and try holding the power button for 10 seconds.
Q: My payment failed but money deducted.
A: Please provide your transaction ID. We will check and process a refund if applicable.


In [1]:
import os
import pymongo
from dotenv import load_dotenv

load_dotenv()

# MongoDB connection
MONGO_URI = os.getenv("MONGO_URI", "mongodb+srv://shubhs016:Customer016@cluster1.bnavxbj.mongodb.net/Customer-db?retryWrites=true&w=majority&appName=Cluster1")
DB_NAME = os.getenv("DB_NAME", "Customer_Support-db")

clients = pymongo.MongoClient(MONGO_URI)

In [3]:
db_name = clients.list_database_names()
print(db_name)


['Customer-db', 'Customer_Support-db', 'sample_mflix', 'admin', 'local']


In [6]:
db1 = clients['Customer_Support-db']

print(db1.list_collection_names())

['user_purchase_history', 'Orders', 'kb', 'users', 'user_login_history', 'user_logs_history']


In [7]:
collection1 = db1['user_login_history']

for i in collection1.find().limit(5):
    print(i)

{'_id': ObjectId('68bf111e871cf5ec06492733'), 'user_id': 'user_123', 'login_time': '2025-09-01T08:12:00', 'location': 'Mumbai', 'email': 'alice@example.com'}
{'_id': ObjectId('68bf111e871cf5ec06492734'), 'user_id': 'user_456', 'login_time': '2025-09-02T12:45:00', 'location': 'Delhi', 'email': 'bob@example.com'}


In [8]:
import pandas as pd
collection3 = db1['users']

data = list(collection3.find())

df2 = pd.DataFrame(data)
df2

Unnamed: 0,_id,username,password,user_id
0,68bf111e871cf5ec0649272e,alice,1234,user_123
1,68bf111e871cf5ec0649272f,bob,5678,user_456


In [16]:
import pandas as pd
collection1 = db1['user_login_history']

data = list(collection1.find())

df = pd.DataFrame(data)
df.head(8)

Unnamed: 0,_id,user_id,login_time,location,email
0,68bf111e871cf5ec06492733,user_123,2025-09-01T08:12:00,Mumbai,alice@example.com
1,68bf111e871cf5ec06492734,user_456,2025-09-02T12:45:00,Delhi,bob@example.com


In [10]:
# count documents in a collection
collection2 = db1['chat_history']

total_doc = collection2.count_documents({})
print(f"total documents in collection:{total_doc}")

total documents in collection:0


In [11]:
# count all doc in colllectionof db

for i in db1.list_collection_names():
    col1=db1[i]
    print(f"{i}:{col1.count_documents({})} documents")

user_purchase_history:2 documents
Orders:0 documents
kb:3 documents
users:2 documents
user_login_history:2 documents
user_logs_history:2 documents


In [14]:
import pandas as pd
collection1 = db1['user_logs_history']

data = list(collection1.find())

df = pd.DataFrame(data)
df.head(8)

Unnamed: 0,_id,user_id,issue,resolution,created_at
0,68bf111e871cf5ec06492737,user_123,Laptop won’t start,Pending,2025-09-01
1,68bf111e871cf5ec06492738,user_456,Order delayed,Resolved,2025-08-10


In [13]:
import pandas as pd
collection1 = db1['user_purchase_history']

data = list(collection1.find())

df = pd.DataFrame(data)
df.head(8)

Unnamed: 0,_id,user_id,product_name,purchase_date,amount
0,68bf111e871cf5ec06492735,user_123,Lenovo ThinkPad X1,2025-08-12,1200
1,68bf111e871cf5ec06492736,user_456,Wireless Mouse,2025-07-03,25
