In [None]:
# Install dependencies
!pip install langchain chromadb sentence-transformers
!pip install langchain-community
!pip install PyPDF2

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

# Instantiate the pre-trained Hugging Face embedding model, selected by name
embedding_model = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

In [11]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from PyPDF2 import PdfReader

# Load documents from files
def load_documents_from_files(file_paths):
    """Read the text content of every supported file.

    Args:
        file_paths: Iterable of path strings. Paths ending in ``.txt`` are
            read as UTF-8 text; paths ending in ``.pdf`` are parsed with
            ``PdfReader`` and the text of all pages is concatenated. The
            extension check is case-insensitive.

    Returns:
        list[str]: One string per supported file, in input order. Files with
        any other extension are reported on stdout and skipped.
    """
    docs = []
    for file_path in file_paths:
        # Match the extension on a lower-cased copy so "REPORT.PDF" or
        # "notes.TXT" are accepted as well; the original path is used for I/O.
        lowered = file_path.lower()
        if lowered.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8") as f:
                docs.append(f.read())
        elif lowered.endswith(".pdf"):
            reader = PdfReader(file_path)
            # `or ""` keeps a page that yields no text (None) from raising a
            # TypeError; join() avoids rebuilding the string once per page.
            docs.append(
                "".join(page.extract_text() or "" for page in reader.pages)
            )
        else:
            print(f"Unsupported file type: {file_path}")
    return docs

# Example file paths (replace with your actual files)
file_paths = ["/content/test.txt", "/content/ASSIGNMENT 02.pdf"]
docs = load_documents_from_files(file_paths)

# Split documents into smaller chunks for better indexing.
# CharacterTextSplitter only cuts at `separator`, whose default is a blank
# line ("\n\n"). Text without blank lines would stay in a single piece far
# larger than chunk_size, so split on single newlines instead.
splitter = CharacterTextSplitter(separator="\n", chunk_size=300, chunk_overlap=50)
chunks = splitter.create_documents(docs)

# Set up ChromaDB: embed every chunk and index it in a Chroma vector store
vectorstore = Chroma.from_documents(documents=chunks, embedding=embedding_model)


In [None]:
query = "What services does Verizon offer?"

# Ask the vector store for the 5 chunks closest to the query
results = vectorstore.similarity_search(
    query,
    k=5,
)

# Keep only the text of each hit and show it
context = [hit.page_content for hit in results]
print("Retrieved Context:", context)


In [None]:
!pip install llama-cpp-python


In [None]:
from langchain.llms import LlamaCpp

# Path to your Llama model
# NOTE(review): this is a GGML-format file; recent llama-cpp-python releases
# appear to load only GGUF models — confirm against the installed version.
llama_model_path = "./llama-7b.ggmlv3.q4_0.bin"

# Load the Llama model.
# n_ctx is raised above the wrapper's small default context window so that the
# retrieved chunks, the question and the generated answer fit together.
llm = LlamaCpp(model_path=llama_model_path, n_ctx=2048)


In [None]:
from langchain.chains import RetrievalQA

# Expose the vector store through the retriever interface
retriever = vectorstore.as_retriever()

# Wire the Llama model and the retriever into a retrieval QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
)

# Run one question through the chain and print the answer
query = "What services does Verizon offer?"
response = qa_chain.run(query)
print("Response:", response)