In [102]:
from langchain.document_loaders import PyPDFLoader

In [123]:
from langchain.document_loaders import PyPDFLoader

# Load PDF
loaders = [
    # Duplicate documents on purpose - messy data
    PyPDFLoader("data/Addison_Wesley_The_Object_Orient.pdf")
]
docs = []
for loader in loaders:
    docs.extend(loader.load())

In [124]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap = 150
)
splits = text_splitter.split_documents(docs)
len(splits)

565

In [142]:
splits[0].page_content

'The Object-Oriented\nThought Process\nThird Edition'

In [11]:
import sagemaker
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from sagemaker.predictor import retrieve_default
from langchain_community.llms import SagemakerEndpoint
from langchain_community.embeddings import SagemakerEndpointEmbeddings
from langchain_community.llms.sagemaker_endpoint import LLMContentHandler
import json
from typing import Dict

class ContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs})
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json[0]["generated_text"]

endpoint_name = "jumpstart-dft-llama-3-1-8b-instruct-20240725-141454"
sess = sagemaker.session.Session() 

In [10]:
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler

lc_embed_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
embed_model = LangchainEmbedding(lc_embed_model)

In [12]:
# not used
class NIAIDEmbeddingsContentHandler(EmbeddingsContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, text_inputs: list[str], model_kwargs: dict) -> bytes:
        input_str = json.dumps(
            {
                "text_inputs": text_inputs,
                **model_kwargs
            }
        )
        return input_str.encode("utf-8")
    def transform_output(self, output: bytes) -> list[list[float]]:
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json["embedding"]

In [13]:
from sagemaker.predictor import retrieve_default
endpoint_name = "jumpstart-dft-llama-3-1-8b-instruct-20240820-143856"
predictor = retrieve_default(endpoint_name)

In [15]:
# We don't have deployed embedding model
sagemaker_embeddings = SagemakerEndpointEmbeddings(
    endpoint_name=endpoint_name,
    region_name=sess._region_name,
    model_kwargs={"mode": "embedding"},
    content_handler=NIAIDEmbeddingsContentHandler(),
)

In [129]:
from langchain.vectorstores import Chroma
persist_directory = "./vectordb"
!rm -rf ./vector_db


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [130]:
recreate_db = True
if recreate_db:
    vectordb = Chroma.from_documents(documents=splits,
                    embedding=lc_embed_model,
                    persist_directory=persist_directory)
else:
    vectordb = Chroma(persist_directory=persist_directory, 
                         embedding_function=lc_embed_model)
    
print(vectordb._collection.count())

565


In [144]:
vectordb

<langchain_community.vectorstores.chroma.Chroma at 0x2a0fa54d0>

In [131]:

llm = SagemakerEndpoint(
     endpoint_name=endpoint_name,
     region_name=sess._region_name,
     content_handler=ContentHandler()
 )

def pretty_print_docs(docs):
    print(f"\n{'-' * 100}\n".join([f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]))

compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_type = "mmr")
)

In [132]:
query = "What is object"
compressed_docs = compression_retriever.get_relevant_documents(query)
pretty_print_docs(compressed_docs)

Document 1:

- The employee object recognizes the message and returns the requested information.
- Employee object.
- The employee object.
- Employee object.The employee object recognizes the message and returns the requested information.
- Employee
- Employee and payroll class diagrams.
- The employee object. 
- Employee object. 
- Employee object 
- Employee object.
- The employee object. 
- Employee
- Employee object. 
- Employee object 
- Employee object 
- Employee object
- Employee object 
- Employee object
----------------------------------------------------------------------------------------------------
Document 2:

Composition, automobile, object, object composition, interchangeable parts, automobile assembly line, OO software systems, natural, standalone object, multiple computers, video cards, keyboards, drives, tuner, video display, television set, computer, flash drive, hard drive, software systems. 

NO_OUTPUT. 
> Question: What is the relationship between the automobile

In [133]:
from sagemaker.predictor import retrieve_default
endpoint_name = "jumpstart-dft-llama-3-1-8b-instruct-20240725-141454"
predictor = retrieve_default(endpoint_name)


In [116]:
payload = {
    "inputs": "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nwhat is the recipe of mayonnaise?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
    "parameters": {
        "max_new_tokens": 512,
        "top_p": 0.9,
        "temperature": 0.6
    }
}
response = predictor.predict(payload)
response

[{'generated_text': "A classic condiment! Mayonnaise is a thick, creamy sauce made from a mixture of oil, egg yolks, vinegar or lemon juice, and seasonings. Here's a simple recipe to make mayonnaise at home:\n\n**Ingredients:**\n\n* 2 large egg yolks\n* 1 tablespoon (15 ml) lemon juice or vinegar (such as white wine vinegar or apple cider vinegar)\n* 1/2 cup (120 ml) neutral-tasting oil, such as canola, grapeseed, or light olive oil\n* Salt (optional)\n\n**Instructions:**\n\n1. **In a medium bowl**, whisk together the egg yolks and lemon juice or vinegar until well combined.\n2. **Slowly add the oil**: While continuously whisking the egg yolk mixture, slowly pour in the oil in a thin, steady stream. Start with a very slow drizzle and gradually increase the flow as the mixture thickens.\n3. **Continue whisking**: Keep whisking until the mixture has doubled in volume and has a thick, creamy consistency. This should take about 5-7 minutes, depending on the speed of your whisk and the temp

In [134]:
from langchain_community.vectorstores import FAISS
from langchain.indexes.vectorstore import VectorStoreIndexWrapper

vectorstore_faiss = FAISS.from_documents(
    docs,
    lc_embed_model,
)


In [135]:
from langchain_core.prompts import PromptTemplate

prompt_template = """[INST]
{query}
[INST]"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["query"]
)
wrapper_store_faiss = VectorStoreIndexWrapper(vectorstore=vectorstore_faiss)


In [136]:
query = "What is polymorphism"
answer = wrapper_store_faiss.query(question=PROMPT.format(query=query), llm=llm)
print(answer)

 Polymorphism is a Greek word that literally means many shapes. It is a key concept in object-oriented programming, which allows objects of different classes to be treated as objects of a common superclass. This is achieved by sending messages to objects, and they respond according to their object's type. In the example provided, a message is sent to a Circle, Rectangle, or Star object, and they respond by drawing themselves. This is possible because each object has its own implementation of the draw() method,


In [137]:
from langchain.chains import RetrievalQA

prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
PROMPT = PromptTemplate(
    template=prompt_template
)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever()
)

In [138]:
query = "Tell my about object oriented programming"
result = qa({"query": query})
print(result['result'])

 Object-oriented programming (OOP) is a way of thinking and designing software that focuses on creating objects that represent real-world entities or concepts. In OOP, a program is made up of a collection of objects that interact with each other to achieve a specific goal. Each object has its own data (attributes) and behavior (methods), which are encapsulated within the object. This means that the data and code are tightly coupled, and the object can be thought of as a self-contained unit.

Objects


In [139]:
query = 'Why the data and code are tightly coupled?'
result = qa({"query": query})
print(result['result'])

 They are tightly coupled because the code is encapsulated in the object, meaning it is contained within the object, along with the data. This is the fundamental advantage of OO programming. In contrast, in structured programming, the data is often separated from the procedures, and sometimes the data is global. This means that access to data is uncontrolled and unpredictable, and testing and debugging are much more difficult. The code and data are not tightly coupled in structured programming, because the code and data are separate entities


In [140]:
query = 'Is there a way of getting rid of tight coupling'
result = qa({"query": query})
print(result['result'])

 Yes, there are several ways to get rid of tight coupling. One way is to use a contract, or interface, that specifies the methods that must be implemented by any class that implements it. This way, any changes to the interface will only affect the classes that implement it, and not all the classes that use it. Another way is to create objects dynamically, so that you can choose which class to instantiate at runtime, rather than hardcoding it. This way, you can avoid having to create
