# PDF Summary

In [1]:
#!pip install --upgrade sagemaker --quiet
!pip install langchain==0.0.148 --quiet
#!pip install faiss-cpu --quiet

In [2]:
import time
import sagemaker, boto3, json
from sagemaker.session import Session
from sagemaker.model import Model
from sagemaker import image_uris, model_uris, script_uris, hyperparameters
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base
from typing import Any, Dict, List, Optional
from langchain.embeddings import SagemakerEndpointEmbeddings
from langchain.llms.sagemaker_endpoint import ContentHandlerBase

sagemaker_session = Session()
aws_role = sagemaker_session.get_caller_identity_arn()
aws_region = boto3.Session().region_name
sess = sagemaker.Session()
model_version = "*"

In [3]:
import boto3, json
from sagemaker.session import Session

In [4]:
sagemaker_session = Session()
aws_role = sagemaker_session.get_caller_identity_arn()
aws_region = boto3.Session().region_name

In [5]:
def query_endpoint_with_json_payload(encoded_json, endpoint_name, content_type="application/json"):
    client = boto3.client("runtime.sagemaker")
    response = client.invoke_endpoint(
        EndpointName=endpoint_name, ContentType=content_type, Body=encoded_json
    )
    return response


def parse_response_model_flan_t5(query_response):
    model_predictions = json.loads(query_response["Body"].read())
    generated_text = model_predictions["generated_texts"]
    return generated_text


def parse_response_multiple_texts_bloomz(query_response):
    generated_text = []
    model_predictions = json.loads(query_response["Body"].read())
    for x in model_predictions[0]:
        generated_text.append(x["generated_text"])
    return generated_text

In [6]:
endpoint_name = 'jumpstart-dft-hf-llm-falcon-7b-instruct-bf16'

In [7]:
from langchain import SagemakerEndpoint

In [8]:
from langchain.llms.sagemaker_endpoint import LLMContentHandler

In [9]:
class ContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        input_str = json.dumps({"inputs": prompt, **model_kwargs})
        return input_str.encode('utf-8')
      
    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json[0]["generated_text"]

content_handler = ContentHandler()

In [10]:
parameters = {
    "max_length": 200,
    "num_return_sequences": 1,
    "top_k": 250,
    "top_p": 0.95,
    "do_sample": False,
    "temperature": 1,
}

In [11]:
llm = SagemakerEndpoint(
    endpoint_name = endpoint_name, 
    region_name = aws_region, 
    model_kwargs = parameters,
    content_handler = content_handler
)

## PDF Summary

In [12]:
!pip install PyPDF2 --quiet

In [13]:
import PyPDF2
from io import BytesIO

In [14]:
sess = sagemaker.Session()
s3_bucket = sess.default_bucket()
s3_prefix = 'docs'

In [23]:
s3_file_name = 'sample-blog.pdf'
#s3_file_name = '2016-3series.pdf'
#s3_file_name = 'gen-ai-aws.pdf'

In [24]:
s3r = boto3.resource("s3")
doc = s3r.Object(s3_bucket, s3_prefix+'/'+s3_file_name)
#doc = s3r.Object(, '/docs/sample.pdf')
        
contents = doc.get()['Body'].read()
reader = PyPDF2.PdfReader(BytesIO(contents))
        
raw_text = []
for page in reader.pages:
    raw_text.append(page.extract_text())
contents = '\n'.join(raw_text)  

In [25]:
new_contents = str(contents[:8000]).replace("\n"," ") 
print('new_contents: ', new_contents)

new_contents:  Exam ple blog entry   Moving Day and settling in   Due to wanting to live closer to the city centre, I moved from my  house in second year to a flat in the city centre for third  year.  Mov ing back to Liverpool was great;  I felt like I was coming  home,  much to my parents ’ displeasure!    For the first week back , I worked a few days for the university in  my job as an A mbassador, showing potential new students  round while becoming reacqua inted with the campus myself.   When lectures restarted , it seemed like summer had  disappeared in a matter of minutes!   However , I was eager to get learning again and looked forward  to seminars and lectures on the books and topics I had been researching over the su mmer.   Students from older years had warned me about third year being pretty scary, so I had prepared well  and really enjoyed the first lectures from my new modules. Reconnecting with my societies was  doubly fun, being the President of Combined Honours for the 

In [26]:
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter()
texts = text_splitter.split_text(new_contents) 
print(texts)

['Exam ple blog entry   Moving Day and settling in   Due to wanting to live closer to the city centre, I moved from my  house in second year to a flat in the city centre for third  year.  Mov ing back to Liverpool was great;  I felt like I was coming  home,  much to my parents ’ displeasure!    For the first week back , I worked a few days for the university in  my job as an A mbassador, showing potential new students  round while becoming reacqua inted with the campus myself.   When lectures restarted , it seemed like summer had  disappeared in a matter of minutes!   However , I was eager to get learning again and looked forward  to seminars and lectures on the books and topics I had been researching over the su mmer.   Students from older years had warned me about third year being pretty scary, so I had prepared well  and really enjoyed the first lectures from my new modules. Reconnecting with my societies was  doubly fun, being the President of Combined Honours for the year meant lo

In [27]:
from langchain.docstore.document import Document
docs = [Document(page_content=t) for t in texts[:10]]
print(docs)

[Document(page_content='Exam ple blog entry   Moving Day and settling in   Due to wanting to live closer to the city centre, I moved from my  house in second year to a flat in the city centre for third  year.  Mov ing back to Liverpool was great;  I felt like I was coming  home,  much to my parents ’ displeasure!    For the first week back , I worked a few days for the university in  my job as an A mbassador, showing potential new students  round while becoming reacqua inted with the campus myself.   When lectures restarted , it seemed like summer had  disappeared in a matter of minutes!   However , I was eager to get learning again and looked forward  to seminars and lectures on the books and topics I had been researching over the su mmer.   Students from older years had warned me about third year being pretty scary, so I had prepared well  and really enjoyed the first lectures from my new modules. Reconnecting with my societies was  doubly fun, being the President of Combined Honours

In [28]:
from langchain.chains.summarize import load_summarize_chain
chain = load_summarize_chain(llm, chain_type="stuff")
chain.run(docs)

' The writer moved from their parental home to a flat in the city centre for their third year of university'

In [37]:
from langchain.prompts import PromptTemplate

prompt_template = """Write a concise summary of the following:


{text}


CONCISE SUMMARY """


In [38]:
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
chain.run(docs)

': The author moved from their parental home to a flat in the city centre for their third year,'