In [None]:
import boto3, os

In [None]:
# AWS region handed to the Lambda_Helper and S3_Helper constructors below.
region = 'us-east-1'

# Bucket that receives the PDF upload and carries the Lambda notification.
pdf_bucket_name_text = 'robb.pdf.bucket'

# ARNs passed to Lambda_Helper: the execution role and the dependency layer.
lambda_execution_role = "arn:aws:iam::638139650817:role/RobbLambdaRole"
lambda_layer_arn = 'arn:aws:lambda:us-east-1:638139650817:layer:robb_bedrock_lambda_layer:1'

Write the map prompt to disk as map_prompt_template.txt

In [None]:
%%writefile map_prompt_template.txt
Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:

Write the summarization prompt to disk as combine_prompt_template.txt

In [None]:
%%writefile combine_prompt_template.txt
Write a concise summary of the following text delimited by triple backquotes that includes the following elements:
* A title that accurately reflects the content of the text.
* An introduction paragraph that provides an overview of the topic.
* Approximately twenty bullet points that list the key points of the text.
* A conclusion paragraph that summarizes the main points of the text.
```{text}```
BULLET POINT SUMMARY:

Write the Lambda function to disk as PDFProcessLambdaFunction.py

In [None]:
%%writefile PDFProcessLambdaFunction.py

import boto3
import io
import json 
#import uuid
from PyPDF2 import PdfReader
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms.bedrock import Bedrock
#from langchain import PromptTemplate
from langchain_core.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain


# Bedrock settings: region for the runtime client, model to invoke, and the
# value sent as "maxTokenCount" in the model kwargs.
region = 'us-east-1'
modelId = "amazon.titan-tg1-large"
bedrock_max_token = 4096

# Chunking parameters given to RecursiveCharacterTextSplitter.
# NOTE: the "chunk_overlapp" spelling is read by lambda_pdf_processor; keep it.
chunk_size = 1000
chunk_overlapp = 100


# Clients are created once at module import.
# s3_client downloads the uploaded PDF in lambda_pdf_processor.
s3_client = boto3.client('s3')
# bedrock_runtime is pinned to `region` and passed as `client=` to the Bedrock LLM wrapper.
bedrock_runtime = boto3.client('bedrock-runtime', region_name=region)

def lambda_handler(event, context):
    """Entry point invoked by the S3 upload notification.

    Reads the bucket name and object key from the first record of ``event``
    and forwards objects whose key contains ``pdf/`` to
    ``lambda_pdf_processor``.

    Args:
        event: S3 event notification payload; only ``event['Records'][0]``
            is read.
        context: Lambda context object (unused).

    Returns:
        The value returned by ``lambda_pdf_processor`` when the key contains
        ``pdf/``; otherwise ``None``.
    """
    # Function-scope import so this block does not depend on the module's
    # import list.
    from urllib.parse import unquote_plus

    s3_info = event['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    # S3 URL-encodes object keys in event notifications (a space arrives as
    # "+", "(" as "%28", ...). Decode before using the key with get_object,
    # otherwise such objects are reported as missing.
    key = unquote_plus(s3_info['object']['key'])

    print("The bucket is: " + bucket)
    print("The key is: " + key)

    if "pdf/" not in key:
        # Not under the PDF prefix: nothing to do.
        return None

    print("This app is working on a  with pdf file.")
    # Propagate the processor's status instead of dropping it.
    return lambda_pdf_processor(bucket, key)
        

def lambda_pdf_processor( bucket, key ):
    """Download a PDF from S3, split its text into chunks and summarize them.

    Args:
        bucket: Name of the S3 bucket holding the PDF.
        key: Object key of the PDF inside ``bucket``.

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded message in ``body``
        on success, or ``statusCode`` 500 and the JSON-encoded error text if
        any step (including the S3 download) raises.
    """
    try:
        # Download exactly once, inside the try so S3 failures are reported
        # through the same 500 response as every other error.
        pdf_file = s3_client.get_object(Bucket=bucket, Key=key)["Body"].read()
        reader = PdfReader(io.BytesIO(pdf_file))

        print("Number of reader pages: "+ str(len(reader.pages)) )

        # `or ""` keeps one entry per page while ensuring create_documents
        # only ever receives strings, even if a page yields no text.
        pages = [(page.extract_text() or "") for page in reader.pages]

        print("Number of PDF pages: " + str(len(pages)) )

        text_splitter = RecursiveCharacterTextSplitter(
             separators=["\n\n", "\n"], chunk_size=chunk_size, chunk_overlap=chunk_overlapp)
        docs = text_splitter.create_documents(pages)

        print("Number of created docs: " + str(len(docs)) )

        print("Calling lambda summarize")
        lambda_summarize( docs )

        # Mirror the error path so callers always get a status dict.
        return {
            'statusCode': 200,
            'body': json.dumps("PDF processed successfully")
            }

    except Exception as e:
        print(f"Error occurred: {e}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Error occurred: {e}")
            }

def _load_prompt_template(path):
    """Read ``path`` and wrap its text in a PromptTemplate with one ``text`` variable."""
    with open(path, "r", encoding="utf-8") as file:
        template_text = file.read()

    print("Loaded the " + path + ".")

    return PromptTemplate(template=template_text, input_variables=["text"])


def lambda_summarize( docs : list ):
    """Summarize ``docs`` with a map-reduce chain backed by Bedrock.

    Args:
        docs: The chunks produced by ``text_splitter.create_documents`` in
            ``lambda_pdf_processor``.

    Returns:
        Whatever ``summary_chain.invoke(docs)`` returns. The same value is
        also printed between the "Summary" marker lines.
    """
    llm_model = Bedrock(
        model_id=modelId,
        model_kwargs={
            "maxTokenCount": bedrock_max_token,
            "stopSequences": [],
            "temperature": 0,
            "topP": 1,
        },
        client=bedrock_runtime,
    )

    # Both prompt files are opened by relative path, i.e. resolved against
    # the current working directory.
    combine_prompt_template = _load_prompt_template('combine_prompt_template.txt')
    map_prompt_template = _load_prompt_template('map_prompt_template.txt')

    summary_chain = load_summarize_chain(llm=llm_model,
                                         chain_type="map_reduce",
                                         verbose=True,
                                         map_prompt=map_prompt_template,
                                         combine_prompt=combine_prompt_template)

    print("Calling chain invoke ***************")
    summary = summary_chain.invoke(docs)

    print("Summary ***********************")
    print( summary)
    print("End of Summary ***********************")

    # Return the result so callers can use it rather than only logging it.
    return summary
    

Zip the Lambda code and prompts, and upload them to AWS Lambda

In [None]:
from helpers.Lambda_Helper import Lambda_Helper

# Lambda_Helper methods used in this notebook: deploy_function, add_lambda_trigger.
lambda_helper = Lambda_Helper(region, lambda_layer_arn, lambda_execution_role)

pdf_lambda_function_name = "PDFProcessLambdaFunction"

# Hand the handler module and both prompt templates to deploy_function.
# Presumably the helper zips and uploads them; confirm in helpers/Lambda_Helper.
lambda_helper.deploy_function(
    [
        "PDFProcessLambdaFunction.py",
        "map_prompt_template.txt",
        "combine_prompt_template.txt",
    ],
    function_name=pdf_lambda_function_name,
)


Configure the S3/Lambda notification for PDF upload

In [None]:
# Notification filter rules: key suffix "pdf" and key prefix "pdf/".
filter_rule_list = [
    {'Name': 'suffix', 'Value': 'pdf'},
    {'Name': 'prefix', 'Value': 'pdf/'},
]

lambda_helper.add_lambda_trigger(
    pdf_bucket_name_text,
    filter_rule_list=filter_rule_list,
    function_name=pdf_lambda_function_name,
)

Upload the PDF to be summarized

In [None]:
from helpers.S3_Helper import S3_Helper

# S3_Helper operations noted by the author: upload_file, download_object, list_objects.
s3_helper = S3_Helper(region)

# The target key sits under the "pdf/" prefix and ends in "pdf", matching the
# notification filter rules. Arguments are presumably (bucket, local path,
# object key); confirm in helpers/S3_Helper.
s3_helper.upload_file_to_bucket(
    pdf_bucket_name_text,
    'data/letter.pdf',
    'pdf/letter.pdf',
)


Clean up config files

In [None]:
import os
def DeleteLocalFile(file_name: str):
    """Remove ``file_name`` from the local filesystem if it exists.

    Does nothing when the path does not exist.
    """
    if not os.path.exists(file_name):
        return
    os.remove(file_name)
    
# Remove the files this notebook wrote to disk, in the original order.
for leftover in (
    "PDFProcessLambdaFunction.py",
    "PDFProcessLambdaFunction.zip",
    "combine_prompt_template.txt",
    "map_prompt_template.txt",
    "results.txt",
):
    DeleteLocalFile(leftover)