# Pre-requisite  

In [None]:
!pip install --upgrade -q boto3 langchain sagemaker-studio-image-build aws-sam-cli tqdm

# Workshop variables

In [31]:
# Prefix used to build the names of resources created in this notebook
# (the Kendra role and index, and the SageMaker endpoint).
name="gen-ai-workshop"
# NOTE(review): `jumpstart_model` and `endpoint_name` are not referenced by any cell visible
# in this notebook; the deployment cell defines its own `jumpstart_model_id` and
# `sagemaker_endpoint_name`. Confirm whether these two are still needed.
jumpstart_model = "huggingface-llm-falcon-7b-instruct-bf16"
endpoint_name=f"{name}-endpoint"

In [None]:
import boto3


# Region of the default boto3 session, and the account ID of the calling identity (via STS).
aws_region = boto3.Session().region_name
aws_account_id = boto3.client('sts').get_caller_identity().get('Account')

# Launch Kendra Index

Amazon Kendra will be our retrieval backend for RAG: it stores and indexes our documents so that relevant passages can be added to the prompt.

We will first create a role for Kendra

In [None]:
import boto3
import json

iam_client = boto3.client('iam')

kendra_role_name = f"kendra-{name}-role"

# Trust policy that allows the Kendra service to assume the role.
kendra_trust_policy = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": {
            "Service": "kendra.amazonaws.com"
        },
        "Action": "sts:AssumeRole"
    }]
}

policy_cloudwatch_arn = "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess"

# Reuse the role when it already exists; create it only when IAM reports it as missing.
# Any other failure (for example missing permissions) is raised instead of being
# mistaken for "role does not exist".
try:
    response = iam_client.get_role(RoleName=kendra_role_name)
    print(f"kendra-{name}-role Role already exists")
except iam_client.exceptions.NoSuchEntityException:
    response = iam_client.create_role(
        RoleName=kendra_role_name,
        AssumeRolePolicyDocument=json.dumps(kendra_trust_policy),
    )
    print(f"Created the role kendra-{name}-role")

kendra_role_arn = response["Role"]["Arn"]

# Best effort: keep going if the attachment fails, but report the real error
# rather than assuming the policy was already attached.
try:
    response = iam_client.attach_role_policy(
        RoleName=kendra_role_name,
        PolicyArn=policy_cloudwatch_arn
    )
except iam_client.exceptions.ClientError as error:
    print(f"Could not attach {policy_cloudwatch_arn} to role {kendra_role_name}: {error}")
 


In [None]:
# Kendra client shared by the helper functions and the index-creation code in this cell.
kendra_client = boto3.client("kendra")

def list_kendra_indices(kendra_client):
    """Return the summary items of every Kendra index visible to ``kendra_client``.

    ``list_indices`` returns results in pages, so ``NextToken`` is followed until
    it is no longer present; indices beyond the first page are not missed.

    Args:
        kendra_client: a boto3 Kendra client (or any object with a compatible
            ``list_indices`` method).

    Returns:
        list: the concatenated ``IndexConfigurationSummaryItems`` of all pages.
    """
    summaries = []
    request_kwargs = {}
    while True:
        page = kendra_client.list_indices(**request_kwargs)
        summaries.extend(page.get("IndexConfigurationSummaryItems", []))
        next_token = page.get("NextToken")
        if not next_token:
            return summaries
        request_kwargs = {"NextToken": next_token}


def find_index_id_according_to_tag(kendra_client, indices, aws_region, aws_account_id):
    """Return the ID of the first index tagged ``workshop=gen-ai``, or ``None``.

    For each entry in ``indices`` the index ARN is built from the region, the
    account ID and the entry's ``Id``, and its tags are fetched with
    ``list_tags_for_resource``.
    """
    arn_prefix = f"arn:aws:kendra:{aws_region}:{aws_account_id}:index/"
    for summary in indices:
        tags = kendra_client.list_tags_for_resource(
            ResourceARN=f"{arn_prefix}{summary['Id']}"
        )["Tags"]
        is_workshop_index = any(
            tag["Key"] == "workshop" and tag["Value"] == "gen-ai" for tag in tags
        )
        if is_workshop_index:
            return summary['Id']
    return None


def create_kendra_index(kendra_client):
    """Create the workshop's Developer Edition Kendra index and return its ID.

    The index is named ``{name}-index``, uses the notebook-level
    ``kendra_role_arn`` and is tagged ``workshop=gen-ai``.
    """
    workshop_tags = [{'Key': 'workshop', 'Value': 'gen-ai'}]
    creation = kendra_client.create_index(
        Name=f"{name}-index",
        Edition="DEVELOPER_EDITION",
        RoleArn=kendra_role_arn,
        Tags=workshop_tags,
    )
    return creation["Id"]


# Creating Kendra index: reuse an index tagged workshop=gen-ai when one exists,
# otherwise create a new one. `kendra_index_id` is assigned on every path
# (previously it was left undefined when the account had no indices at all).
kendra_indices = list_kendra_indices(kendra_client)
kendra_index_id = None
if len(kendra_indices) > 0:
    kendra_index_id = find_index_id_according_to_tag(kendra_client, kendra_indices, aws_region, aws_account_id)

if kendra_index_id is None:
    # either there are no indices, or none is tagged with workshop=gen-ai
    kendra_index_id = create_kendra_index(kendra_client)


print(kendra_index_id)


## LLM Endpoint provisioning

Now we will deploy the Falcon 7B Instruct LLM using the SageMaker SDK `JumpStartModel` class, which does all the heavy lifting of configuring the endpoint in Amazon SageMaker.

In [None]:
from sagemaker import get_execution_role


try:
    sm_execution_role = get_execution_role()
except Exception:
    # To work locally use explicit role.
    # NOTE: this ARN is account-specific; replace it with a role from your own account.
    # `except Exception` (not a bare `except`) keeps Ctrl-C / kernel interrupts working.
    sm_execution_role = "arn:aws:iam::910416587115:role/SageMaker-Role-Full"

print(sm_execution_role)

In [34]:
from sagemaker.jumpstart.model import JumpStartModel


# JumpStart model to deploy, and the endpoint name derived from the workshop prefix.
jumpstart_model_id = "huggingface-textgeneration-falcon-7b-instruct-bf16"
sagemaker_endpoint_name = f"{name}-falcon-7b-instruct"


try:
    model = JumpStartModel(
        model_id=jumpstart_model_id,
        model_version="1.0.0",
        role=sm_execution_role,
    )
    # wait=False: do not block here; a later cell polls the endpoint status.
    model.deploy(endpoint_name=sagemaker_endpoint_name, wait=False)
except Exception as deploy_error:
    # Deployment is best effort: show the error and how to check for name clashes.
    print(deploy_error)
    print(f"""\nPlease make sure that you dont have in your account an endpoint or endpoint configuration with name {sagemaker_endpoint_name}\n
          Endpoint configuration: Check at https://{aws_region}.console.aws.amazon.com/sagemaker/home?region={aws_region}#/endpointConfig
          Endpoint: Check at https://{aws_region}.console.aws.amazon.com/sagemaker/home?region={aws_region}#/endpoints/ 
          
          If the endpoint is already running, you may continue the workshop and use it.
          """)
    
    

Couldn't call 'get_role' to get Role ARN from role name omer to get Role path.


An error occurred (ValidationException) when calling the CreateEndpointConfig operation: Cannot create already existing endpoint configuration "arn:aws:sagemaker:eu-west-1:910416587115:endpoint-config/gen-ai-workshop-falcon-7b-instruct".

Please make sure that you dont have in your account an endpoint or endpoint configuration with name gen-ai-workshop-falcon-7b-instruct

          Endpoint configuration: Check at https://eu-west-1.console.aws.amazon.com/sagemaker/home?region=eu-west-1#/endpointConfig
          Endpoint: Check at https://eu-west-1.console.aws.amazon.com/sagemaker/home?region=eu-west-1#/endpoints/ 
          
          If the endpoint is already running, you may continue the workshop and use it.
          


# Build the backend lambda, and API Gateway

By now, we have launched Amazon Kendra and the Falcon LLM, the latter behind an Amazon SageMaker endpoint.

Now we will build the Backend lambda, using [AWS Serverless Application Model](https://aws.amazon.com/serverless/sam/) (SAM), an open-source framework for building serverless applications.

The lambda code [rag_app](/lab4/rag_app/) contains a couple of environment variables that help us control the lambda's behavior.


## Build the container image for frontend chatbot application

When working in SageMaker Studio, we can use `sagemaker-studio-image-build` to trigger a Docker build that runs on [AWS CodeBuild](https://aws.amazon.com/codebuild/).

We will start by adding the appropriate policies to the SageMaker execution role, to allow it to trigger the build job.

In [None]:
import boto3

# The IAM role name is the last path segment of the execution role ARN.
sm_execution_role_name = sm_execution_role.split("/")[-1]

with open("codebuild-policy.json") as f:
    code_build_policy_document = f.read()

iam_client = boto3.client("iam")

code_build_policy_name = "codebuild-policy-sm-docker-build"

# Create the customer-managed policy, or reuse it when it already exists.
# Only the "already exists" error is handled; any other failure is raised.
try:
    policy_response = iam_client.create_policy(
        PolicyName=code_build_policy_name,
        PolicyDocument=code_build_policy_document
    )
    code_build_policy_arn = policy_response['Policy']['Arn']
except iam_client.exceptions.EntityAlreadyExistsException:
    print("Policy exists")
    code_build_policy_arn = f"arn:aws:iam::{aws_account_id}:policy/{code_build_policy_name}"


attach_response = iam_client.attach_role_policy(
    RoleName=sm_execution_role_name,
    PolicyArn=code_build_policy_arn
)

# Trust policy that lets both SageMaker and CodeBuild assume the execution role.
codebuild_trust_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "sagemaker.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        },
        {
            "Effect": "Allow",
            "Principal": {
                "Service": [
                    "codebuild.amazonaws.com"
                ]
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

# NOTE: this replaces the role's existing trust policy with the document above.
response = iam_client.update_assume_role_policy(
    RoleName=sm_execution_role_name,
    PolicyDocument=json.dumps(codebuild_trust_policy)
)

Now that we have configured SageMaker execution role, we can trigger a build job to build the frontend chatbot application that was built using [`streamlit`](https://streamlit.io/).

In [None]:
# Run sm-docker from the `fe` directory, passing the SageMaker execution role name
# and the target repository `gen-ai-streamlit-fe:latest`.
!cd fe && sm-docker build --role $sm_execution_role_name --repository gen-ai-streamlit-fe:latest .

Before we update the template file and deploy the stack, let's verify that all the components are ready.

In [None]:
import boto3
from time import sleep


def is_kendra_active(kendra_index_id):
    """Return the current status string of the given Kendra index.

    Despite the name, the result is the ``Status`` field of ``describe_index``
    (not a boolean); callers compare it against the status they expect.
    """
    index_description = boto3.client('kendra').describe_index(Id=kendra_index_id)
    return index_description['Status']

def is_sagemaker_jumpstart_active(sagemaker_endpoint_name):
    """Return the current status string of the given SageMaker endpoint.

    Despite the name, the result is the ``EndpointStatus`` field of
    ``describe_endpoint`` (not a boolean).
    """
    endpoint_description = boto3.client('sagemaker').describe_endpoint(
        EndpointName=sagemaker_endpoint_name
    )
    return endpoint_description['EndpointStatus']


In [None]:
import sys

def _wait_for_status(get_status, ready_status, failed_statuses, label, help_text, poll_seconds=5):
    """Poll ``get_status()`` until the resource is ready, has failed, or the call raises.

    Args:
        get_status: zero-argument callable returning the current status string.
        ready_status: status that means the resource is usable.
        failed_statuses: statuses that will never become ready; polling stops
            on them instead of looping forever.
        label: human-readable resource name used in the printed messages.
        help_text: hint printed when the status call raises or the resource failed.
        poll_seconds: delay between polls.

    Returns:
        The last status seen, or ``None`` when the status call raised.
    """
    while True:
        try:
            current_status = get_status()
        except Exception as e:
            print(e)
            print(help_text)
            return None
        if current_status == ready_status:
            print(f"{label} is {current_status}")
            return current_status
        if current_status in failed_statuses:
            print(f"\n{label} is {current_status}")
            print(help_text)
            return current_status
        sys.stdout.write(".")
        # flush so the progress dots appear while waiting, not all at once at the end
        sys.stdout.flush()
        sleep(poll_seconds)


print(f"Checking if Kendra index id {kendra_index_id} is active")
kendra_status = _wait_for_status(
    lambda: is_kendra_active(kendra_index_id),
    ready_status="ACTIVE",
    failed_statuses={"FAILED"},
    label="Kendra index",
    help_text=f"Please check if you have an index in Kendra https://{aws_region}.console.aws.amazon.com/kendra/home?region={aws_region}#indexes\n",
)


print(f"Checking if SageMaker endpoint {sagemaker_endpoint_name} is in service")
sagemaker_endpoint_status = _wait_for_status(
    lambda: is_sagemaker_jumpstart_active(sagemaker_endpoint_name),
    ready_status="InService",
    failed_statuses={"Failed"},
    label="SageMaker endpoint",
    help_text=f"Please check if you have a Sagemaker endpoint in SageMaker https://{aws_region}.console.aws.amazon.com/sagemaker/home?region={aws_region}#/endpoints\n",
)


Now we will update the [template](/lab4/template.yml), and deploy the stack using sam-cli.

Let's add to SageMaker execution role permissions to deploy the application with CloudFormation

In [None]:
# Attach the AWS-managed AWSCloudFormationFullAccess policy to the SageMaker execution role.
attach_response = iam_client.attach_role_policy(
    RoleName=sm_execution_role_name,
    PolicyArn="arn:aws:iam::aws:policy/AWSCloudFormationFullAccess"
)

In [None]:
# Substitute the Kendra index ID for the ***KENDRA_INDEX_ID*** placeholder in template.yml.
# NOTE: the file is rewritten in place, so once the placeholder has been replaced,
# re-running this cell with a different index ID leaves the earlier value in the file.
with open("template.yml", 'r') as f:
    template = f.read()
    
update_template = template.replace("***KENDRA_INDEX_ID***", f"{kendra_index_id}")

with open("template.yml", 'w') as f:
    f.write(update_template)

In [None]:
# Build the SAM application (presumably from the template.yml updated above — confirm).
!sam build

In [None]:
# Deploy the built application as the CloudFormation stack `gen-ai-immersion-day-stack`.
!sam deploy --stack-name gen-ai-immersion-day-stack --resolve-s3 --capabilities CAPABILITY_IAM

# Chatbot app

Now that we launched our stack with the following components:

- Frontend: streamlit application running on ECS Fargate
- Kendra Index - currently empty
- Falcon Endpoint hosted in SageMaker using Jumpstart
- DynamoDB table that is acting as chat memory per user ID, and host the entire conversation
- Backend: RAG lambda with API Gateway, this lambda does the following
  - receive the POST request from the chatbot app and, by using `LangChain`:
  - get the conversation history per user ID
  - retrieve from Kendra documents that answers the user prompt
  - create the prompt using a prompt template
  - POST the prompt to the LLM model and respond back to the chatbot app with the generated text

Let's get the chatbot app from the CloudFormation stack and start chat with the chatbot

In [None]:
# Print one output value of the deployed stack.
# NOTE(review): `Outputs[1]` selects the output by position; presumably this is the
# chatbot URL — confirm against the Outputs section of the template.
!aws cloudformation describe-stacks --stack-name gen-ai-immersion-day-stack --query "Stacks[0].Outputs[1].OutputValue"

Let's see how the LLM responds to what is `Amazon EC2 Nitro Enclave?` without RAG

![no-rag](../img/nitro_enclave_no_rag.jpg)

We can see that the LLM has no notion of what a Nitro Enclave is.

### Adding to Kendra documents for prompt engineering

Now that the chatbot application works, let's add documents to Kendra that will enrich the prompt with more context and help the LLM provide better results.

Kendra provides multiple ways of indexing data; we will use two of them:
- S3 - uploading documents into S3 and indexing them from there
- Web crawling

We will start with the S3 data source connector.

In [None]:
# Bucket that stores the documents Kendra will index
prefix = 'kendra-storage'
kendra_bucket_name = f'{prefix}-{aws_account_id}-{aws_region}'

s3_client = boto3.client('s3', region_name=aws_region)
try:
    if aws_region == "us-east-1":
        # us-east-1 is the default location and is created without a LocationConstraint
        s3_client.create_bucket(Bucket=kendra_bucket_name)
    else:
        s3_client.create_bucket(Bucket=kendra_bucket_name,
                                CreateBucketConfiguration={'LocationConstraint': aws_region})
    print(f"Bucket {kendra_bucket_name} created")
except s3_client.exceptions.BucketAlreadyOwnedByYou:
    # Re-running the notebook: the bucket from a previous run is reused.
    print(f"Bucket {kendra_bucket_name} already exists")
except s3_client.exceptions.ClientError as error:
    # Any other failure (permissions, name owned by another account, ...) would make
    # the upload cells fail later, so report the real cause and stop here.
    print(f"Could not create bucket {kendra_bucket_name}: {error}")
    raise
    

Now let's upload files into the bucket

In [None]:
import os
import requests
from tqdm import tqdm
from io import BytesIO


# List of URLs to download PDFs from
pdf_urls = [
    "https://patentimages.storage.googleapis.com/bb/0f/5a/6ef847538a6ab5/US10606565.pdf",
    "https://patentimages.storage.googleapis.com/f7/50/e4/81af7ddcbb2773/US9183397.pdf",
    "https://docs.aws.amazon.com/pdfs/enclaves/latest/user/enclaves-user.pdf",
    "https://docs.aws.amazon.com/pdfs/ec2-instance-connect/latest/APIReference/ec2-instance-connect-api.pdf",
]
print(kendra_bucket_name)
# Download PDFs from the URLs and upload them to the S3 bucket
for url in tqdm(pdf_urls):
    filename = os.path.basename(url)
    print(f"Working on {filename}")
    fileobj = BytesIO()
    # stream=True downloads the body in chunks; the timeout stops a stalled server
    # from hanging the notebook, and `with` releases the connection when done.
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail on HTTP errors so an error page is never uploaded as if it were a PDF.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        block_size = 1024
        with tqdm(total=total_size, unit='iB', unit_scale=True) as progress_bar:
            for data in response.iter_content(block_size):
                progress_bar.update(len(data))
                fileobj.write(data)
    # Rewind the in-memory buffer so the upload starts from the first byte.
    fileobj.seek(0)
    s3_client.upload_fileobj(fileobj, kendra_bucket_name, filename)

Verify that the files were uploaded

In [None]:
# List the objects at the top level of the Kendra bucket.
!aws s3 ls s3://{kendra_bucket_name}/

Let's add to Kendra role permissions to access the S3 objects for indexing

In [None]:
kendra_policy_name = "kendra-s3-access"

# Permissions for the Kendra role: read the documents bucket and add/remove
# documents in the workshop index.
kendra_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Action": [
                "s3:GetObject",
                "s3:ListBucket"
            ],
            "Resource": [
                f"arn:aws:s3:::{kendra_bucket_name}",
                f"arn:aws:s3:::{kendra_bucket_name}/*"
            ],
            "Effect": "Allow"
        },
        {
            "Effect": "Allow",
            # "kendra:BatchPutDocument" was listed twice; the duplicate is dropped.
            "Action": [
                "kendra:BatchPutDocument",
                "kendra:BatchDeleteDocument"
            ],
            "Resource": f"arn:aws:kendra:{aws_region}:{aws_account_id}:index/{kendra_index_id}"
        }
    ]
}

# Create the customer-managed policy, or reuse it when it already exists.
# Only the "already exists" error is handled; any other failure is raised.
# NOTE: an existing policy is reused as-is; its document is not updated here.
try:
    kendra_policy_response = iam_client.create_policy(
        PolicyName=kendra_policy_name,
        PolicyDocument=json.dumps(kendra_policy)
    )
    kendra_policy_arn = kendra_policy_response['Policy']['Arn']
except iam_client.exceptions.EntityAlreadyExistsException:
    print("Policy exists")
    kendra_policy_arn = f"arn:aws:iam::{aws_account_id}:policy/{kendra_policy_name}"


attach_response = iam_client.attach_role_policy(
    RoleName=f"kendra-{name}-role",
    PolicyArn=kendra_policy_arn
)


In [None]:
# Register the documents bucket as an S3 data source of the Kendra index,
# using the Kendra role created earlier in the notebook.
kendra_client = boto3.client('kendra')
kendra_data_source_response = kendra_client.create_data_source(
    Name="s3-data-storage",
    IndexId=kendra_index_id,
    Type="S3",
    Configuration={
        'S3Configuration': {
            'BucketName': kendra_bucket_name,
        }
    },
    RoleArn=kendra_role_arn,
)

Start the Kendra Sync Job to index the documents from the bucket

In [None]:
# Start a sync job for the data source referenced by `kendra_data_source_response`
# and print the API response.
kendra_sync_response = kendra_client.start_data_source_sync_job(
    Id=kendra_data_source_response["Id"],
    IndexId=kendra_index_id
)
print(kendra_sync_response)

Now that the files are indexed in Kendra, our prompt context will be richer, which will improve the LLM's responses. Head over to the chatbot and click "Reset Session" to get a new user ID with no chat memory.

Ask again "What is Amazon EC2 Nitro Enclave?"

In [None]:
# Print one output value of the deployed stack.
# NOTE(review): `Outputs[1]` selects the output by position; presumably this is the
# chatbot URL — confirm against the Outputs section of the template.
!aws cloudformation describe-stacks --stack-name gen-ai-immersion-day-stack --query "Stacks[0].Outputs[1].OutputValue"

Now that Kendra provides more context in the prompt, the model can give a better answer to a question about something it was not trained on.

![with-rag](../img/nitro_enclave_rag.jpg)

## Extra - Add Kendra web crawler data source

We will crawl the LangChain documentation and ask the chatbot the same question before and after adding the data to Kendra.

![before-langchain](../img/langchain-before-crawl.jpg)

In [None]:
# Seed URL passed to the web crawler data source below.
langchain_url = "https://python.langchain.com/docs/get_started/"

# Create a second data source on the same index: a template-based web crawler
# (template type WEBCRAWLERV2) seeded with `langchain_url`.
# NOTE: this reassigns `kendra_data_source_response`, replacing the response of the
# S3 data source created in an earlier cell.
kendra_client = boto3.client('kendra')
kendra_data_source_response = kendra_client.create_data_source(
    Name='web-crawler-data-source',
    IndexId=kendra_index_id,
    Type='TEMPLATE',
    Configuration={
        'TemplateConfiguration': {
            'Template': {
                'connectionConfiguration': {
                    'repositoryEndpointMetadata': {
                        's3SeedUrl': None,
                        'siteMapUrls': None,
                        'seedUrlConnections': [
                            {
                                'seedUrl': langchain_url
                            }
                        ],
                        's3SiteMapUrl': None,
                        'authentication': 'NoAuthentication'
                    }
                },
                'enableIdentityCrawler': False,
                'syncMode': "FORCED_FULL_CRAWL",
                # Numeric settings below are passed as strings.
                'additionalProperties': {
                    'inclusionFileIndexPatterns': [],
                    'rateLimit': '300',
                    'maxFileSize': '50',
                    'crawlDepth': '2',
                    'crawlAllDomain': True,
                    'crawlSubDomain': False,
                    'inclusionURLIndexPatterns': [],
                    'exclusionFileIndexPatterns': [],
                    'proxy': {},
                    'exclusionURLCrawlPatterns': [],
                    'exclusionURLIndexPatterns': [],
                    'crawlAttachments': False,
                    'honorRobots': True,
                    'inclusionURLCrawlPatterns': [],
                    'maxLinksPerUrl': '100'
                },
                'type': 'WEBCRAWLERV2',
                'version': '1.0.0',
                # The same two field mappings are applied to attachments and web pages.
                'repositoryConfigurations': {
                    'attachment': {
                        'fieldMappings': [
                            {
                                'dataSourceFieldName': 'category',
                                'indexFieldName': '_category',
                                'indexFieldType': 'STRING'
                            },
                            {
                                'dataSourceFieldName': 'sourceUrl',
                                'indexFieldName': '_source_uri',
                                'indexFieldType': 'STRING'
                            }
                        ]
                    },
                    'webPage': {
                        'fieldMappings': [
                            {
                                'dataSourceFieldName': 'category',
                                'indexFieldName': '_category',
                                'indexFieldType': 'STRING'
                            },
                            {
                                'dataSourceFieldName': 'sourceUrl',
                                'indexFieldName': '_source_uri',
                                'indexFieldType': 'STRING'
                            }
                        ]
                    }
                }
            }
        }
    },          
    RoleArn=kendra_role_arn,
    LanguageCode="en",
    Schedule="",
    Description=""
)


In [None]:
# Start a sync job for the data source referenced by `kendra_data_source_response`.
kendra_sync_response = kendra_client.start_data_source_sync_job(
    Id=kendra_data_source_response["Id"],
    IndexId=kendra_index_id
)

In [None]:
from time import sleep
import sys

def get_kendra_sync_status(data_source_id, index_id):
    """Return the raw ``list_data_source_sync_jobs`` response for a data source.

    Uses the notebook-level ``kendra_client``; callers read the ``History``
    list of the returned response.
    """
    return kendra_client.list_data_source_sync_jobs(Id=data_source_id, IndexId=index_id)

# Phase 1: wait until the data source reports at least one sync job in its history.
while True:
    job_count = get_kendra_sync_status(kendra_data_source_response["Id"],kendra_index_id)
    if len(job_count['History']) > 0:
        print("Sync job started")
        break
    else:
        sleep(5)

# Phase 2: poll the first history entry until its status is no longer an in-progress one.
# NOTE(review): presumably History[0] is the most recent job — confirm.
while True:
    status = get_kendra_sync_status(kendra_data_source_response["Id"],kendra_index_id)
    if  status['History'][0]['Status'] not in ["SYNCING", "STOPPING", "SYNCING_INDEXING"]:
        print(f"Sync job {status['History'][0]['ExecutionId']} finished with status {status['History'][0]['Status']}")
        break
    else:
        # progress dot, flushed so it shows up immediately
        sys.stdout.write(".")
        sys.stdout.flush()
        sleep(5)
        
# Point the reader at Kendra when the job did not complete successfully.
if status['History'][0]['Status'] in ["FAILED","INCOMPLETE"]:    
    print("\nCheck sync failure in Kendra")
    
        

![after](../img/langchain-after.jpg)

## Cleanup

In [None]:
# Delete the CloudFormation stack deployed with `sam deploy`, without interactive prompts.
!sam delete --no-prompts --stack-name gen-ai-immersion-day-stack

In [35]:
!aws sagemaker delete-endpoint --endpoint-name {sagemaker_endpoint_name}

In [36]:
# Delete the Kendra index used in this workshop.
!aws kendra delete-index --id {kendra_index_id}