In [1]:
# install boto3 

# !pip install boto3 --upgrade
# pip install aws-utils
# pip install aws-python-utils

In [None]:
# for getting started this is prerequisite...
# follow the instructions
# https://docs.aws.amazon.com/kendra/latest/dg/gs-prerequisites.html


In [14]:
import boto3
from botocore.exceptions import ClientError
import pprint
import time

kendra = boto3.client("kendra")

print("Create an index")

description = "Getting started index"
index_name = "python-getting-started-index01"
index_role_arn = "arn:aws:iam::024615928572:role/KendraRoleForGettingStartedIndex"

try:
    index_response = kendra.create_index(
        Description = description,
        Name = index_name,
        RoleArn = index_role_arn
    )

    pprint.pprint(index_response)

    index_id = index_response["Id"]

    print("Wait for Kendra to create the index.")

    while True:
        # Get index description
        index_description = kendra.describe_index(
            Id = index_id
        )
        # When status is not CREATING quit.
        status = index_description["Status"]
        print("    Creating index. Status: "+status)
        time.sleep(60)
        if status != "CREATING":
            break

    print("Create an S3 data source")

    data_source_name = "python-getting-started-data-source"
    data_source_description = "Getting started data source."
    s3_bucket_name = "ml-flow01"
    data_source_type = "S3"
    data_source_role_arn = "arn:aws:iam::024615928572:role/KendraRoleForGettingStartedDataSource"

    configuration = {"S3Configuration":
        {
            "BucketName": s3_bucket_name
        }
    }

    data_source_response=kendra.create_data_source(
        Configuration = configuration,
        Name = data_source_name,
        Description = description,
        RoleArn = data_source_role_arn,
        Type = data_source_type,

        IndexId = index_id
    )

    pprint.pprint(data_source_response)

    data_source_id = data_source_response["Id"]

    print("Wait for Kendra to create the data source.")

    while True:
        data_source_description = kendra.describe_data_source(
            Id = data_source_id,
            IndexId = index_id
        )
        # When status is not CREATING quit.
        status = data_source_description["Status"]
        print("    Creating data source. Status: "+status)
        time.sleep(60)
        if status != "CREATING":
            break

    print("Synchronize the data source.")

    sync_response = kendra.start_data_source_sync_job(
        Id = data_source_id,
        IndexId = index_id
    )

    pprint.pprint(sync_response)

    print("Wait for the data source to sync with the index.")

    while True:

        jobs = kendra.list_data_source_sync_jobs(
            Id=data_source_id,
            IndexId=index_id
        )

        # There should be exactly one job item in response
        status = jobs["History"][0]["Status"]

        print("    Syncing data source. Status: "+status)
        if status != "SYNCING":
            break
        time.sleep(60)

except  ClientError as e:
        print("%s" % e)

print("Program ends.")

Create an index
{'Id': 'f3211a85-965f-46b0-83f1-7bab7067277f',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '45',
                                      'content-type': 'application/x-amz-json-1.1',
                                      'date': 'Mon, 22 Mar 2021 18:57:10 GMT',
                                      'x-amzn-requestid': '70dbd505-6f94-4467-9615-8e463fdc59f7'},
                      'HTTPStatusCode': 200,
                      'RequestId': '70dbd505-6f94-4467-9615-8e463fdc59f7',
                      'RetryAttempts': 0}}
Wait for Kendra to create the index.
    Creating index. Status: CREATING
    Creating index. Status: ACTIVE
Create an S3 data source
{'Id': 'ba6f25e5-46d2-4f33-a0c3-5bb78f78f0ad',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '45',
                                      'content-type': 'application/x-amz-json-1.1',
                                      'date': 'Mon, 22 Mar 2021 18:59:11 GMT',
                                      'x-amz

In [17]:
# copy the index id

In [16]:
import boto3

kendra = boto3.client('kendra')

index_id = 'f3211a85-965f-46b0-83f1-7bab7067277f'

title = 'Information about Amazon.com'
text = 'Amazon.com is an online retailer. It also provide cloud services'

document = {
    "Id": "1",
    "Blob": text,
    "ContentType": "PLAIN_TEXT",
    "Title": title
}

documents = [
    document
]

result = kendra.batch_put_document(
    IndexId = index_id,
    Documents = documents
)

print(result)

{'FailedDocuments': [], 'ResponseMetadata': {'RequestId': '35d92c9a-4c0d-4897-9d53-3fb5c7657011', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '35d92c9a-4c0d-4897-9d53-3fb5c7657011', 'content-type': 'application/x-amz-json-1.1', 'content-length': '22', 'date': 'Mon, 22 Mar 2021 19:05:07 GMT'}, 'RetryAttempts': 0}}


In [20]:
# accessing from s3 bucket

In [19]:
# kendra = boto3.client('kendra')

# index_id = '3b99ea70-07d2-4940-bb64-d770f0d05d55'
role_arn = 'arn:aws:iam::024615928572:role/KendraRoleForGettingStartedIndex'

faq_path = {
    'Bucket': 'ml-flow01',
    'Key': 'SeattleBuildings.csv'
}

response = kendra.create_faq(
    S3Path =  faq_path,
    Name = 'SeattleBuildings',
    IndexId = index_id,
    RoleArn = role_arn,
    FileFormat = 'CSV'
)

print(response)

{'Id': '1ecd895b-1639-46d4-bcd1-8c1c42b4831b', 'ResponseMetadata': {'RequestId': '15bf52d0-076c-40ae-b681-3c2657d10f27', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '15bf52d0-076c-40ae-b681-3c2657d10f27', 'content-type': 'application/x-amz-json-1.1', 'content-length': '45', 'date': 'Mon, 22 Mar 2021 19:05:47 GMT'}, 'RetryAttempts': 0}}


In [22]:
query_s3= 'What is the height of the Space Needle?'

In [21]:
query='what is Amazon.com'
# index_id='${indexID}'

response=kendra.query(
        QueryText = query,
        IndexId = index_id)

print ('\nSearch results for query: ' + query + '\n')        

for query_result in response['ResultItems']:

    print('-------------------')
    print('Type: ' + str(query_result['Type']))
        
    if query_result['Type']=='ANSWER':
        answer_text = query_result['DocumentExcerpt']['Text']
        print(answer_text)

    if query_result['Type']=='DOCUMENT':
        if 'DocumentTitle' in query_result:
            document_title = query_result['DocumentTitle']['Text']
            print('Title: ' + document_title)
        document_text = query_result['DocumentExcerpt']['Text']
        print(document_text)

    print ('------------------\n\n') 


Search results for query: what is Amazon.com

-------------------
Type: ANSWER
Amazon.com is an online retailer. It also provide cloud services
------------------


-------------------
Type: DOCUMENT
Title: Information about Amazon.com
...Amazon.com is an online retailer. It also provide cloud services...
------------------


-------------------
Type: DOCUMENT
Title: SageMakerAutopilotCandidateDefinitionNotebook
...refers to [Xgboost tuning](https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost-tuning.html) and [Linear learner tuning](https://docs.aws.amazon.com/sagemaker/latest/dg/linear-learner-tuning.html) for detailed explanations of the parameters.\n",
    "</div>"
   ]
  },
  {
   "cell_type...
------------------


-------------------
Type: DOCUMENT
Title: README
...is a configuration class that keeps track of all input and output to Amazon Simple Storage Service 
(Amazon S3) paths, conventions, and AWS and Amazon SageMaker shared variables (e.g., session and roles) 
for an in

In [24]:
response=kendra.query(
        QueryText = query_s3,
        IndexId = index_id)

print ('\nSearch results for query: ' + query_s3 + '\n')        

for query_result in response['ResultItems']:

    print('-------------------')
    print('Type: ' + str(query_result['Type']))
        
    if query_result['Type']=='ANSWER':
        answer_text = query_result['DocumentExcerpt']['Text']
        print(answer_text)

    if query_result['Type']=='DOCUMENT':
        if 'DocumentTitle' in query_result:
            document_title = query_result['DocumentTitle']['Text']
            print('Title: ' + document_title)
        document_text = query_result['DocumentExcerpt']['Text']
        print(document_text)

    print ('------------------\n\n') 


Search results for query: What is the height of the Space Needle?

-------------------
Type: QUESTION_ANSWER
------------------


-------------------
Type: DOCUMENT
Title: SeattleBuildings
...What is the height of the Space Needle?, 605 feet, https://www.spaceneedle.com/
How tall is the Space Needle?, 605 feet, https://www.spaceneedle.com/
What is the height of the Smith Tower?, 484 feet, https://www.smithtower.com
How tall is the Smith Tower, 484 feet, https://www...
------------------


-------------------
Type: DOCUMENT
Title: SageMakerAutopilotDataExplorationNotebook
...Percent of Missing Values\n",
    "Within the data sample, the following columns contained missing values, such as: `nan`, white spaces, or empty fields.\n",
    "\n",
    "SageMaker Autopilot will attempt to fill in missing values using various techniques. For example,\n...
------------------


