In [1]:
import boto3, os

In [2]:
# Local path of the prompt template written by the %%writefile cell below.
dialogue_prompt_file_path = 'dialogue_summary_prompt_template.txt'
# AWS region handed to the Lambda and S3 helper classes.
region = 'us-east-1'
# Layer ARN and execution role passed to Lambda_Helper when deploying the functions.
# NOTE(review): both ARNs embed a concrete AWS account id; consider loading them
# from environment variables before sharing this notebook.
lambda_layer_arn='arn:aws:lambda:us-east-1:638139650817:layer:robb_bedrock_lambda_layer:1'
lambda_execution_role = "arn:aws:iam::638139650817:role/RobbLambdaRole"
# Bucket wired to the summarization Lambda trigger; results.txt is downloaded from it.
ml_bucket_name_text = "robb.ml.bucket"
# Bucket wired to the transcription Lambda trigger; the audio file is uploaded to it.
transcribe_bucket_name_text = "robb.transcribe.bucket"

In [3]:
#Helper functions

from IPython.display import HTML, display
import html
import json

def wrap(long_string, pre=False):
    """Render text in the notebook inside a fixed-width (600px) HTML div.

    The text is HTML-escaped and every newline is turned into ``<br>``.

    Args:
        long_string: Text to display.
        pre: When falsy (the default) the text is additionally wrapped in a
            ``<pre>`` element; when truthy the ``<pre>`` wrapper is omitted.
            NOTE(review): this reads as inverted relative to the parameter
            name, but it is the existing behaviour and callers rely on the
            default.
    """
    markup = html.escape(long_string).replace("\n", "<br>")
    if not pre:
        markup = f"<pre>{markup}</pre>"
    display(HTML(f"<div style='width: 600px; word-wrap: break-word;'>{markup}</div>"))
        
def text_file(file_path):
    """Print a file's name and render its UTF-8 contents via ``wrap``.

    Errors are reported with ``print`` instead of being raised: a missing
    file gets a dedicated message, any other exception a generic one.

    Args:
        file_path: Path of the text file to show.
    """
    try:
        # Load the whole file into memory as one string.
        with open(file_path, mode='r', encoding='utf-8') as handle:
            contents = handle.read()

        # Header line first, then the formatted body.
        print(f"{file_path}:")
        wrap(contents)

    except FileNotFoundError:
        print(f"The file {file_path} was not found.")
    except Exception as err:
        print(f"An error occurred: {err}")

Write the summary prompt to local disk

In [4]:
%%writefile dialogue_summary_prompt_template.txt
I need to summarize a conversation. The transcript of the conversation is between the <data> XML like tags.

<data>
{{transcript}}
</data>

The detailed summary must contain a one word sentiment analysis, and a list of issues, problems or causes of friction
during the conversation. The output must be provided in JSON format shown in the following example. 

Example output:
{
    "version": 0.1,
    "sentiment": <sentiment>,
    "issues": [
        {
            "topic": <topic>,
            "summary": <issue_summary>
        }
    ]
}

A `topic` must only be one of:
{%- for topic in topics %}
 - `{{topic}}`
{% endfor %}

Write the JSON output and nothing more.

Here is the JSON output:

Writing dialogue_summary_prompt_template.txt


In [5]:
text_file(dialogue_prompt_file_path)


dialogue_summary_prompt_template.txt:


In [6]:
%%writefile TranscribeLambdaFunction.py

#############################################################
#
#
#############################################################

import boto3
import json 
import uuid

# Single place to change the AWS region used by this Lambda module.
region = 'us-east-1'

# Build the client from `region` so the constant above is the one source of truth
# (previously the region string was repeated here and `region` went unused).
transcribe_client = boto3.client('transcribe', region_name=region)
# Bucket passed as OutputBucketName when a transcription job is started.
output_ml_bucket_name_text = "robb.ml.bucket"

def lambda_handler(event, context):
    """Handle an S3 notification and start transcription for mp3 uploads.

    Only the first record of the event is inspected. Objects whose key does
    not contain ``mp3/`` are ignored.

    Args:
        event: S3 event notification payload (``Records[0].s3`` is read).
        context: Lambda context object (unused).

    Returns:
        dict with ``statusCode`` and a JSON-encoded ``body``: the status
        produced by ``lambda_transcribe_client`` when the object is
        processed, otherwise a 200 status explaining the object was ignored.
    """
    # Imported locally so this function stands on its own without touching
    # the module's import header.
    from urllib.parse import unquote_plus

    s3_record = event['Records'][0]['s3']
    bucket = s3_record['bucket']['name']
    # S3 URL-encodes object keys in notifications (a space arrives as '+'),
    # so decode before comparing the key or using it in an S3 URI.
    key = unquote_plus(s3_record['object']['key'])

    print("The key is: " + key)

    if "mp3/" not in key:
        return {
            'statusCode': 200,
            'body': json.dumps(f"Ignored {key} from bucket {bucket}: not under mp3/.")
        }

    print("This app is working on an mp3 file.")
    return_status = lambda_transcribe_client(bucket, key)

    # The helper may decline the key without producing a status; report that
    # explicitly instead of returning None.
    if return_status is None:
        return {
            'statusCode': 200,
            'body': json.dumps(f"Skipped {key} from bucket {bucket}.")
        }

    return return_status
       
def lambda_transcribe_client(bucket, key):
    """Submit an Amazon Transcribe job for the demo audio file.

    Args:
        bucket: Name of the bucket holding the audio object.
        key: Object key; only ``mp3/dialog.mp3`` is accepted.

    Returns:
        dict with ``statusCode`` and a JSON-encoded ``body``:
        400 when the key is not the demo file, 500 when submitting the job
        raised, 200 once the job has been submitted.
    """
    # One of a few different checks to ensure we don't end up in a recursive loop.
    if key != "mp3/dialog.mp3":
        print("This demo only works with dialog.mp3.")
        # Return a status dict like every other path so callers can always
        # read 'statusCode' (this path used to return None).
        return {
            'statusCode': 400,
            'body': json.dumps(f"Skipped {key} from bucket {bucket}: this demo only works with mp3/dialog.mp3.")
        }

    try:
        # Transcription job names must be unique, hence the random suffix.
        job_name = 'transcription-job-' + str(uuid.uuid4())

        transcribe_client.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': f's3://{bucket}/{key}'},
            MediaFormat='mp3',
            LanguageCode='en-US',
            OutputBucketName=output_ml_bucket_name_text,  # specify the output bucket
            OutputKey=f'json/{job_name}-transcript.json',
            Settings={
                'ShowSpeakerLabels': True,
                'MaxSpeakerLabels': 2
            }
        )

    except Exception as e:
        print(f"Error occurred: {e}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Error occurred: {e}")
        }

    return {
        'statusCode': 200,
        'body': json.dumps(f"Submitted transcription job for {key} from bucket {bucket}.")
    }

Writing TranscribeLambdaFunction.py


In [7]:
%%writefile LambdaFunctionSummarize.py

#############################################################
#
#
#############################################################

import boto3
import json 
from jinja2 import Template

# AWS region handed to the Bedrock runtime client below.
region = 'us-east-1'

# S3 client is created without an explicit region; it reads the transcript
# object and writes results.txt.
s3_client = boto3.client('s3')
# `region` is passed positionally as the second argument to boto3.client.
bedrock_runtime = boto3.client('bedrock-runtime', region)

def lambda_handler(event, context):
    """Handle an S3 notification and summarize a transcript JSON object.

    Only the first record of the event is inspected. Objects whose key does
    not contain ``json/`` are ignored.

    Args:
        event: S3 event notification payload (``Records[0].s3`` is read).
        context: Lambda context object (unused).

    Returns:
        dict with ``statusCode`` and a JSON-encoded ``body``: the status
        produced by ``lambda_summary_client`` when the object is processed
        (so failures are no longer reported as success), otherwise a 200
        status explaining the object was ignored.
    """
    # Imported locally so this function stands on its own without touching
    # the module's import header.
    from urllib.parse import unquote_plus

    s3_record = event['Records'][0]['s3']
    bucket = s3_record['bucket']['name']
    # S3 URL-encodes object keys in notifications (a space arrives as '+'),
    # so decode before using the key with get_object.
    key = unquote_plus(s3_record['object']['key'])

    print("The key is: " + key)

    if "json/" not in key:
        return {
            'statusCode': 200,
            'body': json.dumps(f"Ignored {key} from bucket {bucket}: not under json/.")
        }

    print("This app is working on a json file.")
    return_status = lambda_summary_client(bucket, key)

    # The helper may decline the key without producing a status; report that
    # explicitly instead of claiming success.
    if return_status is None:
        return {
            'statusCode': 200,
            'body': json.dumps(f"Skipped {key} from bucket {bucket}.")
        }

    return return_status
    
    
def lambda_summary_client(bucket, key):
    """Read a transcript from S3, summarize it, and store the summary.

    The summary is written to ``results.txt`` in the same bucket.

    Args:
        bucket: Name of the bucket holding the transcript object.
        key: Object key; must contain ``-transcript.json``.

    Returns:
        dict with ``statusCode`` and a JSON-encoded ``body``:
        400 when the key is not a transcript file, 500 when any step raised,
        200 (including the summary text) on success.
    """
    # One of a few different checks to ensure we don't end up in a recursive loop.
    if "-transcript.json" not in key:
        print("This demo only works with *-transcript.json.")
        # Return a status dict like every other path so callers can always
        # read 'statusCode' (this path used to return None).
        return {
            'statusCode': 400,
            'body': json.dumps(f"Skipped {key} from bucket {bucket}: this demo only works with *-transcript.json.")
        }

    try:
        response = s3_client.get_object(Bucket=bucket, Key=key)
        file_content = response['Body'].read().decode('utf-8')

        transcript = extract_transcript_from_textract(file_content)

        print(f"Successfully read file {key} from bucket {bucket}.")
        print(f"Transcript: {transcript}")

        summary = bedrock_summarisation(transcript)

        s3_client.put_object(
            Bucket=bucket,
            Key='results.txt',
            Body=summary,
            ContentType='text/plain'
        )

    except Exception as e:
        print(f"Error occurred: {e}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Error occurred: {e}")
        }

    return {
        'statusCode': 200,
        'body': json.dumps(f"Successfully summarized {key} from bucket {bucket}. Summary: {summary}")
    }

def extract_transcript_from_textract(file_content):
    """Flatten a transcript JSON document into speaker-labelled text.

    Walks ``results.items`` in order. Each time an item carries a
    ``speaker_label`` that differs from the current one, a new line
    ``"\\n<label>: "`` is started. Every item's first alternative is
    appended followed by a space; for ``punctuation`` items the trailing
    whitespace accumulated so far is stripped first so the mark attaches to
    the preceding word.

    Args:
        file_content: JSON text of the transcript.

    Returns:
        The assembled transcript string (empty when there are no items).
    """
    document = json.loads(file_content)

    transcript = ""
    active_speaker = None

    for entry in document['results']['items']:
        label = entry.get('speaker_label')
        token = entry['alternatives'][0]['content']

        # A speaker change opens a fresh, labelled line.
        speaker_changed = label is not None and label != active_speaker
        if speaker_changed:
            active_speaker = label
            transcript += f"\n{active_speaker}: "

        # Punctuation hugs the previous token: drop the trailing space.
        if entry['type'] == 'punctuation':
            transcript = transcript.rstrip()

        transcript += f"{token} "

    return transcript
        
def bedrock_summarisation(transcript):
    """Summarize a transcript with the Titan Text Express model on Bedrock.

    Loads the prompt template from ``dialogue_summary_prompt_template.txt``,
    renders it with the transcript and a fixed topic list, prints the
    rendered prompt, and invokes the model.

    Args:
        transcript: Conversation text substituted into the template.

    Returns:
        The ``outputText`` of the first entry in the model response's
        ``results`` list.
    """
    with open('dialogue_summary_prompt_template.txt', "r") as template_file:
        template_source = template_file.read()

    render_context = {
        'transcript': transcript,
        'topics': ['charges', 'location', 'availability']
    }
    prompt = Template(template_source).render(render_context)

    print(prompt)

    generation_config = {
        "maxTokenCount": 2048,
        "stopSequences": [],
        "temperature": 0,
        "topP": 0.9
    }
    request_body = json.dumps(
        {
            "inputText": prompt,
            "textGenerationConfig": generation_config
        }
    )

    response = bedrock_runtime.invoke_model(
        modelId="amazon.titan-text-express-v1",
        contentType="application/json",
        accept="*/*",
        body=request_body,
    )

    # The response body is a stream; read it once and parse the JSON payload.
    payload = json.loads(response.get('body').read())
    first_result = payload.get('results')[0]
    return first_result.get('outputText')


Writing LambdaFunctionSummarize.py


In [8]:
# Deploy Lambda functions
from helpers.Lambda_Helper import Lambda_Helper

# The helper receives the region, layer ARN and execution role defined earlier in the notebook.
lambda_helper = Lambda_Helper(region, lambda_layer_arn, lambda_execution_role)
# Helper methods used by this notebook: deploy_function, add_lambda_trigger

transcribe_lambda_function_name = "TranscribeLambdaFunction"
summarize_lambda_function_name = "LambdaFunctionSummarize"

# The transcription function is deployed from a single source file.
lambda_helper.deploy_function( ["TranscribeLambdaFunction.py"],function_name=transcribe_lambda_function_name)

# The summarization function is deployed together with the prompt template,
# which its code opens by relative file name.
lambda_helper.deploy_function( ["LambdaFunctionSummarize.py", "dialogue_summary_prompt_template.txt"],function_name=summarize_lambda_function_name)

Using function name: TranscribeLambdaFunction
Zipping function...
Looking for existing function...
Function TranscribeLambdaFunction exists. Updating code...
Function TranscribeLambdaFunction code updated: 2024-03-28T22:25:37.000+0000
Done.
Using function name: LambdaFunctionSummarize
Zipping function...
Looking for existing function...
Function LambdaFunctionSummarize exists. Updating code...
Function LambdaFunctionSummarize code updated: 2024-03-28T22:25:43.000+0000
Done.


In [9]:
# Configure the S3 bucket Lambda notifications.
# Transcription trigger: objects under mp3/ whose key ends in "mp3".
filter_rule_list = [ {'Name': 'suffix','Value': 'mp3'},{'Name': 'prefix','Value': 'mp3/'}]

lambda_helper.add_lambda_trigger(transcribe_bucket_name_text, filter_rule_list=filter_rule_list, function_name=transcribe_lambda_function_name)

# Summarization trigger: objects under json/ whose key ends in "json".
# Note: filter_rule_list is rebound here, replacing the mp3 rules above.
filter_rule_list = [ {'Name': 'suffix','Value': 'json'}, {'Name': 'prefix','Value': 'json/'}]

lambda_helper.add_lambda_trigger(ml_bucket_name_text, filter_rule_list=filter_rule_list, function_name=summarize_lambda_function_name)

Using function name of deployed function: TranscribeLambdaFunction
Function ARN : arn:aws:lambda:us-east-1:638139650817:function:TranscribeLambdaFunction
Removed existing permission: s3-trigger-permission-for-TranscribeLambdaFunction
Permission added with Statement: {
    "Sid": "s3-trigger-permission-for-TranscribeLambdaFunction",
    "Effect": "Allow",
    "Principal": {
        "Service": "s3.amazonaws.com"
    },
    "Action": "lambda:InvokeFunction",
    "Resource": "arn:aws:lambda:us-east-1:638139650817:function:TranscribeLambdaFunction",
    "Condition": {
        "ArnLike": {
            "AWS:SourceArn": "arn:aws:s3:::robb.transcribe.bucket"
        }
    }
}
Bucket name: robb.transcribe.bucket and notification configuration: {'LambdaFunctionConfigurations': [{'LambdaFunctionArn': 'arn:aws:lambda:us-east-1:638139650817:function:TranscribeLambdaFunction', 'Events': ['s3:ObjectCreated:Put'], 'Filter': {'Key': {'FilterRules': [{'Name': 'suffix', 'Value': 'mp3'}, {'Name': 'prefix',

ml_bucket_name_text ='c99355a2566046l6057108t1w532252118-learners3bucket-9armuhia70cz'
arn:aws:s3:::{bucket_name}"

In [16]:
# Upload the sample audio file to the transcribe bucket under the mp3/ prefix,
# which matches the filter rules configured for the transcription trigger.
from helpers.S3_Helper import S3_Helper
s3_helper = S3_Helper(region)
# Arguments: bucket name, local file path, destination object key.
s3_helper.upload_file_to_bucket(transcribe_bucket_name_text, 'data/dialog.mp3', 'mp3/dialog.mp3')

Object 'data/dialog.mp3' uploaded to bucket 'robb.transcribe.bucket'


In [23]:
# List the ML bucket to check for the transcript JSON and results.txt.
s3_helper.list_objects(ml_bucket_name_text)
# Uncomment to inspect the transcribe (audio) bucket instead:
#s3_helper.list_objects(transcribe_bucket_name_text)


Object: json/, Created on: 2024-03-12 20:42:40+00:00
Object: json/.write_access_check_file.temp, Created on: 2024-03-28 22:27:23+00:00
Object: json/transcription-job-23fcb320-5e6c-4c27-a0b3-3f3d70235ae3-transcript.json, Created on: 2024-03-28 22:27:40+00:00
Object: json/transcription-job-8aa5115b-cb5f-4a42-9af9-da97a55c4563-transcript.json, Created on: 2024-03-20 22:38:34+00:00
Object: json/transcription-job-cecadd03-c2df-4ccb-871d-b206d2a1ec96-transcript.json, Created on: 2024-03-27 22:16:03+00:00
Object: results.txt, Created on: 2024-03-28 22:27:46+00:00


In [24]:
# Fetch results.txt from the ML bucket and display the local copy.
s3_helper.download_object(ml_bucket_name_text, "results.txt")
text_file("results.txt")

Object 'results.txt' from bucket 'robb.ml.bucket' to './results.txt'
results.txt:


In [None]:
import os
def DeleteLocalFile(file_name: str):
    """Remove ``file_name`` from disk if it exists; otherwise do nothing."""
    if not os.path.exists(file_name):
        return
    os.remove(file_name)
    
# Remove every artifact this notebook left in the working directory,
# in the same order as before.
for _artifact in (
    "LambdaFunctionSummarize.py",
    "LambdaFunctionSummarize.zip",
    "dialogue_summary_prompt_template.txt",
    "TranscribeLambdaFunction.py",
    "TranscribeLambdaFunction.zip",
    "results.txt",
):
    DeleteLocalFile(_artifact)