In [2]:
import boto3
import csv
import pandas as pd
import datetime
import time
import json


# create bedrock object
bedrock = boto3.client(service_name='bedrock-runtime')
s3_client = boto3.client("s3")
# S3 bucket
bucket_name = "callsummarystack-assetbucket-nr5kcqsneged"
#configurations
temp_config=0
top_p_config=0.995
top_k_config=250
max_token_config=1000
# Prompt for LLM
SUMMARY_INSTRUCTIONS='''Summarize the conversation between a caller and agent  in 3-5 sentences. 
    Focus ONLY on : What is the caller's goal/issue, How was it resolved , is any follow up action to be taken.
    If the conversation is not long enough to Summarize then just return 'NOT Applicable'.
    Here is a conversation between a call center agent and  caller : '''

# Summarizes the ASR script by invoking the Titan model
def summarize_titan(asr_script):
    query_input = "\n\n".join([SUMMARY_INSTRUCTIONS, asr_script])
    query_context = {
       "inputText": query_input,
       "textGenerationConfig": {
           "maxTokenCount": max_token_config,
           "stopSequences": ["User:"],
           "temperature":temp_config,
           "topP":1
        }
    }
    #print("---------asr_script ")
    #print(asr_script)
    #print("---------")
    response = bedrock.invoke_model(
        modelId = 'amazon.titan-text-express-v1',
        contentType = "application/json",
        accept = "*/*",
        body = json.dumps(query_context)
        )
    model_response = json.loads(response['body'].read().decode('utf-8'))
    summary=json.dumps(model_response['results'][0]['outputText'])
    #print("---------Titan ")
    #print(summary)
    #print("---------")
    return summary

# Summarizes the ASR script by invoking the Claude model
def summarize_claude(asr_script):
    # Create the payload to provide to the Anthropic model.
    body_claude = {
        "prompt": f"\n\nHuman: {SUMMARY_INSTRUCTIONS}{asr_script}\n\nAssistant:",
        "temperature": temp_config,
        "top_p": top_p_config,
        "top_k": top_k_config,
        "max_tokens_to_sample": max_token_config,
        "stop_sequences": ["\\n\\nHuman:"]
    }

    # Invoke the Anthropic model using the payload.
    response_claude = bedrock.invoke_model(
           modelId="anthropic.claude-v2:1", #"anthropic.claude-v2"
           contentType="application/json",
           accept="*/*",
           body=json.dumps(body_claude)
    )
    assistant_response = json.loads(response_claude['body'].read())['completion']
    #print("---------claude ")
    #print(assistant_response)
    #print("---------")
    return assistant_response
    
# Summarizes the ASR script by invoking the Jurassic-2 Ultra model
def summarize_j2ultra(asr_script):
    # Create the payload to provide to the Anthropic model.
    query_input = "\n\n".join([SUMMARY_INSTRUCTIONS, asr_script])
    body_ai = {
        "prompt": query_input,
        "maxTokens": max_token_config,
        "temperature": top_p_config,
        "topP": top_p_config
    }

    # Invoke the Anthropic model using the payload.
    response_ai = bedrock.invoke_model(
        modelId="ai21.j2-ultra-v1", 
        contentType="application/json",
        accept="*/*",
        body=json.dumps(body_ai)
    )
    response_body = json.loads(response_ai.get('body').read())
    summary=response_body.get('completions')[0].get('data').get('text')
    #print("---------AI21 Labs ")
    #print(summary)
    #print("---------")
    return summary


# Processes ASR (Automatic Speech Recognition) scripts and summarize results
def process_audio_asr():    
    # Create a DataFrame 
    df = pd.DataFrame({'col1': ['File Name'], 'col2': ['ASR Script'], 'col3': ['Titan'], 'col4': ['claude'], 'col5': ['Ai21']})
    
    cnt=0
    # load the asr scripts
    response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix="asr/")
    files = response.get("Contents")
    for file in files:
        #loop for each script
        key = file['Key']
        print(f"file_name: {file['Key']}, size: {file['Size']}")
        obj = s3_client.get_object(Bucket='callsummarystack-assetbucket-nr5kcqsneged', Key=key)
        try:
            data = pd.read_csv(obj['Body'])
            # Convert DataFrame to string
            data_string = ""
            summTitan=""
            summClaude=""
            summAi21=""
            #prepare the string for entire conversation
            for index, row in data.iterrows():
                row_string = ""
                for col in row:
                    row_string += str(col) + ":"
                row_string += "\n"
                data_string += row_string
            #print(data_string)
            
            #call Bedrock models 
            summTitan=summarize_titan(data_string)
            # Add 15 second sleep
            time.sleep(15)
            summClaude=summarize_claude(data_string)
            # Add 15 second sleep
            time.sleep(15)
            summAi21=summarize_j2ultra(data_string)
            # Add 15 second sleep
            time.sleep(15)
            
            new_row = pd.DataFrame({'col1': [key], 'col2': [data_string], 'col3': [summTitan], 'col4': [summClaude], 'col5': [summAi21]})
            # concat to existing DataFrame
            df = pd.concat([df, new_row]).reset_index(drop=True)
            cnt=cnt+1
            print(cnt)
            
        except Exception as e:
            print(type(e))
            print(f"Empty file: {key}")
            continue

    print("---")
    # Upload CSV to S3
    #s3_w = boto3.client("s3")
    # Get datetime stamp 
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    # Append to filename
    filename = f'data-{timestamp}.csv' 
    # Save DataFrame to CSV
    df.to_csv(filename)
    print("Uploading result")
    # Upload the CSV to S3 
    try:
        s3_client.upload_file(filename, 'callsummarystack-assetbucket-nr5kcqsneged', f'asr_results/{filename}')
    except :
        print("Error in writing result") 
    print("---")
        
            
            

process_audio_asr()


  from pandas.core import (


file_name: asr/, size: 0
<class 'pandas.errors.EmptyDataError'>
Empty file: asr/
file_name: asr/auo-call-asr-script.csv, size: 1337
1
file_name: asr/health-call-asr-script.csv, size: 1842
2
---
Uploading result
---
