In [12]:
import boto3
import os
import time
import urllib
import json
from langchain.text_splitter import SentenceTransformersTokenTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms.bedrock import Bedrock

transcribe_client = boto3.client('transcribe')
from bedrock import get_bedrock_client
bedrock_client = get_bedrock_client(region='us-east-1', runtime=True)

In [13]:
def transcribe_file(job_name, file_uri, transcribe_client):
    transcribe_client.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={'MediaFileUri': file_uri},
        MediaFormat='mp4',
        LanguageCode='en-US'
    )
    max_tries = 60
    while max_tries > 0:
        max_tries -= 1
        job = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
        job_status = job['TranscriptionJob']['TranscriptionJobStatus']
        if job_status in ['COMPLETED', 'FAILED']:
            print(f"Job {job_name} is {job_status}.")
            if job_status == 'COMPLETED':
                response = urllib.request.urlopen(job['TranscriptionJob']['Transcript']['TranscriptFileUri'])
                data = json.loads(response.read())
                text = data['results']['transcripts'][0]['transcript']
            break
        else:
            print(f"Waiting for {job_name}. Current status is {job_status}.")
        time.sleep(10)
    return text


In [14]:
file_uri = 's3://my-s3-doc-loader/aws1.mp4'
text = transcribe_file('My-video-to-text', file_uri, transcribe_client)

texts = [text]

text_splitter = SentenceTransformersTokenTextSplitter(chunk_size=400)
texts = text_splitter.create_documents(texts)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(documents=texts, embedding=embeddings)
retriever = db.as_retriever(search_type='mmr', search_kwargs={"k": 3})

Waiting for My-video-to-text. Current status is IN_PROGRESS.
Waiting for My-video-to-text. Current status is IN_PROGRESS.
Job My-video-to-text is COMPLETED.


In [17]:
template = """
Human: Answer the question as truthfully as possible using only the provided text
text:{context}
question:{question}
Assistant:"""
qa_prompt = PromptTemplate(template=template, input_variables=["context","question"])
chain_type_kwargs = { "prompt": qa_prompt}
llm = Bedrock(model_id="anthropic.claude-v2",client=bedrock_client)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    verbose=False
)

In [18]:
question="why AWS?"
result = qa.run(question)
print(result)

 Based on the provided text, some key reasons to use AWS are:

- It has a comprehensive and broadly adopted cloud platform used by organizations of all types and sizes to lower costs, become more agile, and innovate faster. 

- It provides on-demand delivery of technology services via the internet with pay-as-you-go pricing, so you only pay for what you use without upfront costs.

- It offers more services and features than any other cloud provider, making it faster, easier and more cost effective to move applications to the cloud.

- It offers a wide variety of purpose-built databases and the latest technologies to experiment, innovate quickly, and transform businesses. 

- It has a global network of regions with multiple availability zones for building scalable, fault tolerant, and highly available applications.

- It has unmatched experience, operational expertise, and security standards that customers can depend on for important applications.

- It has a large partner network, inde