In [None]:
!pip install gradio

In [None]:
import whisper
import gradio as gr

In [None]:
def transcribe(audio_file):
    model = whisper.load_model('base')
    result = model.transcribe(audio_file)
    return result['text']

In [None]:
gr.Interface(fn=transcribe, 
            inputs=gr.Audio(sources=['microphone', 'upload'], type='filepath'), 
            outputs=[
                'text'
                ]).launch(debug=True)

# RAG-QA System

In [12]:
%pip install chromadb==0.4.15
%pip install langchainhub

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 23.3.1
[notice] To update, run: C:\Users\natha\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Collecting langchainhubNote: you may need to restart the kernel to use updated packages.

  Downloading langchainhub-0.1.14-py3-none-any.whl (3.4 kB)
Collecting types-requests<3.0.0.0,>=2.31.0.2
  Downloading types_requests-2.31.0.10-py3-none-any.whl (14 kB)
  Downloading types_requests-2.31.0.9-py3-none-any.whl (14 kB)
  Downloading types_requests-2.31.0.8-py3-none-any.whl (14 kB)
  Downloading types_requests-2.31.0.7-py3-none-any.whl (14 kB)
  Downloading types_requests-2.31.0.6-py3-none-any.whl (14 kB)
Collecting types-urllib3
  Downloading types_urllib3-1.26.25.14-py3-none-any.whl (15 kB)
Installing collected packages: types-urllib3, types-requests, langchainhub
Successfully installed langchainhub-0.1.14 types-requests-2.31.0.6 types-urllib3-1.26.25.14



[notice] A new release of pip is available: 23.0.1 -> 23.3.1
[notice] To update, run: C:\Users\natha\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [2]:
import getpass
openai_api_key = getpass.getpass('Enter OpenAI API key:')

In [5]:
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [6]:
loader = CSVLoader(file_path='chunk_text_db.csv', csv_args={
    'delimiter': ',',
    'quotechar': '"',
    'fieldnames': ['transcript', 'location']
})
data = loader.load()

In [7]:
print(data)

[Document(page_content='transcript: transcript\nlocation: location', metadata={'source': 'chunk_text_db.csv', 'row': 0}), Document(page_content='transcript: the Gospel according to Mark. Chapter 1\nlocation: mark1-0.mp3', metadata={'source': 'chunk_text_db.csv', 'row': 1}), Document(page_content="transcript: the beginning of the gospel of Jesus Christ, the Son of God. As it is written in Isaiah the prophet, behold I send my messenger before your face, who will prepare your way, the voice of one crying in the wilderness, prepare the way of the Lord, make His paths straight. John appeared, baptizing in the wilderness and proclaiming a baptism of repentance for the forgiveness of sins, and all the country of Judea and all Jerusalem were going out to Him and were being baptized by Him in the river Jordan, confessing their sins. Now John was clothed with camel's hair and wore a leather belt around his waist and ate locusts and wild honey, and he preached, saying, after me comes He who is mi

In [8]:
data[1].page_content

'transcript: the Gospel according to Mark. Chapter 1\nlocation: mark1-0.mp3'

In [9]:
vectorstore = Chroma.from_documents(documents=data, 
                                    embedding=OpenAIEmbeddings(openai_api_key=openai_api_key))

In [10]:
retriever = vectorstore.as_retriever()

## Retrieve Relevant Audio

In [20]:
retrieved_docs = retriever.get_relevant_documents(
    "Who was the brother of Andrew?"
)
print(len(retrieved_docs))
print(retrieved_docs[0].page_content)

4
transcript: Passing alongside the sea of Galilee, he saw Simon and Andrew the brother of Simon casting a net into the sea for they were fishermen. And Jesus said to them, ï¿½Follow me, and I will make you become fishers of men.ï¿½ And immediately they left their nets and followed him. And going on a little farther, he saw James, the son of Zebedee, and John his brother, who were in their boat mending the nets. And immediately he called them, and they left their father Zebedee in the boat with the hired servants, and followed him. And they went into Capernaum, and immediately on the Sabbath he entered the synagogue and was teaching. And they were astonished at his teaching, for he taught them as one who had authority, and not as describes. And immediately there was in their synagogue a man with an unclean spirit. And he cried out, ï¿½What have you to do with us, Jesus of Nazareth? Have you come to destroy us? I know who you are, the Holy One of God.ï¿½
location: mark1-3.mp3


In [None]:
#TODO: extract location & return that audio file as output

## QA

In [19]:
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, openai_api_key=openai_api_key)

In [15]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


In [16]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [17]:
rag_chain.invoke("Who was the brother of Andrew?")

'The brother of Andrew was Simon.'