# Google Speech Recognition + VertexAI

This notebook demonstrates automated form completion from recorded speech.

Each form field has an associated "question", which is posed to the text-bison model in the context of the text recognized from the speech.

In [17]:
import json
from typing import List, Optional
from ast import literal_eval

from google.cloud import speech, storage
from ipywebrtc import AudioRecorder, CameraStream
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import VertexAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import VertexAI
from pydantic import BaseModel
import pandas as pd
from IPython.display import display, Markdown

# Google Cloud project passed to the Cloud Storage client and the Vertex AI embeddings.
PROJECT = 'wscc-dev-app-wsky'
# Cloud Storage bucket the recording is uploaded to and later transcribed from.
BUCKET = 'wscc-dev-eai'
# NOTE(review): REGION is not referenced by any cell visible in this notebook —
# confirm whether it was meant to be passed to the Vertex AI clients.
REGION = 'us-east4'

# TODO: I couldn't find a way to set SpeechClient project via Python.
# You'll need to run this until we find a better way.
# ! gcloud auth login
# ! gcloud auth application-default login
# ! gcloud config set project {PROJECT}

In [18]:
# Define form with some fields

class Field(BaseModel):
    """Base description of a form field: its identifier and the question used to fill it."""
    # Identifier of the field; shown in the `name` column of `Form.as_dataframe()`.
    name: str
    # Natural-language question sent to the Q&A chain to obtain this field's value.
    question: str

class StringField(Field):
    """Form field whose answer is stored as a single string."""
    # Unfilled fields start out as None. The default is spelled out because an
    # `Optional[...]` annotation without a default is only implicitly optional in
    # Pydantic v1; Pydantic v2 treats it as a required field.
    value: Optional[str] = None
    # Tag read by the form-filling loop and by `Form.as_dataframe()`.
    type: str = 'string'

class ListField(Field):
    """Form field whose answer is stored as a list of strings."""
    # Unfilled fields start out as None. The default is spelled out because an
    # `Optional[...]` annotation without a default is only implicitly optional in
    # Pydantic v1; Pydantic v2 treats it as a required field.
    value: Optional[List[str]] = None
    # Tag read by the form-filling loop (answers of 'list' fields are parsed
    # into a Python list) and by `Form.as_dataframe()`.
    type: str = 'list'

class Form(BaseModel):
    """A form made of an ordered list of fields."""
    fields: List[Field]

    def as_dataframe(self):
        """Return the form as a DataFrame with one row per field.

        The columns are, in order: name, type, question, value.
        """
        columns = ['name', 'type', 'question', 'value']
        # Column names double as attribute names, so each row is read off the
        # field in column order.
        rows = [
            tuple(getattr(field, column) for column in columns)
            for field in self.fields
        ]
        return pd.DataFrame(rows, columns=columns)

# One entry per form field: (field class, field name, question put to the model).
field_specs = [
    (StringField, 'full_name', "What is patient's full name?"),
    (StringField, 'age', 'What is patient age as number?'),
    (StringField, 'primary_diagnosis', "What is patient's primary diagnosis name?"),
    (ListField, 'chief_complaints', "What are patient's chief complaints as JSON list?"),
    (ListField, 'medications', 'What medications does the patient take as JSON list?'),
]
form = Form(fields=[
    field_cls(name=field_name, question=question)
    for field_cls, field_name, question in field_specs
])

# Render a short heading, then show the (still empty) form as a table.
heading = "## Created empty form without values\nThis form's values will be populated by AI."
display(Markdown(heading))
form.as_dataframe()

## Created empty form without values
This form's values will be populated by AI.

Unnamed: 0,name,type,question,value
0,full_name,string,What is patient's full name?,
1,age,string,What is patient age as number?,
2,primary_diagnosis,string,What is patient's primary diagnosis name?,
3,chief_complaints,list,What are patient's chief complaints as JSON list?,
4,medications,list,What medications does the patient take as JSON...,


In [19]:
# Record voice
# Run this cell, then record your voice before proceeding to the next cell.

# Sample text to read:
# The patient is a 78-year-old gentleman with no substantial past medical history except for diabetes.
# He denies any comorbid complications of the diabetes including kidney disease, heart disease, stroke, vision loss, or neuropathy.
# At this time, he has been admitted for anemia with hemoglobin of 7.1 and requiring transfusion.
# He reports that he has no signs or symptom of bleeding and had a blood transfusion approximately two months ago
# and actually several weeks before that blood transfusion, he had a transfusion for anemia.
# He has been placed on B12, oral iron, and Procrit.
# At this time, we are asked to evaluate him for further causes and treatment for his anemia.
# He denies any constitutional complaints except for fatigue, malaise, and some dyspnea.
# He has no adenopathy that he reports. No fevers, night sweats, bone pain, rash, arthralgias, or myalgias.

# Request the microphone only; no video track is captured.
audio_only_constraints = {'audio': True, 'video': False}
camera = CameraStream(constraints=audio_only_constraints)

# The recorder widget is the cell's output; use its controls to record.
recorder = AudioRecorder(stream=camera)
recorder

AudioRecorder(audio=Audio(value=b'', format='webm'), stream=CameraStream(constraints={'audio': True, 'video': …

In [None]:
# Upload voice

PATH = 'voice/adunai-test.webm'

# Fail early with a clear message when nothing has been recorded yet; otherwise
# an empty object would be uploaded and the recognition cell would fail later.
audio_bytes = bytes(recorder.audio.value)
if not audio_bytes:
    raise ValueError('No audio recorded: record your voice in the previous cell before uploading.')

gcs = storage.Client(project=PROJECT)
bucket = gcs.bucket(BUCKET)
blob = bucket.blob(PATH)
# The recorder produces WebM audio; without an explicit content type the
# object would be stored with the default 'text/plain'.
blob.upload_from_string(audio_bytes, content_type='audio/webm')

In [21]:
# Recognize voice

# The recognition request carries no project, so the project used for quota and
# billing is passed through the client options instead of relying on the
# ambient gcloud configuration.
# NOTE(review): the authenticated principal must be allowed to use PROJECT as a
# quota project — confirm for your credentials.
speech_client = speech.SpeechClient(client_options={'quota_project_id': PROJECT})

# Shared by the long-running request and the synchronous variant below.
recognition_config = speech.RecognitionConfig(
    language_code="en",
    audio_channel_count=2,
)
recognition_audio = speech.RecognitionAudio(
    uri=f'gs://{BUCKET}/{PATH}',
)

# Long recordings (>=1 min):
operation = speech_client.long_running_recognize(speech.LongRunningRecognizeRequest(
    config=recognition_config,
    audio=recognition_audio,
))
# Block until we have a result
response = operation.result()

# Short recordings (<1 min)
# response = speech_client.recognize(speech.RecognizeRequest(
#     config=recognition_config,
#     audio=recognition_audio,
# ))

# Join the top alternative of every result into one text. Results without any
# alternative are skipped instead of raising IndexError.
text = '. '.join(
    result.alternatives[0].transcript
    for result in response.results
    if result.alternatives
)
text

"the patient is a 78 year old gentleman with no substantial past medical history except for diabetes she denies any comorbid complications of the diabetes including kidney disease heart disease stroke vision loss or neuropathy at this time he has been admitted for anemia with hemoglobin of 7.1 and requiring transfusion she reports that he has no signs or symptoms of bleeding and she had a blood transfusion approximately two months ago and actually several weeks before that blood transfusion he had a transfusion for anemia.  she's been placed on beach walls oral iron and truck right this time we were asked to evaluate him for further causes and treatment for his anemia he denies any constitutional complaints except for fatigue Melodies and some Disney he has no adenopathy that he reports no fevers night sweats bone pain rash arthralgia sore Mi GS"

In [22]:
# Prepare text for analysis
# Uncomment this if you don't want to read the text aloud in the previous cells. :)
# text = "The patient is a 78-year-old gentleman with no substantial past medical history except for diabetes. He denies any comorbid complications of the diabetes including kidney disease, heart disease, stroke, vision loss, or neuropathy. At this time, he has been admitted for anemia with hemoglobin of 7.1 and requiring transfusion. He reports that he has no signs or symptom of bleeding and had a blood transfusion approximately two months ago and actually several weeks before that blood transfusion, he had a transfusion for anemia. He has been placed on B12, oral iron, and Procrit. At this time, we are asked to evaluate him for further causes and treatment for his anemia. He denies any constitutional complaints except for fatigue, malaise, and some dyspnea. He has no adenopathy that he reports. No fevers, night sweats, bone pain, rash, arthralgias, or myalgias."
# Split the transcript into chunks of at most 1500 characters with no overlap;
# each chunk becomes one document for the vector store.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=0,
)
docs = text_splitter.split_text(text)

# Report how many chunks were produced.
print(f"# of documents = {len(docs)}")

# of documents = 1


In [23]:
# Prepare Q&A model

# Embed the transcript chunks and index them in a Chroma vector store.
embeddings = VertexAIEmbeddings(project=PROJECT)
db = Chroma.from_texts(docs, embeddings)
# Only the single most similar chunk is handed to the model for each question.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 1})

# The project is passed explicitly, as for the embeddings above, so the LLM
# does not depend on whichever project happens to be configured globally.
llm = VertexAI(
    project=PROJECT,
    model_name="text-bison@001",
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)
# "stuff" chain: the retrieved chunk is placed into the prompt together with the question.
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True
)

In [24]:
def _parse_list_answer(answer):
    """Turn the model's textual answer to a list question into a Python list.

    The questions ask for a JSON list, so JSON is tried first. Models often
    answer with a Python-style literal instead (single quotes), so
    `literal_eval` is the fallback; it only evaluates literals and never
    executes code.

    Raises:
        ValueError: if the answer cannot be parsed or is not a list.
    """
    cleaned = answer.strip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        try:
            parsed = literal_eval(cleaned)
        except (ValueError, SyntaxError) as err:
            raise ValueError(f'Could not parse list answer: {answer!r}') from err
    if isinstance(parsed, tuple):
        parsed = list(parsed)
    if not isinstance(parsed, list):
        raise ValueError(f'Expected a list answer, got: {answer!r}')
    return parsed


# Ask the model each field's question and store the answer on the field.
# NOTE: list entries are stored as returned by the model; they are not coerced
# to strings, so the model may return objects rather than plain strings.
for field in form.fields:
    result = qa({'query': field.question})['result']
    if field.type == 'list':
        field.value = _parse_list_answer(result)
    else:
        field.value = result
form.as_dataframe()

Unnamed: 0,name,type,question,value
0,full_name,string,What is patient's full name?,The patient's full name is not given in the pa...
1,age,string,What is patient age as number?,The patient is 78 years old.
2,primary_diagnosis,string,What is patient's primary diagnosis name?,Anemia
3,chief_complaints,list,What are patient's chief complaints as JSON list?,"[fatigue, malaise, dyspnea]"
4,medications,list,What medications does the patient take as JSON...,"[{'name': 'beach walls oral iron', 'dosage': '..."


In [25]:
# Print the populated form as plain text so every value is shown in full
# (the DataFrame rendering above truncates long values).
print(form)

fields=[StringField(name='full_name', question="What is patient's full name?", value="The patient's full name is not given in the passage.", type='string'), StringField(name='age', question='What is patient age as number?', value='The patient is 78 years old.', type='string'), StringField(name='primary_diagnosis', question="What is patient's primary diagnosis name?", value='Anemia', type='string'), ListField(name='chief_complaints', question="What are patient's chief complaints as JSON list?", value=['fatigue', 'malaise', 'dyspnea'], type='list'), ListField(name='medications', question='What medications does the patient take as JSON list?', value=[{'name': 'beach walls oral iron', 'dosage': 'oral', 'frequency': 'daily', 'duration': 'unknown'}, {'name': 'truck right', 'dosage': 'unknown', 'frequency': 'unknown', 'duration': 'unknown'}], type='list')]
