# In this project we will combine voice-to-text with an LLM, so that we can extract insights from a text just by asking questions by voice.

In [5]:
# First, import a sample text from a website; the extraction is done with BeautifulSoup

In [2]:
import requests
from bs4 import BeautifulSoup

# Wikipedia URL
url = "https://en.wikipedia.org/wiki/Uttarakhand"

# Send a GET request to the URL. Without a timeout, requests waits
# indefinitely on a stalled connection and the cell never finishes.
response = requests.get(url, timeout=30)

# Fail loudly on an HTTP error (4xx/5xx) instead of silently parsing an
# error page as if it were the article.
response.raise_for_status()

# Parse the HTML content
soup = BeautifulSoup(response.content, "html.parser")

# Find all paragraphs
paragraphs = soup.find_all("p")

# Extract text from paragraphs (one string per <p> element)
text_data = [p.get_text() for p in paragraphs]

In [3]:
text_data

['\n',
 "Uttarakhand (English: /ˈʊtərɑːkʌnd/,[19] /ˌʊtərəˈkʌnd/[20] or /ˌʊtəˈrækənd/;[21] Hindi: [ˈʊtːərɑːkʰəɳɖ], lit.\u2009'Northern Land'), formerly known as Uttaranchal (English: /ˌʊtəˈræntʃʌl/; the official name until 2007),[22] is a state in northern India. The state is divided into two divisions, Garhwal and Kumaon, with a total of 13 districts. The winter capital and largest city of the state is Dehradun, which is also a railhead. On 5 March 2020, Bhararisain, a town in the Gairsain Tehsil of the Chamoli district, was declared as  the summer capital of Uttarakhand.[23][24] The High Court of the state is located in Nainital, but is to be moved to Haldwani in future.[25]\n",
 'Uttarakhand borders the Tibet Autonomous Region of China to the north; the Sudurpashchim Province of Nepal to the east; the Indian states of Uttar Pradesh to the south and Himachal Pradesh to the west and north-west. It is often referred to as the "Devbhumi" (lit.\u2009\'Land of the Gods\')[26] due to its re

#NOW WE WILL CLEAN THE TEXT EXTRACTED FROM THE WEBSITE:

In [12]:
# NLTK is used for this task

import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

# Merge the scraped paragraphs into one space-separated string
text_data_concatenated = " ".join(text_data)

# Strip anything that looks like an HTML tag, then every character that is
# neither a word character nor whitespace
clean_text = re.sub(r"[^\w\s]", "", re.sub("<.*?>", "", text_data_concatenated))

# Helpers for the token pass below
stop_words = set(stopwords.words("english"))
stemmer = PorterStemmer()

# Tokenize, lowercase, drop English stop words and stem, all in one pass
tokens = [
    stemmer.stem(word)
    for word in map(str.lower, word_tokenize(clean_text))
    if word not in stop_words
]

# Rebuild the cleaned text from the surviving stems
clean_text = " ".join(tokens)

#print(clean_text)

In [6]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

In [7]:
# Checkpoint shared by the tokenizer and the question-answering model
qa_checkpoint = "deepset/roberta-base-squad2"

# Fetch the pretrained tokenizer first, then the question-answering model,
# both from the same checkpoint
tokenizer, model = (
    AutoTokenizer.from_pretrained(qa_checkpoint),
    AutoModelForQuestionAnswering.from_pretrained(qa_checkpoint),
)

Downloading tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/496M [00:00<?, ?B/s]

# NOW WE WILL TAKE A SAMPLE RUN OF OUR MODEL

In [8]:
from transformers import pipeline

# Build the question-answering pipeline from the already-loaded model and tokenizer
qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)

# The cleaned article text is the context every question is answered from
context = clean_text

# Sample questions for the demo run
questions = [
    "How many districts are there in Uttarakhand?",
    "What is the winter capital of Uttarakhand?",
    "When was Uttarakhand declared as the 27th state of India?",
]

# Ask each question in turn and print the answer text from the pipeline result
for question in questions:
    answer = qa_pipeline(question=question, context=context)

    print("Question:", question)
    # The extra newline leaves a blank line between question/answer pairs
    print("Answer:", answer['answer'], end="\n\n")


Question: How many districts are there in Uttarakhand?
Answer: 13

Question: What is the winter capital of Uttarakhand?
Answer: dehradun

Question: When was Uttarakhand declared as the 27th state of India?
Answer: 2000 thu 9 novemb 2000



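So we can see that the model is working well. The slightly lower precision (for example the stemmed answer "2000 thu 9 novemb 2000") is due to the data preprocessing step, which removes stop words and stems the context; otherwise it is all fine.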

# Now we will work on our voice-to-text model

In [9]:
import speech_recognition as sr

# Initialize the recognizer
recognizer = sr.Recognizer()

# Use the default microphone as the audio source. Only the capture happens
# inside the `with` block, so the microphone is released before the
# network-bound recognition request starts.
with sr.Microphone() as source:
    print("Listening...")

    # Adjust for ambient noise
    recognizer.adjust_for_ambient_noise(source)

    # Capture audio input
    audio = recognizer.listen(source)

print("Recognizing...")

try:
    # Recognize speech using Google Speech Recognition
    text = recognizer.recognize_google(audio)

    print("You said:", text)
except sr.UnknownValueError:
    # The service returned no usable transcription for this audio
    print("Sorry, I could not understand the audio.")
except sr.RequestError as e:
    # The request to the recognition service itself failed
    print("Could not request results from Google Speech Recognition service; {0}".format(e))

Listening...
Recognizing...
You said: hello my name is Piyush


# SO FAR I HAVE SHOWN HOW BOTH WORK SEPARATELY. NOW WE WILL COMBINE THEM AND SEE HOW IT WORKS OUT.

In [11]:
# Most of the work is done above; we now just need a few modifications to make the two parts work together

# Capture one spoken question. Only the capture happens inside the `with`
# block, so the microphone is released before the network-bound recognition
# request and the model inference start.
with sr.Microphone() as source:
    print("Listening...")

    # Adjust for ambient noise
    recognizer.adjust_for_ambient_noise(source)

    # Capture audio input
    audio = recognizer.listen(source)

print("Recognizing...")

try:
    # Recognize speech using Google Speech Recognition
    question = recognizer.recognize_google(audio)
except sr.UnknownValueError:
    # The service returned no usable transcription for this audio
    print("Sorry, I could not understand the audio.")
except sr.RequestError as e:
    # The request to the recognition service itself failed
    print("Could not request results from Google Speech Recognition service; {0}".format(e))
else:
    # Recognition succeeded: answer the transcribed question from the
    # `context` text using the QA pipeline built earlier in the notebook.
    answer = qa_pipeline(question=question, context=context)

    print("You asked:", question)
    print("Answer:", answer['answer'])

Listening...
Recognizing...
You asked: what is the gender ratio in Uttarakhand
Answer: 963 femal per 1000 males


SO, I JUST USED A SAMPLE TEXT FROM THE WEBSITE TO ILLUSTRATE HOW IT WOULD EXACTLY WORK. WE CAN USE THE MODEL WITH OTHER TEXTS ALSO.
THE MODEL UNDERSTANDS THE CONTEXT AND ANSWERS THE QUESTIONS ACCORDING TO THE USER'S VOICE COMMANDS.