In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from lmqg import TransformersQG
from lmqg.exceptions import AnswerNotFoundError
import speech_recognition as sr
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
def scrape_paragraphs(topic, num_paragraphs):
    f_topic = topic.capitalize()
    f_topic = '_'.join(f_topic.split()).lower()

    url=f'https://en.wikipedia.org/wiki/{f_topic}'
    response=requests.get(url)
    soup=BeautifulSoup(response.text, 'html.parser')

    paragraphs=soup.find_all('p')[:num_paragraphs]
    paragraph_texts=[p.get_text() for p in paragraphs]

    return paragraph_texts

#question from a paragraph using LMQG
def generate_question(paragraph):
    try:
        model=TransformersQG(language="en")
        result=model.generate_qa(paragraph)
        
        question=result[0][0]
        answer=result[0][1]
        return question,answer
    except AnswerNotFoundError:
        return "Error: No answer candidates found"

#record speech input from the user
class AnswerRecorder:
    def __init__(self):
        self.response = ""
    def record_response(self):
        recognizer =sr.Recognizer()
        with sr.Microphone() as source:
            print("Speak now:")
            recognizer.adjust_for_ambient_noise(source)
            audio=recognizer.listen(source)
        try:
            response=recognizer.recognize_google(audio)
            print("You said:", response)
            return response
        except sr.UnknownValueError:
            print("Sorry, I could not understand what you said.")
            return ""
        except sr.RequestError as e:
            print("Could not request results from Google Speech Recognition service; {0}".format(e))
            return ""

#compute cosine similarity between user answer and lmqg answer
def compute_similarity(answer1, answer2):
    vectorizer=CountVectorizer().fit_transform([answer1, answer2])
    vectors=vectorizer.toarray()
    return cosine_similarity(vectors)[0, 1]


topic= input("Enter the topic you want to interview about: ")
num_paragraphs=int(input("Enter the number of paragraphs to scrape: ")) #ensures exact depth of conversation you want

paragraphs=scrape_paragraphs(topic, num_paragraphs)
answer_recorder = AnswerRecorder()
user_answers=[]
#loop of interview
for paragraph in paragraphs:
    q_a_pair=generate_question(paragraph)

    if len(q_a_pair)==2:
        question,lmqg_answer=q_a_pair
        print("Question:", question)
    
        user_answer = answer_recorder.record_response()
        user_answers.append(user_answer)
        if user_answer:
        
            similarity = compute_similarity(user_answer, lmqg_answer)
            print("Similarity:", similarity)

            if similarity <= 0.5:
                print("Your answer seems quite different and may not be entirely correct. Here's the suggested answer:")
                print("LMQG Answer:", lmqg_answer)
    else:
        print(" Error: No answer candidates found for this para")

Enter the topic you want to interview about:  Air pollution
Enter the number of paragraphs to scrape:  4


100%|███████████████████████████| 1/1 [00:00<00:00, 340.12it/s]


 Error: No answer candidates found for this para


100%|███████████████████████████| 4/4 [00:00<00:00, 222.45it/s]
100%|███████████████████████████| 4/4 [00:00<00:00, 420.08it/s]


Question: What are substances that are harmful to the health of humans and other living beings?
Speak now:
You said: pollutants in the air
Similarity: 0.75


100%|███████████████████████████| 2/2 [00:00<00:00, 992.62it/s]
100%|██████████████████████████| 2/2 [00:00<00:00, 1984.06it/s]


Question: What is air quality closely related to?
Speak now:
You said: ecosystem of the earth
Similarity: 0.25
Your answer seems quite different and may not be entirely correct. Here's the suggested answer:
LMQG Answer: Earth's climate and ecosystems


100%|███████████████████████████| 3/3 [00:00<00:00, 746.67it/s]
100%|███████████████████████████| 1/1 [00:00<00:00, 499.68it/s]


Question: What does poor air quality affect?
Speak now:
Sorry, I could not understand what you said.
