In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from lmqg import TransformersQG
import speech_recognition as sr
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from lmqg.exceptions import AnswerNotFoundError
import spacy

In [None]:
# `spacy` is already imported in the first cell; the import is repeated here so
# this cell can be run on its own.
import spacy
# Load the `en_core_web_sm` spaCy pipeline into a notebook-wide `nlp` object.
# NOTE(review): `nlp` is not referenced by any cell visible in this notebook —
# confirm it is still needed before keeping this (slow) load.
nlp = spacy.load('en_core_web_sm')


In [None]:
# Function to scrape paragraphs from a Wikipedia URL
def scrape_paragraphs(topic, num_paragraphs):
    """Fetch the first non-empty paragraphs of a topic's English Wikipedia page.

    Args:
        topic: Free-text topic name, e.g. ``"machine learning"``. Runs of
            whitespace become underscores and the title is lower-cased, then
            percent-encoded before being placed in the URL.
        num_paragraphs: Maximum number of paragraphs to return.

    Returns:
        A list of at most ``num_paragraphs`` paragraph strings in page order.
        ``<p>`` elements whose text is empty or whitespace-only are skipped so
        they do not use up the quota. The list is empty when
        ``num_paragraphs`` is not positive or the page has no paragraph text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    from urllib.parse import quote

    if num_paragraphs <= 0:
        return []

    # The original called `.capitalize()` and then `.lower()`; for ordinary
    # input only the lower-casing had any effect, so just that step is kept.
    slug = '_'.join(topic.split()).lower()
    # Escape characters such as '&', '?' or '#' that would otherwise change
    # the meaning of the URL.
    url = f'https://en.wikipedia.org/wiki/{quote(slug)}'

    response = requests.get(url, timeout=10)
    # Without this check an error page would be parsed as if it were the article.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    paragraph_texts = []
    for p in soup.find_all('p'):
        text = p.get_text()
        if not text.strip():
            continue
        paragraph_texts.append(text)
        if len(paragraph_texts) == num_paragraphs:
            break

    return paragraph_texts

# Function to generate questions from a paragraph using LMQG
def generate_questions(paragraph):
    """Generate question/answer pairs for a paragraph with LMQG.

    Args:
        paragraph: Context text to generate questions from.

    Returns:
        Whatever ``TransformersQG.generate_qa`` returns for ``paragraph``
        (presumably a list of ``(question, answer)`` tuples — confirm against
        the installed lmqg version), or the string
        ``"Error: No answer candidates found"`` when the paragraph is blank or
        LMQG raises ``AnswerNotFoundError``.
    """
    # A blank paragraph cannot yield answer candidates; return the same error
    # value without touching the model.
    if not paragraph or not paragraph.strip():
        return "Error: No answer candidates found"

    try:
        # Reuse one model instance across calls instead of constructing a new
        # TransformersQG for every paragraph.
        model = getattr(generate_questions, "_model", None)
        if model is None:
            model = TransformersQG(language="en")
            generate_questions._model = model
        return model.generate_qa(paragraph)
    except AnswerNotFoundError as e:
        print(f"Error: {e}")
        return "Error: No answer candidates found"

# Class to handle speech recognition for user answers
class AnswerRecorder:
    """Records one spoken answer at a time from the microphone.

    The audio is transcribed with ``recognize_google`` from the
    ``speech_recognition`` package and the transcript is kept in ``response``.
    """

    def __init__(self):
        # Transcript of the most recent recording; "" when nothing was recognised.
        self.response = ""

    def record_response(self):
        """Listen on the microphone and store the transcript in ``self.response``.

        Returns:
            The recognised text, or ``""`` when the speech could not be
            understood or the recognition request failed. The same value is
            stored in ``self.response``.
        """
        # Clear the previous transcript first; otherwise a failed recognition
        # would leave the last answer in place and callers would read it as
        # the answer to the current question.
        self.response = ""

        recognizer = sr.Recognizer()
        with sr.Microphone() as source:
            print("Speak now:")
            recognizer.adjust_for_ambient_noise(source)  # Adjust for noise
            audio = recognizer.listen(source)

        try:
            self.response = recognizer.recognize_google(audio)
            print("You said:", self.response)
        except sr.UnknownValueError:
            print("Sorry, I couldn't understand what you said.")
        except sr.RequestError as e:
            print("Could not request results from Google Speech Recognition service; {0}".format(e))

        return self.response

# Function to compute cosine similarity between two strings
def compute_similarity(answer1, answer2):
    """Cosine similarity between the word-count vectors of two strings.

    Args:
        answer1: First text.
        answer2: Second text.

    Returns:
        The cosine similarity of the two ``CountVectorizer`` count vectors, or
        ``0.0`` when either text is empty/blank or the texts contain no token
        the vectorizer can use.
    """
    # A blank answer (e.g. a failed speech recognition) shares nothing with
    # the reference answer; score it as 0 instead of letting the vectorizer fail.
    if not answer1 or not answer2 or not answer1.strip() or not answer2.strip():
        return 0.0

    try:
        vectorizer = CountVectorizer().fit_transform([answer1, answer2])
    except ValueError:
        # CountVectorizer raises ValueError when the documents yield an empty
        # vocabulary, i.e. there are no tokens to compare.
        return 0.0

    vectors = vectorizer.toarray()
    return cosine_similarity(vectors)[0, 1]

# Example usage:
topic = input("Enter the topic you want to interview about: ")
num_paragraphs = int(input("Enter the number of paragraphs to scrape: "))

# Scrape paragraphs from Wikipedia
paragraphs = scrape_paragraphs(topic, num_paragraphs)

# Run LMQG once per paragraph. Each entry is either the model's QA output or
# the error string that generate_questions returns on failure.
qa_results = [generate_questions(paragraph) for paragraph in paragraphs]

# Initialize AnswerRecorder
answer_recorder = AnswerRecorder()

# Ask one question per paragraph and score the spoken answer against the
# answer LMQG produced together with that question.
questions = []
user_answers = []
for qa_pairs in qa_results:
    # Skip paragraphs where generation failed (error string) or produced nothing.
    if isinstance(qa_pairs, str) or not qa_pairs:
        continue

    # Assumes generate_qa yields (question, answer) tuples — confirm against
    # the installed lmqg version.
    question, lmqg_answer = qa_pairs[0]

    print("Question:", question)
    # Clear the previous transcript so a failed recognition is recorded as ""
    # rather than as a repeat of the last answer.
    answer_recorder.response = ""
    answer_recorder.record_response()
    user_answer = answer_recorder.response

    questions.append(question)
    user_answers.append(user_answer)

    # A blank answer has no words to vectorize; score it as 0.
    if user_answer.strip():
        similarity = compute_similarity(user_answer, lmqg_answer)
    else:
        similarity = 0.0
    print("Similarity:", similarity)

# Create a DataFrame to store questions and user answers
qa_df = pd.DataFrame({'Questions': questions, 'User_Answers': user_answers})

# Save the DataFrame to an Excel file
qa_df.to_excel('user_answers.xlsx', index=False)


In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from lmqg import TransformersQG
from lmqg.exceptions import AnswerNotFoundError  # Import AnswerNotFoundError
import speech_recognition as sr
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Function to scrape paragraphs from a Wikipedia URL
def scrape_paragraphs(topic, num_paragraphs):
    """Fetch the first non-empty paragraphs of a topic's English Wikipedia page.

    Args:
        topic: Free-text topic name, e.g. ``"machine learning"``. Runs of
            whitespace become underscores and the title is lower-cased, then
            percent-encoded before being placed in the URL.
        num_paragraphs: Maximum number of paragraphs to return.

    Returns:
        A list of at most ``num_paragraphs`` paragraph strings in page order.
        ``<p>`` elements whose text is empty or whitespace-only are skipped so
        they do not use up the quota. The list is empty when
        ``num_paragraphs`` is not positive or the page has no paragraph text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    from urllib.parse import quote

    if num_paragraphs <= 0:
        return []

    # The original called `.capitalize()` and then `.lower()`; for ordinary
    # input only the lower-casing had any effect, so just that step is kept.
    slug = '_'.join(topic.split()).lower()
    # Escape characters such as '&', '?' or '#' that would otherwise change
    # the meaning of the URL.
    url = f'https://en.wikipedia.org/wiki/{quote(slug)}'

    response = requests.get(url, timeout=10)
    # Without this check an error page would be parsed as if it were the article.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    paragraph_texts = []
    for p in soup.find_all('p'):
        text = p.get_text()
        if not text.strip():
            continue
        paragraph_texts.append(text)
        if len(paragraph_texts) == num_paragraphs:
            break

    return paragraph_texts

# Function to generate a question from a paragraph using LMQG
def generate_question(paragraph):
    """Return the first LMQG-generated question for a paragraph.

    Args:
        paragraph: Context text to generate a question from.

    Returns:
        The question of the first entry returned by
        ``TransformersQG.generate_qa``, or the string
        ``"Error: No answer candidates found"`` when the paragraph is blank,
        LMQG raises ``AnswerNotFoundError``, or no QA pair is produced.
    """
    # A blank paragraph cannot yield a question; skip the model entirely.
    if not paragraph or not paragraph.strip():
        return "Error: No answer candidates found"

    try:
        # Reuse one model instance across calls instead of constructing a new
        # TransformersQG for every paragraph.
        model = getattr(generate_question, "_model", None)
        if model is None:
            model = TransformersQG(language="en")
            generate_question._model = model
        qa_pairs = model.generate_qa(paragraph)
    except AnswerNotFoundError:
        return "Error: No answer candidates found"

    # Guard the [0][0] lookup: an empty result would otherwise raise IndexError.
    if not qa_pairs:
        return "Error: No answer candidates found"
    return qa_pairs[0][0]  # Only the first question

# Function to record speech input from the user
def record_response():
    """Capture one spoken answer from the microphone and transcribe it.

    Returns:
        The text returned by ``recognize_google``, or ``""`` when the speech
        could not be understood or the recognition request failed.
    """
    listener = sr.Recognizer()
    with sr.Microphone() as mic:
        print("Speak now:")
        listener.adjust_for_ambient_noise(mic)
        captured = listener.listen(mic)

    # Start from the failure value; it is only replaced on a successful
    # transcription.
    transcript = ""
    try:
        transcript = listener.recognize_google(captured)
    except sr.UnknownValueError:
        print("Sorry, I could not understand what you said.")
    except sr.RequestError as err:
        print("Could not request results from Google Speech Recognition service; {0}".format(err))
    else:
        print("You said:", transcript)
    return transcript

# Function to compute cosine similarity between two strings
def compute_similarity(answer1, answer2):
    """Cosine similarity between the word-count vectors of two strings.

    Args:
        answer1: First text.
        answer2: Second text.

    Returns:
        The cosine similarity of the two ``CountVectorizer`` count vectors, or
        ``0.0`` when either text is empty/blank or the texts contain no token
        the vectorizer can use.
    """
    # A blank answer shares nothing with the reference answer; score it as 0
    # instead of letting the vectorizer fail.
    if not answer1 or not answer2 or not answer1.strip() or not answer2.strip():
        return 0.0

    try:
        vectorizer = CountVectorizer().fit_transform([answer1, answer2])
    except ValueError:
        # CountVectorizer raises ValueError when the documents yield an empty
        # vocabulary, i.e. there are no tokens to compare.
        return 0.0

    vectors = vectorizer.toarray()
    return cosine_similarity(vectors)[0, 1]

# Example usage:
topic = input("Enter the topic you want to interview about: ")
num_paragraphs = int(input("Enter the number of paragraphs to scrape: "))

# Scrape paragraphs from Wikipedia
paragraphs = scrape_paragraphs(topic, num_paragraphs)

# The original cell created an AnswerRecorder here, but this cell neither
# defines nor uses that class (it records through record_response()), so the
# instantiation is dropped to keep the cell runnable on a fresh kernel.

# One model instance shared by every paragraph.
qg_model = TransformersQG(language="en")

# Ask questions one by one and record user answers
for paragraph in paragraphs:
    # Blank paragraphs cannot produce a question.
    if not paragraph.strip():
        continue

    # generate_question() only returns the question text, and the
    # generate_answer() helper the original called was never defined. Read the
    # question and its reference answer from the same generate_qa result.
    try:
        qa_pairs = qg_model.generate_qa(paragraph)
    except AnswerNotFoundError:
        print("Question:", "Error: No answer candidates found")
        continue
    if not qa_pairs:
        continue

    # Assumes generate_qa yields (question, answer) tuples — confirm against
    # the installed lmqg version.
    question, lmqg_answer = qa_pairs[0]
    print("Question:", question)

    # Record user's response
    user_answer = record_response()
    if user_answer:
        # Compute similarity between user's answer and LMQG answer
        similarity = compute_similarity(user_answer, lmqg_answer)
        print("Similarity:", similarity)