In [None]:
import requests
from bs4 import BeautifulSoup
import spacy

def get_available_topics():
    url = 'https://www.bbc.com/news'

    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        topics_list = soup.select('.sc-44f1f005-0 .hCrDEz')

        if topics_list:
            topics = {i + 1: topic.text.strip() for i, topic in enumerate(topics_list)}
            return topics
        else:
            print("No topics found on the page.")
            return {}
    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return {}

def get_articles(topic):
    url = f'https://www.bbc.com/news/{topic}'

    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        articles_list = soup.select('.sc-44f1f005-0 .hCrDEz')

        if articles_list:
            return [article.text.strip() for article in articles_list]
        else:
            print("No articles found on the page.")
            return []
    else:
        print(f"Error: Unable to fetch articles. Status code: {response.status_code}")
        return []

def compare_statements(input_statement, documents):
    if not documents:
        print(f"No documents found for comparison.")
        return False

    nlp = spacy.load('en_core_web_sm')
    input_doc = nlp(input_statement)

    for document in documents:
        document_doc = nlp(document)
        similarity = input_doc.similarity(document_doc)

        # Adjust the similarity threshold as needed
        if similarity > 0.8:
            return True

    return False

def main():
    data_source = input("Enter the data source ('twitter' or 'bbc_news'): ")

    if data_source.lower() == 'bbc_news':
        available_topics = get_available_topics()

        if not available_topics:
            print("No topics available. Exiting.")
            return

        print("Available topics:")
        for number, topic in available_topics.items():
            print(f"{number}. {topic}")

        topic_number = int(input("Enter the number corresponding to the desired topic: "))
        topic_identifier = available_topics.get(topic_number)

        if not topic_identifier:
            print("Invalid topic number. Exiting.")
            return

        input_statement = input("Enter the statement to verify: ")
        documents = get_articles(topic_identifier)
    else:
        print("Invalid data source. Supported sources: 'bbc_news'")
        return

    if documents:
        is_legitimate = compare_statements(input_statement, documents)

        if is_legitimate:
            print("The statement is legitimate based on the data source.")
        else:
            print("The statement is not found among documents.")
    else:
        print("No documents found for the topic.")

if __name__ == "__main__":
    main()


Enter the data source ('twitter' or 'bbc_news'): bbc_news
Available topics:
1. Israel-Gaza War
2. War in Ukraine
3. World
4. US & Canada
5. UK
6. In Pictures
7. BBC Verify
Enter the number corresponding to the desired topic: 3
Enter the statement to verify: Protesters in Tel Aviv Tel Aviv protesters call on Netanyahu to resign The Israeli prime minister's popularity has declined among Israelis after the 7 October Hamas attacks.
Error: Unable to fetch articles. Status code: 404
No documents found for the topic.


In [None]:
import requests
from bs4 import BeautifulSoup

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def choose_topic(topics):
    print("Available BBC News Topics:")
    for topic in topics:
        print(f"{topic[0]}. {topic[1]}")

    user_choice = input("Enter the number corresponding to the desired topic: ")
    return user_choice

def scrape_topic_page(topic_url):
    response = requests.get(topic_url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # Add your scraping logic here
        print(f"Scraping data from {topic_url}")
        print("Scraped data:")
        #print(soup.prettify())
    else:
        print(f"Error: Unable to fetch topic page. Status code: {response.status_code}")

# Example usage:
bbc_topics = get_bbc_news_topics()

if bbc_topics:
    user_choice = choose_topic(bbc_topics)
    selected_topic = bbc_topics[int(user_choice) - 1]

    print(f"You selected: {selected_topic[1]}")
    scrape_topic_page(f'https://www.bbc.com{selected_topic[2]}')
else:
    print("No topics found.")





Available BBC News Topics:
1. Israel-Gaza War
2. War in Ukraine
3. World
4. US & Canada
5. UK
6. In Pictures
7. BBC Verify
Enter the number corresponding to the desired topic: 3
You selected: World
Scraping data from https://www.bbc.com/news/world
Scraped data:


In [None]:
import requests
from bs4 import BeautifulSoup

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def choose_topic(topics):
    print("Available BBC News Topics:")
    for topic in topics:
        print(f"{topic[0]}. {topic[1]}")

    user_choice = input("Enter the number corresponding to the desired topic: ")
    return user_choice

def get_user_statement():
    return input("Enter the statement you are looking for on the selected topic page: ")

def scrape_topic_page(topic_url, statement):
    response = requests.get(topic_url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # Add your specific scraping logic here
        # For example, checking if the statement is present in the page
        statement_found = statement.lower() in soup.get_text().lower()

        print(f"Scraping data from {topic_url}")
        print(f"Statement '{statement}' {'found' if statement_found else 'not found'} on the page.")
    else:
        print(f"Error: Unable to fetch topic page. Status code: {response.status_code}")

# Example usage:
bbc_topics = get_bbc_news_topics()

if bbc_topics:
    user_choice = choose_topic(bbc_topics)
    selected_topic = bbc_topics[int(user_choice) - 1]

    print(f"You selected: {selected_topic[1]}")
    user_statement = get_user_statement()
    scrape_topic_page(f'https://www.bbc.com{selected_topic[2]}', user_statement)
else:
    print("No topics found.")


Available BBC News Topics:
1. Israel-Gaza War
2. War in Ukraine
3. World
4. US & Canada
5. UK
6. In Pictures
7. BBC Verify
Enter the number corresponding to the desired topic: 3
You selected: World
Enter the statement you are looking for on the selected topic page: Protesters in Tel Aviv Tel Aviv protesters call on Netanyahu to resign The Israeli prime minister's popularity has declined among Israelis after the 7 October Hamas attacks. 
Scraping data from https://www.bbc.com/news/world
Statement 'Protesters in Tel Aviv Tel Aviv protesters call on Netanyahu to resign The Israeli prime minister's popularity has declined among Israelis after the 7 October Hamas attacks. ' not found on the page.


In [None]:
import requests
from bs4 import BeautifulSoup

def choose_data(data_list):
    print("Available Options:")
    for data in data_list:
        print(f"{data[0]}. {data[1]}")

    while True:
        user_choice = input("Enter the number corresponding to the desired option: ")
        if user_choice.isdigit() and 1 <= int(user_choice) <= len(data_list):
            return user_choice
        else:
            print("Invalid input. Please enter a valid number.")
def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')

        # List of classes and IDs to check
        classes_and_ids_to_check = ['iframe', '.czRLo', '.jqwZKz', '.kTrQIN', '.kbvxap', '.crzIlm', '.dJMMNx', '.hPAmAW', '.huflns']

        for class_or_id in classes_and_ids_to_check:
            target_elements = soup.select(class_or_id)

            if target_elements:
                for index, element in enumerate(target_elements):
                    print(f"Content inside element with class or ID '{class_or_id}' {index + 1}:")
                    print(element.get_text(strip=True))
            else:
                print(f"No elements with class or ID '{class_or_id}' found.")
    else:
        print(f"Error: Unable to fetch data page. Status code: {response.status_code}")

# Example usage:
bbc_topics = get_bbc_news_topics()

if bbc_topics:
    user_choice = choose_topic(bbc_topics)
    selected_topic = bbc_topics[int(user_choice) - 1]

    print(f"You selected: {selected_topic[1]}")
    data_list = scrape_topic_page(f'https://www.bbc.com{selected_topic[2]}')

    if data_list:
        user_data_choice = choose_data(data_list)
        selected_data = data_list[int(user_data_choice) - 1]

        print(f"You selected: {selected_data[1]}")
        scrape_data_page(f'https://www.bbc.com{selected_data[2]}')

else:
    print("No topics found.")

def choose_topic(topics):
    print("Available BBC News Topics:")
    for topic in topics:
        print(f"{topic[0]}. {topic[1]}")

    user_choice = input("Enter the number corresponding to the desired topic: ")
    return user_choice

def get_user_statement():
    return input("Enter the statement you are looking for on the selected topic page: ")

def scrape_topic_page(topic_url, statement):
    response = requests.get(topic_url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        layers = topic_url.split('/')[4:]  # Extract layers from the URL

        # Navigate through the layers
        current_layer = soup
        for layer in layers:
            current_layer = current_layer.find('a', {'href': f'/{layer}'})
            if not current_layer:
                print(f"Error: Unable to navigate to layer '{layer}' in the topic page.")
                return

        # Add your specific scraping logic here
        # For example, checking if the statement is present in the page
        statement_found = statement.lower() in current_layer.get_text().lower()

        print(f"Scraping data from {topic_url}")
        print(f"Statement '{statement}' {'found' if statement_found else 'not found'} on the page.")
    else:
        print(f"Error: Unable to fetch topic page. Status code: {response.status_code}")

# Example usage:
bbc_topics = get_bbc_news_topics()

if bbc_topics:
    user_choice = choose_topic(bbc_topics)
    selected_topic = bbc_topics[int(user_choice) - 1]

    print(f"You selected: {selected_topic[1]}")
    user_statement = get_user_statement()
    scrape_topic_page(f'https://www.bbc.com{selected_topic[2]}', user_statement)
else:
    print("No topics found.")


No elements with class or ID 'iframe' found.
Content inside element with class or ID '.czRLo' 1:
The American man lured two US women off a trail near Germany's famous Neuschwanstein Castle last year.
Content inside element with class or ID '.czRLo' 2:
Dramatic video shows the leak towering close to power lines blocking a road in College Station, Texas.
Content inside element with class or ID '.czRLo' 3:
The Kenyan and Nigerian presidents face domestic criticism for their frequent trips abroad.
Content inside element with class or ID '.czRLo' 4:
Kamal, from the occupied West Bank, had a well-paid job in Israel before the Hamas attacks - now he is in poverty.
Content inside element with class or ID '.czRLo' 5:
Australian three-time Olympic medallist Shane Rose is stood down from competition for wearing a mankini during a show jumping event.
Content inside element with class or ID '.czRLo' 6:
Laughter and tears for winners at the Bafta Awards 2024 in London.
Content inside element with cl

No elements with class or ID 'iframe' found.
Content inside element with class or ID '.czRLo' 1:
The American man lured two US women off a trail near Germany's famous Neuschwanstein Castle last year.
Content inside element with class or ID '.czRLo' 2:
Dramatic video shows the leak towering close to power lines blocking a road in College Station, Texas.
Content inside element with class or ID '.czRLo' 3:
The Kenyan and Nigerian presidents face domestic criticism for their frequent trips abroad.
Content inside element with class or ID '.czRLo' 4:
Kamal, from the occupied West Bank, had a well-paid job in Israel before the Hamas attacks - now he is in poverty.
Content inside element with class or ID '.czRLo' 5:
Australian three-time Olympic medallist Shane Rose is stood down from competition for wearing a mankini during a show jumping event.
Content inside element with class or ID '.czRLo' 6:
Laughter and tears for winners at the Bafta Awards 2024 in London.
Content inside element with cl

In [None]:
import requests
from bs4 import BeautifulSoup

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def scrape_with_classes(soup, classes):
    for class_name in classes:
        elements = soup.find_all(class_=class_name)
        if elements:
            print(f"Elements with class '{class_name}':")
            for element in elements:
                print(element.get_text(strip=True))
            print("\n")
        else:
            print(f"No elements with class '{class_name}' found.")

if __name__ == "__main__":
    # Get available topics
    topics = get_bbc_news_topics()

    if not topics:
        print("No topics found. Exiting.")
    else:
        # Display available topics to the user
        print("Available topics:")
        for topic in topics:
            print(f"{topic[0]}. {topic[1]} - URL: https://www.bbc.com{topic[2]}")

        # Ask the user to choose a topic
        try:
            user_choice = input("Enter the number corresponding to the desired topic: ")
            selected_topic = topics[int(user_choice) - 1]

            print(f"You selected: {selected_topic[1]} - URL: https://www.bbc.com{selected_topic[2]}")
            topic_url = f'https://www.bbc.com{selected_topic[2]}'

            # Make a request for the topic page
            topic_response = requests.get(topic_url)

            if topic_response.status_code == 200:
                soup = BeautifulSoup(topic_response.text, 'html.parser')

                # Classes to check
                classes_to_check = ["gclMev", "dEAAFJ", "czRLo", "jqwZKz", "kTrQIN", "dJMMNx", "crzIlm"]

                # Scrape elements with specified classes
                scrape_with_classes(soup, classes_to_check)

            else:
                print(f"Error: Unable to fetch topic page. Status code: {topic_response.status_code}")

        except (ValueError, IndexError):
            print("Invalid choice. Please enter a valid topic number.")


Available topics:
1. Israel-Gaza War - URL: https://www.bbc.com/news/topics/c2vdnvdg6xxt
2. War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
3. World - URL: https://www.bbc.com/news/world
4. US & Canada - URL: https://www.bbc.com/news/us-canada
5. UK - URL: https://www.bbc.com/news/uk
6. In Pictures - URL: https://www.bbc.com/news/in_pictures
7. BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the number corresponding to the desired topic: 3
You selected: World - URL: https://www.bbc.com/news/world
Elements with class 'gclMev':
The American man lured two US women off a trail near Germany's famous Neuschwanstein Castle last year.
Lawmakers are set to investigate claims the former government snooped on the phones of its opponents.
The UK-registered Rubymar, which is carrying "very dangerous" fertiliser, has been taking on water.
In a video message, Yulia Navalnaya also accused the Kremlin of being behind her husband's death in a Siberian jail.
"I know the justi

In [None]:
import requests
from bs4 import BeautifulSoup

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def scrape_with_classes(soup, classes):
    scraped_data = []
    for class_name in classes:
        elements = soup.find_all(class_=class_name)
        if elements:
            for element in elements:
                scraped_data.append(element.get_text(strip=True))

    return scraped_data

def compare_statement(statement, scraped_data):
    for data in scraped_data:
        if statement.lower() in data.lower():
            return True
    return False

if __name__ == "__main__":
    # Get available topics
    topics = get_bbc_news_topics()

    if not topics:
        print("No topics found. Exiting.")
    else:
        # Display available topics to the user
        print("Available topics:")
        for topic in topics:
            print(f"{topic[0]}. {topic[1]} - URL: https://www.bbc.com{topic[2]}")

        # Ask the user to choose a topic
        try:
            user_choice = input("Enter the number corresponding to the desired topic: ")
            selected_topic = topics[int(user_choice) - 1]

            print(f"You selected: {selected_topic[1]} - URL: https://www.bbc.com{selected_topic[2]}")
            topic_url = f'https://www.bbc.com{selected_topic[2]}'

            # Make a request for the topic page
            topic_response = requests.get(topic_url)

            if topic_response.status_code == 200:
                soup = BeautifulSoup(topic_response.text, 'html.parser')

                # Classes to check
                classes_to_check = ["gclMev", "dEAAFJ", "czRLo", "jqwZKz", "kTrQIN", "dJMMNx", "crzIlm"]

                # Scrape elements with specified classes
                scraped_data = scrape_with_classes(soup, classes_to_check)

                # Ask the user to enter a statement for comparison
                user_statement = input("Enter the statement to verify: ")

                # Compare the user's statement with the scraped data
                if compare_statement(user_statement, scraped_data):
                    print(f"The statement '{user_statement}' is found on the page. It might be legitimate.")
                else:
                    print(f"The statement '{user_statement}' is not found on the page. It might not be legitimate.")

            else:
                print(f"Error: Unable to fetch topic page. Status code: {topic_response.status_code}")

        except (ValueError, IndexError):
            print("Invalid choice. Please enter a valid topic number.")


Available topics:
1. Israel-Gaza War - URL: https://www.bbc.com/news/topics/c2vdnvdg6xxt
2. War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
3. World - URL: https://www.bbc.com/news/world
4. US & Canada - URL: https://www.bbc.com/news/us-canada
5. UK - URL: https://www.bbc.com/news/uk
6. In Pictures - URL: https://www.bbc.com/news/in_pictures
7. BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the number corresponding to the desired topic: 3
You selected: World - URL: https://www.bbc.com/news/world
Enter the statement to verify: Protesters in Tel Aviv Tel Aviv protesters call on Netanyahu to resign The Israeli prime minister's popularity has declined among Israelis after the 7 October Hamas attacks.
The statement 'Protesters in Tel Aviv Tel Aviv protesters call on Netanyahu to resign The Israeli prime minister's popularity has declined among Israelis after the 7 October Hamas attacks.' is not found on the page. It might not be legitimate.


In [None]:
import requests
from bs4 import BeautifulSoup

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def scrape_with_classes(soup, classes):
    scraped_data = {}
    for class_name in classes:
        elements = soup.find_all(class_=class_name)
        if elements:
            scraped_data[class_name] = [element.get_text(strip=True) for element in elements]

    return scraped_data

def compare_statement(statement, scraped_data):
    for class_name, data_list in scraped_data.items():
        for data in data_list:
            if statement.lower() in data.lower():
                return class_name, data
    return None, None

if __name__ == "__main__":
    # Get available topics
    topics = get_bbc_news_topics()

    if not topics:
        print("No topics found. Exiting.")
    else:
        # Display available topics to the user
        print("Available topics:")
        for topic in topics:
            print(f"{topic[0]}. {topic[1]} - URL: https://www.bbc.com{topic[2]}")

        # Ask the user to choose a topic
        try:
            user_choice = input("Enter the number corresponding to the desired topic: ")
            selected_topic = topics[int(user_choice) - 1]

            print(f"You selected: {selected_topic[1]} - URL: https://www.bbc.com{selected_topic[2]}")
            topic_url = f'https://www.bbc.com{selected_topic[2]}'

            # Make a request for the topic page
            topic_response = requests.get(topic_url)

            if topic_response.status_code == 200:
                soup = BeautifulSoup(topic_response.text, 'html.parser')

                # Classes to check
                classes_to_check = ["gclMev", "dEAAFJ", "czRLo", "jqwZKz", "kTrQIN", "dJMMNx", "crzIlm"]

                # Scrape elements with specified classes
                scraped_data = scrape_with_classes(soup, classes_to_check)

                # Ask the user to enter a statement for comparison
                user_statement = input("Enter the statement to verify: ")

                # Compare the user's statement with the scraped data
                matched_class, matched_data = compare_statement(user_statement, scraped_data)

                if matched_class is not None and matched_data is not None:
                    print(f"The statement '{user_statement}' is found in the context of class '{matched_class}':")
                    print(matched_data)
                    print("Statement verified and legitimate!")
                else:
                    print(f"The statement '{user_statement}' is not found on the page. It might not be legitimate.")

            else:
                print(f"Error: Unable to fetch topic page. Status code: {topic_response.status_code}")

        except (ValueError, IndexError):
            print("Invalid choice. Please enter a valid topic number.")


Available topics:
1. Israel-Gaza War - URL: https://www.bbc.com/news/topics/c2vdnvdg6xxt
2. War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
3. World - URL: https://www.bbc.com/news/world
4. US & Canada - URL: https://www.bbc.com/news/us-canada
5. UK - URL: https://www.bbc.com/news/uk
6. In Pictures - URL: https://www.bbc.com/news/in_pictures
7. BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the number corresponding to the desired topic: 3
You selected: World - URL: https://www.bbc.com/news/world
Enter the statement to verify: Protesters in Tel Aviv Tel Aviv protesters call on Netanyahu to resign The Israeli prime minister's popularity has declined among Israelis after the 7 October Hamas attacks.
The statement 'Protesters in Tel Aviv Tel Aviv protesters call on Netanyahu to resign The Israeli prime minister's popularity has declined among Israelis after the 7 October Hamas attacks.' is not found on the page. It might not be legitimate.


In [None]:
import requests
from bs4 import BeautifulSoup
from difflib import get_close_matches

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def scrape_with_classes(soup, classes):
    scraped_data = {}
    for class_name in classes:
        elements = soup.find_all(class_=class_name)
        if elements:
            scraped_data[class_name] = [element.get_text(strip=True) for element in elements]

    return scraped_data

def compare_statement(statement, scraped_data):
    matches = get_close_matches(statement.lower(), [data.lower() for data_list in scraped_data.values() for data in data_list], n=1, cutoff=0.8)

    if matches:
        for class_name, data_list in scraped_data.items():
            for data in data_list:
                if matches[0] in data.lower():
                    return class_name, data
    return None, None

def scrape_and_verify(user_statement, url, classes_to_check):
    try:
        # Make a request for the selected news page
        response = requests.get(url)

        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')

            # Scrape elements with specified classes
            scraped_data = scrape_with_classes(soup, classes_to_check)

            # Compare the user's statement with the scraped data
            matched_class, matched_data = compare_statement(user_statement, scraped_data)

            if matched_class is not None and matched_data is not None:
                print(f"The statement '{user_statement}' is found in the context of class '{matched_class}':")
                print(matched_data)
                print("Statement verified and legitimate!")
            else:
                if scraped_data:
                    print(f"The statement '{user_statement}' is not found. Similar data includes:")
                    for class_name, data_list in scraped_data.items():
                        print(f"Class '{class_name}': {data_list}")
                else:
                    print("No data found on the page.")

        else:
            print(f"Error: Unable to fetch news page. Status code: {response.status_code}")

    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    # Get available news categories
    topics = get_bbc_news_topics()

    if not topics:
        print("No news categories found. Exiting.")
    else:
        # Display available news categories to the user
        print("Available news categories:")
        for topic in topics:
            print(f"{topic[0]}. {topic[1]} - URL: https://www.bbc.com{topic[2]}")

        # Ask the user to choose a news category
        try:
            user_choice = input("Enter the number corresponding to the desired news category: ")
            selected_topic = topics[int(user_choice) - 1]
            news_category_url = f'https://www.bbc.com{selected_topic[2]}'

            # Replace 'your_link_class' with the actual class name for the links to other pages
            classes_to_check = ["gclMev", "dEAAFJ", "czRLo", "jqwZKz", "kTrQIN", "dJMMNx", "crzIlm"]

            # Ask the user to input the statement for verification
            user_statement = input("Enter the statement to verify: ")

            # Scrape content and verify statement
            scrape_and_verify(user_statement, news_category_url, classes_to_check)

        except (ValueError, IndexError):
            print("Invalid choice. Please enter a valid news category number.")


Available news categories:
1. Israel-Gaza War - URL: https://www.bbc.com/news/topics/c2vdnvdg6xxt
2. War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
3. World - URL: https://www.bbc.com/news/world
4. US & Canada - URL: https://www.bbc.com/news/us-canada
5. UK - URL: https://www.bbc.com/news/uk
6. In Pictures - URL: https://www.bbc.com/news/in_pictures
7. BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the number corresponding to the desired news category: 5
Enter the statement to verify: uk
The statement 'uk' is not found. Similar data includes:
Class 'gclMev': ['She also told detectives she debated handing herself in two weeks after their baby died.', "The government says it will help 'minimise disruption and improve behaviour in classrooms' in England.", 'A unique case raised complex questions about whether an asylum seeker could be blamed for drownings in 2022.', 'A man has been found guilty of the manslaughter of four migrants in the English Channel.', "

In [None]:
import requests
from bs4 import BeautifulSoup
from difflib import get_close_matches

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def scrape_with_classes(soup, classes):
    scraped_data = {}
    for class_name in classes:
        elements = soup.find_all(class_=class_name)
        if elements:
            scraped_data[class_name] = elements

    return scraped_data

def find_links_in_context(user_statement, soup, classes_to_check):
    links = []

    for class_name in classes_to_check:
        elements = soup.find_all(class_=class_name)
        for element in elements:
            if user_statement.lower() in element.get_text(strip=True).lower():
                link = element.find('a')
                if link:
                    links.append(link.get('href'))

    return links

def compare_statement(statement, scraped_data):
    matches = get_close_matches(statement.lower(), [data.get_text(strip=True).lower() for data_list in scraped_data.values() for data in data_list], n=1, cutoff=0.8)

    if matches:
        for class_name, data_list in scraped_data.items():
            for data in data_list:
                if matches[0] in data.get_text(strip=True).lower():
                    return class_name, data
    return None, None

def scrape_and_verify(user_statement, url, classes_to_check):
    try:
        # Make a request for the selected news page
        response = requests.get(url)

        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')

            # Scrape elements with specified classes
            scraped_data = scrape_with_classes(soup, classes_to_check)

            # Find links in the context of the user's statement
            links = find_links_in_context(user_statement, soup, classes_to_check)

            # Compare the user's statement with the scraped data
            matched_class, matched_data = compare_statement(user_statement, scraped_data)

            if matched_class is not None and matched_data is not None:
                print(f"The statement '{user_statement}' is found in the context of:")
                print(matched_data.get_text(strip=True))
                print("Statement verified and legitimate!")
                if links:
                    print(f"Links related to the statement:")
                    for link in links:
                        print(link)
            else:
                if scraped_data:
                    print(f"The statement '{user_statement}' is not found. Similar data includes:")
                    for class_name, data_list in scraped_data.items():
                        for data in data_list:
                            print(f"Context: {data.get_text(strip=True)}")
                            link = data.find('a')
                            if link:
                                print(f"Link: {link.get('href')}")
                    if links:
                        print(f"Links related to the statement:")
                        for link in links:
                            print(link)
                else:
                    print("No data found on the page.")

        else:
            print(f"Error: Unable to fetch news page. Status code: {response.status_code}")

    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    # Get available news categories
    topics = get_bbc_news_topics()

    if not topics:
        print("No news categories found. Exiting.")
    else:
        # Display available news categories to the user
        print("Available news categories:")
        for topic in topics:
            print(f"{topic[0]}. {topic[1]} - URL: https://www.bbc.com{topic[2]}")

        # Ask the user to choose a news category
        try:
            user_choice = input("Enter the number corresponding to the desired news category: ")
            selected_topic = topics[int(user_choice) - 1]
            news_category_url = f'https://www.bbc.com{selected_topic[2]}'

            # Replace 'your_link_class' with the actual class name for the links to other pages
            classes_to_check = ["gclMev", "dEAAFJ", "czRLo", "jqwZKz", "kTrQIN", "dJMMNx", "crzIlm"]

            # Ask the user to input the statement for verification
            user_statement = input("Enter the statement to verify: ")

            # Scrape content and verify statement
            scrape_and_verify(user_statement, news_category_url, classes_to_check)

        except (ValueError, IndexError):
            print("Invalid choice. Please enter a valid news category number.")


Available news categories:
1. Israel-Gaza War - URL: https://www.bbc.com/news/topics/c2vdnvdg6xxt
2. War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
3. World - URL: https://www.bbc.com/news/world
4. US & Canada - URL: https://www.bbc.com/news/us-canada
5. UK - URL: https://www.bbc.com/news/uk
6. In Pictures - URL: https://www.bbc.com/news/in_pictures
7. BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the number corresponding to the desired news category: 1
Enter the statement to verify: Tala and Yazid Relief and guilt after Gazans find safety in Egypt Tala Abu Nahla and her family have finally made it to Cairo - now the work to rebuild their lives begins.
The statement 'Tala and Yazid Relief and guilt after Gazans find safety in Egypt Tala Abu Nahla and her family have finally made it to Cairo - now the work to rebuild their lives begins.' is not found. Similar data includes:
Context: Israel's military says it hit weapons depots in response to a drone launc

In [None]:
import requests
from bs4 import BeautifulSoup
from fuzzywuzzy import fuzz

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def scrape_with_classes(soup, classes):
    scraped_data = {}
    for class_name in classes:
        elements = soup.find_all(class_=class_name)
        if elements:
            scraped_data[class_name] = [element.get_text(strip=True) for element in elements]

    return scraped_data

def find_parent_link(element):
    # Traverse the ancestors to find an href
    current_element = element
    while current_element:
        if 'href' in current_element.attrs:
            return current_element['href']
        current_element = current_element.find_parent()

    return 'Link not found'

def compare_statement(user_statement, scraped_data, soup):
    matches = {}
    for class_name, data_list in scraped_data.items():
        for data in data_list:
            # Use fuzzy matching to handle typos or errors
            similarity_ratio = fuzz.token_set_ratio(user_statement.lower(), data.lower())
            if similarity_ratio > 90:  # Adjust the threshold as needed
                matches[class_name] = {
                    'context': data,
                    'similarity_ratio': similarity_ratio,
                    'parent_link': find_parent_link(soup.find(class_=class_name)),
                }

    return matches

def print_matched_results(matches):
    for class_name, match_info in matches.items():
        context = match_info['context']
        similarity_ratio = match_info['similarity_ratio']
        parent_link = match_info['parent_link']
        print(f"Context: {context} - Similarity: {similarity_ratio}% - Parent Link: {parent_link}")

def scrape_and_verify(user_statement, news_category_url, classes_to_check):
    # Make a request for the news category page
    news_category_response = requests.get(news_category_url)

    if news_category_response.status_code == 200:
        soup = BeautifulSoup(news_category_response.text, 'html.parser')

        # Scrape elements with specified classes
        scraped_data = scrape_with_classes(soup, classes_to_check)

        # Compare the user's statement with the scraped data
        matches = compare_statement(user_statement, scraped_data, soup)

        if matches:
            print("The following contexts were found:")
            print_matched_results(matches)
            print("Statement verified and legitimate!")
        else:
            print("No matching context found. The statement might not be legitimate.")

    else:
        print(f"Error: Unable to fetch news category page. Status code: {news_category_response.status_code}")

if __name__ == "__main__":
    # Get available news categories
    topics = get_bbc_news_topics()

    if not topics:
        print("No news categories found. Exiting.")
    else:
        # Display available news categories to the user
        print("Available news categories:")
        for topic in topics:
            print(f"{topic[0]}. {topic[1]} - URL: https://www.bbc.com{topic[2]}")

        # Ask the user to choose a news category
        try:
            user_choice = input("Enter the number corresponding to the desired news category: ")
            selected_topic = topics[int(user_choice) - 1]

            print(f"You selected: {selected_topic[1]} - URL: https://www.bbc.com{selected_topic[2]}")
            news_category_url = f'https://www.bbc.com{selected_topic[2]}'

            # Ask the user to enter a statement for verification
            user_statement = input("Enter the statement to verify: ")

            # Specify classes to check on the news pages
            classes_to_check = ["gclMev", "dEAAFJ", "czRLo", "jqwZKz", "kTrQIN", "dJMMNx", "crzIlm"]

            # Scrape and verify the user's statement
            scrape_and_verify(user_statement, news_category_url, classes_to_check)

        except (ValueError, IndexError):
            print("Invalid choice.")


Available news categories:
1. Israel-Gaza War - URL: https://www.bbc.com/news/topics/c2vdnvdg6xxt
2. War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
3. World - URL: https://www.bbc.com/news/world
4. US & Canada - URL: https://www.bbc.com/news/us-canada
5. UK - URL: https://www.bbc.com/news/uk
6. In Pictures - URL: https://www.bbc.com/news/in_pictures
7. BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the number corresponding to the desired news category: 2
You selected: War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
Enter the statement to verify: ukraine russia
The following contexts were found:
Context: With Western supplies to Ukraine faltering, Russia is making its size matter but the outcome is far from certain. - Similarity: 100% - Parent Link: /news/uk-england-merseyside-68336591
Context: Is Russia turning the tide in Ukraine? - Similarity: 100% - Parent Link: /news/uk-england-merseyside-68336591
Context: With Western supplies to Ukrain

In [None]:
import requests
from bs4 import BeautifulSoup
from fuzzywuzzy import fuzz

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def scrape_with_classes(soup, classes):
    scraped_data = {}
    for class_name in classes:
        elements = soup.find_all(class_=class_name)
        if elements:
            scraped_data[class_name] = [element.get_text(strip=True) for element in elements]

    return scraped_data

def compare_statement(statement, scraped_data):
    matched_data = []
    for class_name, data_list in scraped_data.items():
        for data in data_list:
            ratio = fuzz.partial_ratio(statement.lower(), data.lower())
            if ratio >= 80:
                matched_data.append((class_name, data))

    return matched_data

if __name__ == "__main__":
    # Get available topics
    topics = get_bbc_news_topics()

    if not topics:
        print("No topics found. Exiting.")
    else:
        # Display available topics to the user
        print("Available topics:")
        for topic in topics:
            print(f"{topic[0]}. {topic[1]} - URL: https://www.bbc.com{topic[2]}")

        # Ask the user to choose a topic
        try:
            user_choice = input("Enter the number corresponding to the desired topic: ")
            selected_topic = topics[int(user_choice) - 1]

            print(f"You selected: {selected_topic[1]} - URL: https://www.bbc.com{selected_topic[2]}")
            topic_url = f'https://www.bbc.com{selected_topic[2]}'

            # Make a request for the topic page
            topic_response = requests.get(topic_url)

            if topic_response.status_code == 200:
                soup = BeautifulSoup(topic_response.text, 'html.parser')

                # Classes to check
                classes_to_check = ["gclMev", "dEAAFJ", "czRLo", "jqwZKz", "kTrQIN", "dJMMNx", "crzIlm"]

                # Scrape elements with specified classes
                scraped_data = scrape_with_classes(soup, classes_to_check)

                # Ask the user to enter a statement for comparison
                user_statement = input("Enter the statement to verify: ")

                # Compare the user's statement with the scraped data
                matched_data = compare_statement(user_statement, scraped_data)

                if matched_data:
                    print("Matching contexts found:")
                    for match in matched_data:
                        class_name, data = match
                        parent_link = soup.find(class_=class_name).find_parent('a')['href']
                        print(f"Class: {class_name}, Data: {data}, Parent Link: https://www.bbc.com{parent_link}")
                else:
                    print("No matching context found. The statement might not be legitimate.")

            else:
                print(f"Error: Unable to fetch topic page. Status code: {topic_response.status_code}")

        except (ValueError, IndexError):
            print("Invalid choice. Please enter a valid topic number.")


Available topics:
1. Israel-Gaza War - URL: https://www.bbc.com/news/topics/c2vdnvdg6xxt
2. War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
3. World - URL: https://www.bbc.com/news/world
4. US & Canada - URL: https://www.bbc.com/news/us-canada
5. UK - URL: https://www.bbc.com/news/uk
6. In Pictures - URL: https://www.bbc.com/news/in_pictures
7. BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the number corresponding to the desired topic: 4
You selected: US & Canada - URL: https://www.bbc.com/news/us-canada
Enter the statement to verify: canada
Matching contexts found:
Class: jqwZKz, Data: Canada's wildfire season is not over, with dozens of winter fires still burning beneath the soil., Parent Link: https://www.bbc.com/news/world-us-canada-68336601
Class: kTrQIN, Data: 'Zombie Fires' burning at an alarming rate in Canada, Parent Link: https://www.bbc.com/news/world-us-canada-68336601


In [None]:
import requests
from bs4 import BeautifulSoup
from fuzzywuzzy import fuzz

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def scrape_with_classes(soup, classes):
    scraped_data = {}
    for class_name in classes:
        elements = soup.find_all(class_=class_name)
        if elements:
            scraped_data[class_name] = [element.get_text(strip=True) for element in elements]

    return scraped_data

def compare_statement(statement, scraped_data):
    matched_data = []
    for class_name, data_list in scraped_data.items():
        for data in data_list:
            ratio = fuzz.partial_ratio(statement.lower(), data.lower())
            if ratio >= 80:
                parent_link = soup.find(string=data).find_parent('a')['href']
                matched_data.append((f"https://www.bbc.com{parent_link}", data))

    return matched_data

if __name__ == "__main__":
    # Get available topics
    topics = get_bbc_news_topics()

    if not topics:
        print("No topics found. Exiting.")
    else:
        # Display available topics to the user
        print("Available topics:")
        for topic in topics:
            print(f"{topic[0]}. {topic[1]} - URL: https://www.bbc.com{topic[2]}")

        # Ask the user to choose a topic
        try:
            user_choice = input("Enter the number corresponding to the desired topic: ")
            selected_topic = topics[int(user_choice) - 1]

            print(f"You selected: {selected_topic[1]} - URL: https://www.bbc.com{selected_topic[2]}")
            topic_url = f'https://www.bbc.com{selected_topic[2]}'

            # Make a request for the topic page
            topic_response = requests.get(topic_url)

            if topic_response.status_code == 200:
                soup = BeautifulSoup(topic_response.text, 'html.parser')

                # Classes to check
                classes_to_check = ["gclMev", "dEAAFJ", "czRLo", "jqwZKz", "kTrQIN", "dJMMNx", "crzIlm"]

                # Scrape elements with specified classes
                scraped_data = scrape_with_classes(soup, classes_to_check)

                # Ask the user to enter a statement for comparison
                user_statement = input("Enter the statement to verify: ")

                # Compare the user's statement with the scraped data
                matched_data = compare_statement(user_statement, scraped_data)

                if matched_data:
                    print("Matching contexts found:")
                    for link, data in matched_data:
                        print(f"Link: {link} - Matched: {data}")
                else:
                    print("No matching context found. The statement might not be legitimate.")

            else:
                print(f"Error: Unable to fetch topic page. Status code: {topic_response.status_code}")

        except (ValueError, IndexError):
            print("Invalid choice. Please enter a valid topic number.")


Available topics:
1. Israel-Gaza War - URL: https://www.bbc.com/news/topics/c2vdnvdg6xxt
2. War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
3. World - URL: https://www.bbc.com/news/world
4. US & Canada - URL: https://www.bbc.com/news/us-canada
5. UK - URL: https://www.bbc.com/news/uk
6. In Pictures - URL: https://www.bbc.com/news/in_pictures
7. BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the number corresponding to the desired topic: 7
You selected: BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the statement to verify: President Putin waving at Moscow Victory Day parade Putin's Victory Day speech fact-checked President Putin made several claims about Ukraine and Nato in his speech at a military parade in Moscow.
Matching contexts found:
Link: https://www.bbc.com/news/61379405 - Matched: President Putin made several claims about Ukraine and Nato in his speech at a military parade in Moscow.
Link: https://www.bbc.com/news/61379405 - Match

In [None]:
import requests
from bs4 import BeautifulSoup
from fuzzywuzzy import fuzz

def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics

    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

def scrape_with_classes(soup, classes):
    scraped_data = {}
    for class_name in classes:
        elements = soup.find_all(class_=class_name)
        if elements:
            scraped_data[class_name] = [element.get_text(strip=True) for element in elements]

    return scraped_data

def compare_statement(statement, scraped_data, soup):
    matched_data = []
    for class_name, data_list in scraped_data.items():
        for data in data_list:
            ratio = fuzz.partial_ratio(statement.lower(), data.lower())
            if ratio >= 80:
                parent_link = soup.find(string=data).find_parent('a')
                if parent_link:
                    href = parent_link.get('href')
                    matched_data.append((f"https://www.bbc.com{href}", data))

    return matched_data

if __name__ == "__main__":
    # Get available topics
    topics = get_bbc_news_topics()

    if not topics:
        print("No topics found. Exiting.")
    else:
        # Display available topics to the user
        print("Available topics:")
        for topic in topics:
            print(f"{topic[0]}. {topic[1]} - URL: https://www.bbc.com{topic[2]}")

        # Ask the user to choose a topic
        try:
            user_choice = input("Enter the number corresponding to the desired topic: ")
            selected_topic = topics[int(user_choice) - 1]

            print(f"You selected: {selected_topic[1]} - URL: https://www.bbc.com{selected_topic[2]}")
            topic_url = f'https://www.bbc.com{selected_topic[2]}'

            # Make a request for the topic page
            topic_response = requests.get(topic_url)

            if topic_response.status_code == 200:
                soup = BeautifulSoup(topic_response.text, 'html.parser')

                # Classes to check
                classes_to_check = ["gclMev", "dEAAFJ", "czRLo", "jqwZKz", "kTrQIN", "dJMMNx", "crzIlm"]

                # Scrape elements with specified classes
                scraped_data = scrape_with_classes(soup, classes_to_check)

                # Ask the user to enter a statement for comparison
                user_statement = input("Enter the statement to verify: ")

                # Compare the user's statement with the scraped data
                matched_data = compare_statement(user_statement, scraped_data, soup)

                if matched_data:
                    print("Matching contexts found:")
                    for link, data in matched_data:
                        print(f"Link: {link} - Matched: {data}")
                else:
                    print("No matching context found. The statement might not be legitimate.")

            else:
                print(f"Error: Unable to fetch topic page. Status code: {topic_response.status_code}")

        except (ValueError, IndexError):
            print("Invalid choice. Please enter a valid topic number.")


Available topics:
1. Israel-Gaza War - URL: https://www.bbc.com/news/topics/c2vdnvdg6xxt
2. War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
3. World - URL: https://www.bbc.com/news/world
4. US & Canada - URL: https://www.bbc.com/news/us-canada
5. UK - URL: https://www.bbc.com/news/uk
6. In Pictures - URL: https://www.bbc.com/news/in_pictures
7. BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the number corresponding to the desired topic: 7
You selected: BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the statement to verify: bbc verify
Matching contexts found:
Link: https://www.bbc.com/news/world-middle-east-68306915 - Matched: BBC Verify has analysed the latest satellite image from the city in southern Gaza.
Link: https://www.bbc.com/news/world-middle-east-68159939 - Matched: BBC Verify analysis shows how Houthi tactics have changed, as commercial shipping traffic in the Red Sea halves.
Link: https://www.bbc.com/news/world-middle-east-683069

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load Dolly model and tokenizer
dolly_model = GPT2LMHeadModel.from_pretrained('EleutherAI/gpt-j-6B')
dolly_tokenizer = GPT2Tokenizer.from_pretrained('EleutherAI/gpt-j-6B')

# Set up GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
dolly_model.to(device)


You are using a model of type gptj to instantiate a model of type gpt2. This is not supported for all configurations of models and can yield errors.


pytorch_model.bin:   0%|          | 0.00/24.2G [00:00<?, ?B/s]

OSError: [Errno 28] No space left on device

In [None]:
# Install necessary libraries
!pip install transformers

# Import required modules
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# Define the RoBERTa-based question-answering pipeline
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2", tokenizer="deepset/roberta-base-squad2")

# Continue with the rest of your code...


# Import required modules
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# Function to get BBC News topics
def get_bbc_news_topics():
    url = 'https://www.bbc.com/news'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        topics = []

        # Find topics under the 'News' section
        news_section = soup.find('a', {'href': '/news'})
        if news_section:
            topic_list = news_section.find_next('ul')
            if topic_list:
                topics = [(index + 1, topic.text, topic['href']) for index, topic in enumerate(topic_list.find_all('a'))]

        return topics
    else:
        print(f"Error: Unable to fetch topics. Status code: {response.status_code}")
        return []

# Function to scrape data with specified classes
def scrape_with_classes(soup, classes):
    scraped_data = {}
    for class_name in classes:
        elements = soup.find_all(class_=class_name)
        if elements:
            scraped_data[class_name] = [element.get_text(strip=True) for element in elements]

    return scraped_data

# Function to generate responses using the RoBERTa model for question answering
def generate_response_roberta(question, context):
    answer = qa_pipeline(question=question, context=context)
    return answer['answer']

# Scrape BBC news topics
topics = get_bbc_news_topics()

if not topics:
    print("No topics found. Exiting.")
else:
    # Display available topics to the user
    print("Available topics:")
    for topic in topics:
        print(f"{topic[0]}. {topic[1]} - URL: https://www.bbc.com{topic[2]}")

    # Ask the user to choose a topic
    try:
        user_choice = input("Enter the number corresponding to the desired topic: ")
        selected_topic = topics[int(user_choice) - 1]

        print(f"You selected: {selected_topic[1]} - URL: https://www.bbc.com{selected_topic[2]}")
        topic_url = f'https://www.bbc.com{selected_topic[2]}'

        # Make a request for the topic page
        topic_response = requests.get(topic_url)

        if topic_response.status_code == 200:
            soup = BeautifulSoup(topic_response.text, 'html.parser')

            # Classes to check
            classes_to_check = ["gclMev", "dEAAFJ", "czRLo", "jqwZKz", "kTrQIN", "dJMMNx", "crzIlm"]

            # Scrape elements with specified classes
            scraped_data = scrape_with_classes(soup, classes_to_check)

            # Ask the user to enter a question for RoBERTa-based question answering
            user_question = input("Enter a question about the selected topic: ")

            # Use RoBERTa for question answering on the user's question and scraped data
            for class_name, data_list in scraped_data.items():
                for data in data_list:
                    question = f"What is {data} about?"
                    answer = generate_response_roberta(question, user_question)
                    print(f"Question: {question}")
                    print(f"Answer: {answer}")
                    print()

        else:
            print(f"Error: Unable to fetch topic page. Status code: {topic_response.status_code}")

    except (ValueError, IndexError):
        print("Invalid choice. Please enter a valid topic number.")




config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Available topics:
1. Israel-Gaza War - URL: https://www.bbc.com/news/topics/c2vdnvdg6xxt
2. War in Ukraine - URL: https://www.bbc.com/news/war-in-ukraine
3. World - URL: https://www.bbc.com/news/world
4. US & Canada - URL: https://www.bbc.com/news/us-canada
5. UK - URL: https://www.bbc.com/news/uk
6. In Pictures - URL: https://www.bbc.com/news/in_pictures
7. BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter the number corresponding to the desired topic: 7
You selected: BBC Verify - URL: https://www.bbc.com/news/reality_check
Enter a question about the selected topic: BBC Verify has analysed the latest satellite image from the city in southern Gaza.
Question: What is The prime minister said five priorities should be used to hold his government to account. about?
Answer: Gaza.

Question: What is New data suggests that the cost of living has been rising faster than official inflation figures. about?
Answer: satellite image from the city in southern Gaza

Question: What is The