In [None]:
# Boilerplate: This block goes into every notebook.
# It sets up the environment, installs the requirements, and checks for the required environment variables.

from IPython.display import clear_output
import os

# Session-level state shared by install_requirements():
# - requirements_installed: set to True after the first successful install.
# - max_retries: how many extra attempts are allowed after a failed install.
# - retries: how many of those extra attempts have been used so far.
requirements_installed = False
max_retries = 3
retries = 0


def install_requirements():
    """Installs the requirements from requirements.txt file.

    The install is skipped when an earlier call already succeeded (tracked by
    the module-level ``requirements_installed`` flag). A failed install is
    retried until the module-level ``retries`` counter reaches ``max_retries``.

    Raises:
        RuntimeError: If pip still fails once the retries are used up.
    """
    # Imported here so the function does not depend on imports from other cells.
    import subprocess
    import sys

    global requirements_installed, retries
    if requirements_installed:
        print("Requirements already installed.")
        return

    # Invoke pip through the interpreter running this code so the packages land
    # in the same environment that will import them.
    pip_command = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]

    while True:
        print("Installing requirements...")
        install_status = subprocess.run(pip_command).returncode
        if install_status == 0:
            print("Requirements installed successfully.")
            requirements_installed = True
            return

        print("Failed to install requirements.")
        if retries >= max_retries:
            # Raise instead of calling exit(): exit() is an interactive-shell
            # helper and is not a reliable way to abort a notebook cell, so the
            # code after this call could still report success.
            raise RuntimeError(
                f"Failed to install requirements after {retries} retries "
                f"(pip exit status {install_status})."
            )
        print("Retrying...")
        retries += 1


# Install the dependencies, then clear this cell's output so that only the
# confirmation line printed below remains visible.
install_requirements()
clear_output()
print("🚀 Setup complete. Continue to the next cell.")

In [None]:
from dotenv import load_dotenv

# Environment variables that must be present before the rest of the notebook runs.
REQUIRED_ENV_VARS = ["ANTHROPIC_API_KEY", "FIRECRAWL_API_KEY"]


def setup_env():
    """Loads ``.env`` and verifies that every required environment variable is set.

    Values from the ``.env`` file override variables already present in the
    process environment (``override=True``). Each name in
    ``REQUIRED_ENV_VARS`` is then checked and its status printed. A variable
    that is unset or empty counts as missing.

    Raises:
        RuntimeError: If one or more required variables are missing. All
            missing names are reported together.
    """
    load_dotenv(override=True, dotenv_path=".env")

    missing_vars = []
    for env_var in REQUIRED_ENV_VARS:
        # An empty string is as unusable as an unset key, so treat both as missing.
        if os.getenv(env_var):
            print(f"{env_var} is set.")
        else:
            print(f"Please set the {env_var} environment variable.")
            missing_vars.append(env_var)

    if missing_vars:
        # Raise instead of calling exit(): exit() is an interactive-shell helper
        # and is not a reliable way to abort a notebook cell, so the success
        # message below could still be printed.
        raise RuntimeError(
            "Missing required environment variables: " + ", ".join(missing_vars)
        )

    print("Environment variables are set.")


# Load .env and check the required API keys before any client is created.
setup_env()

In [None]:
from firecrawl import FirecrawlApp


def get_firecrawl_client():
    """Builds a FirecrawlApp using the key stored in FIRECRAWL_API_KEY.

    The key is read from the environment at call time and passed through
    as-is (``None`` when the variable is not set).
    """
    firecrawl_key = os.environ.get("FIRECRAWL_API_KEY")
    client = FirecrawlApp(api_key=firecrawl_key)
    return client

In [None]:
import traceback
from typing import Union
from litellm import completion


class NewsFetcher:
    """
    Scrapes a page with Firecrawl and returns its markdown when the LLM
    judges the page to be a news article.
    """

    DEFAULT_LLM_MODEL = "anthropic/claude-3-5-sonnet-latest"
    # Same value as the detector classes in this notebook. The previous value
    # (11) produced a temperature of -10. Note that DEFAULT_TEMPERATURE is not
    # passed to `completion` anywhere in this class.
    AUSPICIOUS_NUMBER = 0.108
    DEFAULT_TEMPERATURE = 1 - AUSPICIOUS_NUMBER

    def __init__(self, model=DEFAULT_LLM_MODEL):
        """
        Args:
            model (str): Model identifier passed to `completion` when
                classifying a page.
        """
        self.crawler = get_firecrawl_client()
        self.model = model

    def is_news_article(self, page_markdown: str) -> bool:
        """
        Asks the LLM whether the given page content is a news article.

        Args:
            page_markdown (str): The page content in markdown format.

        Returns:
            bool: True if the model answers 1 (yes). False if it answers 0
            (no), if the request fails, or if the answer cannot be interpreted.
        """
        import re

        try:
            user_prompt = f"""
                Is this a news article?
                Page: '''{page_markdown}'''.
                Respond with 0 for No and 1 for Yes and nothing else.
            """
            response = completion(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": user_prompt,
                    }
                ],
            )
            # The content may be None; treat that like an uninterpretable answer.
            result = response.choices[0].message.content or ""

            # Match a standalone 0 or 1 rather than any "0"/"1" character, so
            # digits inside other numbers (e.g. a year) cannot flip the answer.
            verdict = re.search(r"\b[01]\b", result)
            if verdict is None:
                raise ValueError("Failed to determine if the URL is a news article.")

            return verdict.group() == "1"
        except Exception as e:
            # Best effort: any failure is logged and reported as "not a news article".
            print(f"Failed to check if the URL is a news article: {e}")
            traceback.print_exc()
            return False

    def get_news_article(self, url: str) -> Union[str, None]:
        """
        Fetches the news article from the given URL in markdown format.

        Args:
            url (str): The URL of the news article.

        Returns:
            str | None: The page markdown, or None when scraping fails, the
            status code is not 200, the page has no markdown content, the page
            is not judged to be a news article, or any error occurs.
        """
        try:
            # Reuse the client created in __init__ instead of building a new one per call.
            scrape_result = self.crawler.scrape_url(url, params={"formats": ["markdown"]})
            if not scrape_result:
                return None

            success = scrape_result["metadata"]["statusCode"] == 200

            if not success:
                print(f"Failed to get docs from URL: {url}")
                return None

            page_markdown = scrape_result["markdown"]

            # Nothing to classify; skip the LLM call for an empty page.
            if not page_markdown:
                print(f"No markdown content returned for URL: {url}")
                return None

            if not self.is_news_article(page_markdown):
                print("The given URL is not a news article.")
                return None

            return page_markdown
        except Exception as e:
            print(f"Failed to fetch news article: {e}")
            traceback.print_exc()
            return None

In [None]:
from IPython.display import Markdown, display

# Try the fetcher on a single article and render the scraped markdown inline.
news_fetcher = NewsFetcher()

url = "https://indianexpress.com/article/education/cbse-dual-board-exam-class-10-2026-9856208/"

# None is returned when scraping fails or the page is not judged to be a news article.
markdown = news_fetcher.get_news_article(url)

if markdown:
    display(Markdown(markdown))
else:
    print("Failed to fetch the news article.")

In [None]:
import traceback
from litellm import completion, supports_response_schema
from pydantic import BaseModel
import json


class FakeNewsDetectionResponse(BaseModel):
    """
    Response from the fake news detection API.
    """

    # Passed as `response_format` to `completion` by FakeNewsDetector and
    # rebuilt from the JSON the model returns.
    # NOTE(review): the docstring above may be included in the schema sent to
    # the model — confirm before rewording it.

    # The model's verdict on the "Is this news article fake?" question.
    is_fake: bool
    # The brief explanation the prompt asks for.
    explanation: str


class FakeNewsDetector:
    """
    Fetches a news article by URL and asks the LLM for a fake / not-fake
    verdict with a brief explanation.
    """

    DEFAULT_LLM_MODEL = "anthropic/claude-3-5-sonnet-latest"
    AUSPICIOUS_NUMBER = 0.108
    # Not passed to `completion` anywhere in this class.
    DEFAULT_TEMPERATURE = 1 - AUSPICIOUS_NUMBER

    def __init__(self, model=DEFAULT_LLM_MODEL, news_fetcher_model=DEFAULT_LLM_MODEL):
        """
        Args:
            model (str): Model identifier used for the fake-news verdict.
            news_fetcher_model (str): Model identifier handed to the
                NewsFetcher that retrieves the article.
        """
        self.model = model
        self.news_fetcher = NewsFetcher(model=news_fetcher_model)

    def detect_fake_news(self, url: str) -> "FakeNewsDetectionResponse":
        """
        Fetches the article at the given URL and asks the LLM whether it is fake.

        Args:
            url (str): The URL of the news article.

        Returns:
            FakeNewsDetectionResponse: The verdict (`is_fake`) and its explanation.

        Raises:
            RuntimeError: If the article could not be fetched.
            Exception: Any error from the LLM call or from parsing its reply
                is logged and re-raised unchanged.
        """
        try:
            news_article = self.news_fetcher.get_news_article(url)
            if not news_article:
                raise RuntimeError("Failed to fetch the news article.")

            # The answer is returned through the structured response format, so
            # the prompt asks for the schema's fields rather than a bare 0/1.
            user_prompt = f"""
                Is this news article fake?
                Article: '''{news_article}'''.
                Set is_fake to true if the article is fake and false otherwise.
                Also provide a brief explanation.
            """
            response = completion(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": user_prompt,
                    }
                ],
                response_format=FakeNewsDetectionResponse,
            )
            result_json = response.choices[0].message.content
            result = json.loads(result_json)
            return FakeNewsDetectionResponse(**result)
        except Exception as e:
            print(f"Failed to check if the news article is fake: {e}")
            traceback.print_exc()
            # Bare raise keeps the original exception and traceback intact.
            raise

In [None]:
import traceback
from litellm import completion, supports_response_schema
from pydantic import BaseModel
import json


class FactCheckResult(BaseModel):
    """
    Fact check result.
    """

    # One entry of MisinformationDetectionResponse.fact_check.

    # The checked statement — presumably taken from the article; the prompt
    # does not say so explicitly.
    fact: str
    # NOTE(review): the prompt never states a scale or direction for this
    # score — confirm the expected range.
    authenticity_score: float
    # Free-text explanation for this entry.
    explanation: str


class MisinformationDetectionResponse(BaseModel):
    """
    Response from the fake news detection API.
    """

    # Passed as `response_format` to `completion` by MisinformationDetector and
    # rebuilt from the JSON the model returns.
    # NOTE(review): the docstring above still says "fake news detection"; it may
    # be included in the schema sent to the model — confirm before rewording it.

    # "The likelihood of misinformation" per the prompt.
    # NOTE(review): no numeric range is specified anywhere — confirm the scale.
    misinformation_score: float
    # Explanation of the score.
    explanation: str
    # Linguistic features that indicate misinformation, as free text.
    linguistic_features: str
    # The individual fact-check results.
    fact_check: list[FactCheckResult]


class MisinformationDetector:
    """
    Fetches a news article by URL and asks the LLM for a structured
    misinformation assessment.
    """

    DEFAULT_LLM_MODEL = "anthropic/claude-3-5-sonnet-latest"
    AUSPICIOUS_NUMBER = 0.108
    # Not passed to `completion` anywhere in this class.
    DEFAULT_TEMPERATURE = 1 - AUSPICIOUS_NUMBER

    def __init__(self, model=DEFAULT_LLM_MODEL, news_fetcher_model=DEFAULT_LLM_MODEL):
        """
        Args:
            model (str): Model identifier used for the misinformation assessment.
            news_fetcher_model (str): Model identifier handed to the
                NewsFetcher that retrieves the article.
        """
        self.model = model
        self.news_fetcher = NewsFetcher(model=news_fetcher_model)

    def detect_misinformation(self, url: str) -> "MisinformationDetectionResponse":
        """
        Fetches the article at the given URL and asks the LLM to assess it
        for misinformation.

        Args:
            url (str): The URL of the news article.

        Returns:
            MisinformationDetectionResponse: The misinformation score, its
            explanation, the linguistic features, and the fact-check results.

        Raises:
            RuntimeError: If the article could not be fetched.
            Exception: Any error from the LLM call or from parsing its reply
                is logged and re-raised unchanged.
        """
        try:
            news_article = self.news_fetcher.get_news_article(url)
            if not news_article:
                raise RuntimeError("Failed to fetch the news article.")

            user_prompt = f"""
                Detect misinformation in the news article.
                Article: '''{news_article}'''.
                Make sure the following are included:
                - Misinformation score: The likelihood of misinformation.
                - Explanation: Explanation of the score.
                - Linguistic features: The linguistic features that indicate misinformation.
                - Fact check: The fact check results.
            """
            response = completion(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": user_prompt,
                    }
                ],
                response_format=MisinformationDetectionResponse,
            )
            result_json = response.choices[0].message.content
            result = json.loads(result_json)
            return MisinformationDetectionResponse(**result)
        except Exception as e:
            print(f"Failed to detect misinformation in the news article: {e}")
            traceback.print_exc()
            # Bare raise keeps the original exception and traceback intact.
            raise

In [None]:
# Run the misinformation detector end to end on a sample article.
detector = MisinformationDetector()

url = "https://indianexpress.com/article/education/cbse-dual-board-exam-class-10-2026-9856208/"

# Raises if the article cannot be fetched or the LLM call / response parsing fails.
response = detector.detect_misinformation(url)

print(response)