In [None]:
pip install spacy
pip install requests
python -m spacy download en_core_web_sm
pip install pandas matplotlib

In [None]:
# Outputs raw data: prints every named entity found on the page with its spaCy label

import spacy
import requests
from bs4 import BeautifulSoup

# Load spaCy's small English pipeline ("en_core_web_sm") once, at cell level.
# The resulting `nlp` object is what the extraction function below calls on the
# page text to obtain named entities.
nlp = spacy.load("en_core_web_sm")

# Function to extract named entities from HTML content
def extract_named_entities_from_url(url, timeout=10):
    """Download a web page and return the named entities spaCy finds in its text.

    Args:
        url: Address of the HTML page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 10.

    Returns:
        dict: Maps each entity's text to its spaCy label. Because the entity
        text is the key, repeated mentions collapse to one entry and the label
        of the last occurrence wins. If any step fails (request, HTTP status,
        parsing, NER) the result is ``{"error": "<message>"}`` instead.
    """
    try:
        # Without a timeout, requests can block forever on an unresponsive server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Join text nodes with a space so words from adjacent tags
        # (e.g. "<li>Apple</li><li>Google</li>") are not fused into one token
        # before NER runs.
        text = soup.get_text(separator=" ")

        # Process the text with spaCy NER
        doc = nlp(text)

        # Entity text -> label; later duplicates overwrite earlier ones.
        return {ent.text: ent.label_ for ent in doc.ents}

    except Exception as e:
        # Failures are reported through the return value rather than raised,
        # because the calling cell checks for the "error" key.
        return {"error": str(e)}

# Ask the user which page to analyse
url = input("Enter the URL of the webpage (HTML): ")

# Run the extraction; the result is a dict in both the success and failure case
entities = extract_named_entities_from_url(url)

# Print one "text - label" line per entity, or the failure message.
# NOTE(review): success results are keyed by entity text, so a page containing
# an entity whose text is exactly "error" is treated as a failure here.
if "error" not in entities:
    print("Named Entities:")
    for name, kind in entities.items():
        print(f"{name} - {kind}")
else:
    print("An error occurred:", entities["error"])


In [None]:
# Outputs visualizations: a bar chart of entity counts per category, followed by the entity table

import spacy
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt

# Load spaCy's small English pipeline ("en_core_web_sm") for use by the
# extraction function defined in this cell.
# NOTE(review): the earlier raw-data cell performs this same load; repeating it
# here presumably lets this cell run on its own — confirm that is intended.
nlp = spacy.load("en_core_web_sm")

# Function to extract named entities from HTML content
def extract_named_entities_from_url(url, timeout=10):
    """Download a web page and list every named entity spaCy finds in its text.

    Args:
        url: Address of the HTML page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 10.

    Returns:
        list[tuple[str, str]]: One ``(entity_text, label)`` pair per entity
        occurrence, in document order (duplicates are kept). If any step fails
        (request, HTTP status, parsing, NER) a dict ``{"error": "<message>"}``
        is returned instead; the calling cell tells the two apart with
        ``"error" in result``.
    """
    try:
        # Without a timeout, requests can block forever on an unresponsive server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Join text nodes with a space so words from adjacent tags
        # (e.g. "<li>Apple</li><li>Google</li>") are not fused into one token
        # before NER runs.
        text = soup.get_text(separator=" ")

        # Process the text with spaCy NER
        doc = nlp(text)

        # Keep every occurrence so per-category counts reflect mention frequency.
        return [(ent.text, ent.label_) for ent in doc.ents]

    except Exception as e:
        # Failures are reported through the return value rather than raised,
        # because the calling cell checks for the "error" key.
        return {"error": str(e)}

# Ask the user which page to analyse
url = input("Enter the URL of the webpage (HTML): ")

# Run the extraction: a list of (text, label) pairs, or an {"error": ...} dict
entities = extract_named_entities_from_url(url)

# Chart and tabulate the entities, or report what went wrong
if "error" not in entities:
    # One row per entity occurrence
    df = pd.DataFrame(entities, columns=["Entity", "Category"])

    # Number of entities in each category, with "Category" kept as a column
    grouped_entities = df.groupby("Category", as_index=False).count()

    # Bar chart: one bar per category, bar height = number of entities
    plt.figure(figsize=(10, 6))
    plt.bar(grouped_entities["Category"], grouped_entities["Entity"])
    plt.title("Named Entity Categories")
    plt.xlabel("Category")
    plt.ylabel("Count")
    plt.show()

    # Full entity table, printed after the chart
    print("Named Entities:")
    print(df)
else:
    print("An error occurred:", entities["error"])