<a href="https://colab.research.google.com/github/justinmjoseph2/NLP3/blob/main/nlp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install spacy nltk requests




In [6]:
import nltk
nltk.download('punkt')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('averaged_perceptron_tagger') # Download the missing resource

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [7]:
import requests
import spacy
import nltk
from nltk import ne_chunk, pos_tag, word_tokenize
from nltk.tree import Tree

# Function to fetch news article
def fetch_news(api_key):
    url = f'https://newsapi.org/v2/top-headlines?country=us&apiKey={api_key}'
    response = requests.get(url)
    data = response.json()

    # Extract the content of the first article
    if data['articles']:
        article = data['articles'][0]['content']
        return article
    return None

# Function to extract entities using SpaCy
def extract_entities_spacy(text):
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities

# Function to extract entities using NLTK
def extract_entities_nltk(text):
    words = nltk.word_tokenize(text)
    tagged = nltk.pos_tag(words)
    chunked = ne_chunk(tagged)

    entities = []
    for chunk in chunked:
        if hasattr(chunk, 'label'):
            entity = ' '.join(c[0] for c in chunk)
            entity_type = chunk.label()
            entities.append((entity, entity_type))
    return entities

# Main function
def main():
    # Replace with your own API key
    api_key = 'ea6227da22be4881bd9a57667f067506'
    news_article = fetch_news(api_key)

    if news_article:
        print("News Article:", news_article)

        # Extract entities using SpaCy
        spacy_entities = extract_entities_spacy(news_article)
        print("\nEntities extracted by SpaCy:")
        for entity in spacy_entities:
            print(entity)

        # Extract entities using NLTK
        nltk_entities = extract_entities_nltk(news_article)
        print("\nEntities extracted by NLTK:")
        for entity in nltk_entities:
            print(entity)

        # Compare and analyze results
        compare_results(spacy_entities, nltk_entities)

    else:
        print("No news article found.")

# Function to compare results
def compare_results(spacy_entities, nltk_entities):
    print("\nComparison of Named Entities:")
    print(f"Number of entities extracted by SpaCy: {len(spacy_entities)}")
    print(f"Number of entities extracted by NLTK: {len(nltk_entities)}")

    # Finding common and unique entities
    spacy_set = set(spacy_entities)
    nltk_set = set(nltk_entities)

    common_entities = spacy_set & nltk_set
    unique_to_spacy = spacy_set - nltk_set
    unique_to_nltk = nltk_set - spacy_set

    print(f"Common entities: {len(common_entities)}")
    print(f"Entities unique to SpaCy: {len(unique_to_spacy)}")
    print(f"Entities unique to NLTK: {len(unique_to_nltk)}")

if __name__ == "__main__":
    main()


News Article: David Montgomery is the Detroit Lions closer.
Dan Campbell said as much Friday, praising Montgomerys ability to set a tone to start games and finish out strong when others tire, and the running back… [+3979 chars]

Entities extracted by SpaCy:
('David Montgomery', 'PERSON')
('Detroit Lions', 'ORG')
('Dan Campbell', 'PERSON')
('Friday', 'DATE')

Entities extracted by NLTK:
('David', 'PERSON')
('Montgomery', 'ORGANIZATION')
('Detroit Lions', 'ORGANIZATION')
('Dan Campbell', 'PERSON')
('Montgomerys', 'PERSON')

Comparison of Named Entities:
Number of entities extracted by SpaCy: 4
Number of entities extracted by NLTK: 5
Common entities: 1
Entities unique to SpaCy: 3
Entities unique to NLTK: 4
