In [1]:
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk

# Fetch the NLTK data packages used below: the sentence/word tokenizer,
# the POS tagger, the named-entity chunker and the word list it relies on.
# The packages are requested one at a time, in this fixed order.
for _resource in (
    'punkt',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'words',
):
    nltk.download(_resource)

# Run tokenization, POS tagging and NE chunking on a text and collect the
# labelled chunks as (text, label) pairs
def extract_entities(text):
    """Return ``(entity_text, label)`` pairs for the named entities in *text*.

    The text is passed through ``word_tokenize``, ``pos_tag`` and
    ``ne_chunk`` in that order. Every top-level item of the chunk result
    that is an ``nltk.tree.Tree`` is treated as one named entity: its leaf
    words are joined with single spaces and paired with the subtree's label.
    Items that are not trees are skipped.

    Args:
        text: The raw input string to analyse.

    Returns:
        A list of ``(entity_text, label)`` tuples in the order the chunks
        appear in the chunker output.
    """
    chunk_tree = ne_chunk(pos_tag(word_tokenize(text)))
    return [
        (" ".join(token for token, _pos in node.leaves()), node.label())
        for node in chunk_tree
        if isinstance(node, nltk.tree.Tree)
    ]

# Function to mask place and organization names in a text with placeholders
def resolve_references(text):
    """Replace place and organization names in *text* with placeholders.

    Entities returned by ``extract_entities`` with the label ``GPE`` are
    replaced by ``PLACE`` and those labelled ``ORGANIZATION`` by
    ``ORGANIZATION``. Entities with any other label are left untouched.

    All substitutions are applied in a single regex pass, which avoids the
    pitfalls of chaining ``str.replace`` calls:

    * an entity is only replaced where it stands as a whole word, so
      "Cambridge" no longer rewrites part of "Cambridgeshire";
    * longer entities take precedence over shorter ones they contain, so
      the result does not depend on the order the entities were found in;
    * placeholders that were already inserted are never scanned again.

    Args:
        text: The raw input string.

    Returns:
        A new string with the matching entities replaced, or *text*
        unchanged when no place or organization entity was found.
    """
    import re  # local import so the function does not rely on file-level imports

    placeholders = {'GPE': 'PLACE', 'ORGANIZATION': 'ORGANIZATION'}

    # Map each entity string to its placeholder. When the same string is
    # reported with several labels, the first replaceable label wins.
    replacements = {}
    for entity, entity_type in extract_entities(text):
        placeholder = placeholders.get(entity_type)
        key = " ".join(entity.split())
        if placeholder is not None and key:
            replacements.setdefault(key, placeholder)

    # An empty alternation would match at every position, so bail out early.
    if not replacements:
        return text

    # Longest first: regex alternation takes the first alternative that
    # matches, so "New York University" must be tried before "New York".
    ordered = sorted(replacements, key=len, reverse=True)

    # Entity strings are tokens joined by single spaces; accept any run of
    # whitespace between tokens so line breaks or double spaces in the
    # original text still match.
    alternatives = [
        r"\s+".join(re.escape(token) for token in entity.split())
        for entity in ordered
    ]

    # Lookarounds instead of \b so entities that start or end with a
    # non-word character (e.g. "U.S.") are still delimited correctly.
    pattern = re.compile(r"(?<!\w)(?:" + "|".join(alternatives) + r")(?!\w)")

    def substitute(match):
        # Collapse whitespace so the matched text maps back to its key.
        return replacements[" ".join(match.group(0).split())]

    return pattern.sub(substitute, text)

# Sample sentence used to demonstrate both helpers
text = "Harvard University, located in Cambridge, Massachusetts, is a prestigious institution."

# Show every entity that was found, one "<entity> - <label>" line each
entities = extract_entities(text)
print("Named Entities:")
for name, label in entities:
    print(name, label, sep=" - ")

# Show the same sentence with places and organizations replaced
resolved_text = resolve_references(text)
print("\nReference Resolution:", resolved_text, sep="\n")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


Named Entities:
Harvard - ORGANIZATION
University - GPE
Cambridge - GPE
Massachusetts - GPE

Reference Resolution:
ORGANIZATION PLACE, located in PLACE, PLACE, is a prestigious institution.
