In [1]:
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.chunk import tree2conlltags

# Make sure the NLTK data packages this script relies on are available locally.
# nltk.download is called once per package, in the order listed.
for _resource in ('punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words'):
    nltk.download(_resource)

# Function to map NLTK's generic NE labels to more specific types
def map_labels(ne_tree):
    """Yield ``(entity_text, entity_type)`` pairs for the labelled chunks in *ne_tree*.

    Only the direct children of *ne_tree* are inspected. Children that are
    not ``nltk.tree.Tree`` instances are skipped, as are chunks whose label
    is not one of the keys of the mapping below.

    Args:
        ne_tree: An iterable of children, typically the tree returned by
            ``ne_chunk``. Chunk leaves are unpacked as ``(word, pos)`` pairs.

    Yields:
        A 2-tuple: the chunk's words joined with single spaces, and the
        mapped type name ('PERSON', 'ORGANIZATION' or 'LOCATION').
    """
    ne_mapping = {
        'PERSON': 'PERSON',
        'ORGANIZATION': 'ORGANIZATION',
        'GPE': 'LOCATION',
    }
    for subtree in ne_tree:
        # isinstance (instead of an exact type comparison) also accepts
        # subclasses of Tree, which the original check silently dropped.
        if not isinstance(subtree, nltk.tree.Tree):
            continue
        label = subtree.label()
        if label in ne_mapping:
            entity_text = ' '.join(word for word, _pos in subtree.leaves())
            yield entity_text, ne_mapping[label]

# Function to perform Named Entity Recognition
def recognize_entities(input_file, output_file, *, encoding='utf-8'):
    """Run named-entity recognition on a text file and write the results to another file.

    The whole input file is read, tokenized with ``word_tokenize``, tagged
    with ``pos_tag`` and chunked with ``ne_chunk``. The entities produced by
    ``map_labels`` are then written one per line in the form
    ``Entity: <text>, Type: <type>``.

    Args:
        input_file: Path of the text file to analyse.
        output_file: Path of the report file; it is created or overwritten.
        encoding: Text encoding used for both files. Defaults to UTF-8 so
            the result does not depend on the platform's default encoding.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input file is not valid in *encoding*.
    """
    # An explicit encoding avoids mis-decoding non-ASCII text on platforms
    # whose default encoding is not UTF-8 (e.g. Windows code pages).
    with open(input_file, 'r', encoding=encoding) as f:
        text = f.read()

    # Tokenize the text and perform part-of-speech tagging
    words = word_tokenize(text)
    tagged_words = pos_tag(words)

    # Perform Named Entity Recognition and map labels.
    # The list is built before the output file is opened, so a failure in
    # the NLP pipeline does not leave a truncated report behind.
    ne_tree = ne_chunk(tagged_words)
    entities = list(map_labels(ne_tree))

    # Write recognized entities to the output file
    with open(output_file, 'w', encoding=encoding) as f:
        f.writelines(
            f"Entity: {entity}, Type: {entity_type}\n"
            for entity, entity_type in entities
        )

# Folder holding both the text to analyse and the generated report
# (update with your own location)
_desktop_dir = 'C:\\Users\\Chaithra.k\\OneDrive\\Desktop'
input_file = _desktop_dir + '\\input_text.txt'
output_file = _desktop_dir + '\\output_entities.txt'

# Run the entity-recognition pipeline on the configured files
recognize_entities(input_file=input_file, output_file=output_file)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Chaithra.k\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Chaithra.k\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\Chaithra.k\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\Chaithra.k\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!
