In [5]:
import nltk

# Input text for the part-of-speech tagging demo
sentence = "The cat is sitting on the mat."

# Split the sentence into tokens, then assign a POS tag to every token
tokens = nltk.word_tokenize(sentence)
pos_tags = nltk.pos_tag(tokens)

# Emit one "token: TAG" line per tagged token
for word, tag in pos_tags:
    print(word, tag, sep=": ")

The: DT
cat: NN
is: VBZ
sitting: VBG
on: IN
the: DT
mat: NN
.: .


In [8]:
# Challenges with POS tagging
# Ambiguity: the word "bank" is tagged in two different sentence contexts.
import nltk

sentence1 = "The bank can provide a loan."
sentence2 = "I need to bank the money."

# Tokenize and tag each sentence independently
tokens1 = nltk.word_tokenize(sentence1)
tokens2 = nltk.word_tokenize(sentence2)
pos_tags1 = nltk.pos_tag(tokens1)
pos_tags2 = nltk.pos_tag(tokens2)

# Show each sentence followed by its list of (token, tag) pairs
print(sentence1, pos_tags1, sentence2, pos_tags2, sep="\n")

The bank can provide a loan.
[('The', 'DT'), ('bank', 'NN'), ('can', 'MD'), ('provide', 'VB'), ('a', 'DT'), ('loan', 'NN'), ('.', '.')]
I need to bank the money.
[('I', 'PRP'), ('need', 'VBP'), ('to', 'TO'), ('bank', 'NN'), ('the', 'DT'), ('money', 'NN'), ('.', '.')]


In [11]:
# Challenge: out-of-vocabulary words
# NOTE(review): "frozzles" is presumably a made-up word the tagger has not seen.
import nltk

sentence = "I love eating frozzles."

# Tokenize first, then tag the resulting tokens
tokens = nltk.word_tokenize(sentence)
pos_tags = nltk.pos_tag(tokens)

# Sentence on the first line, its (token, tag) pairs on the second
print(f"{sentence}\n{pos_tags}")

I love eating frozzles.
[('I', 'PRP'), ('love', 'VBP'), ('eating', 'VBG'), ('frozzles', 'NNS'), ('.', '.')]


In [12]:
# Challenge: tagging errors
# Tag a simple sentence so the assigned tags can be inspected for mistakes.
import nltk

sentence = "I have a pen and an apple."

tokens = nltk.word_tokenize(sentence)
pos_tags = nltk.pos_tag(tokens)

# Print the raw sentence, then the tagger's (token, tag) pairs
print(sentence, pos_tags, sep="\n")

I have a pen and an apple.
[('I', 'PRP'), ('have', 'VBP'), ('a', 'DT'), ('pen', 'NN'), ('and', 'CC'), ('an', 'DT'), ('apple', 'NN'), ('.', '.')]


In [14]:
# Challenge: lack of context
# A very short sentence gives the tagger little surrounding text to work with.
import nltk

sentence = "I saw a bat."

tokens = nltk.word_tokenize(sentence)
pos_tags = nltk.pos_tag(tokens)

# Sentence on the first line, its (token, tag) pairs on the second
print(f"{sentence}\n{pos_tags}")

I saw a bat.
[('I', 'PRP'), ('saw', 'VBD'), ('a', 'DT'), ('bat', 'NN'), ('.', '.')]


In [15]:
# Challenge: homographs and homonyms
# The same spelling ("band") occurs twice in one sentence.
import nltk

sentence = "The band played at the band."

tokens = nltk.word_tokenize(sentence)
pos_tags = nltk.pos_tag(tokens)

# Print the raw sentence, then the tagger's (token, tag) pairs
print(sentence, pos_tags, sep="\n")


The band played at the band.
[('The', 'DT'), ('band', 'NN'), ('played', 'VBD'), ('at', 'IN'), ('the', 'DT'), ('band', 'NN'), ('.', '.')]


In [3]:
import nltk

# Input text for the named-entity recognition demo
sentence = "Apple Inc. is planning to open a new store in New York City."

# The chunker is fed POS-tagged tokens, so tokenize and tag first
tokens = nltk.word_tokenize(sentence)
tagged_tokens = nltk.pos_tag(tokens)

# Chunk the tagged tokens into named entities
ner_tags = nltk.ne_chunk(tagged_tokens)

# Report every labelled chunk as "LABEL: words"; items without a label are skipped
for entity in ner_tags:
    if not hasattr(entity, 'label'):
        continue
    entity_text = ' '.join(child[0] for child in entity)
    print(f"{entity.label()}: {entity_text}")


PERSON: Apple
ORGANIZATION: Inc.
GPE: New York City


In [3]:
# NER evaluation: MUC-style precision, recall and F1 score
def muc_evaluation(reference, predicted):
    """Compute precision, recall and F1 from per-sentence label overlap.

    Each reference/predicted pair is reduced to the *set* of distinct labels
    it contains, and the two sets are compared. Counts are accumulated over
    all pairs before the ratios are taken.

    NOTE(review): because sets are compared, token positions and repeated
    labels are ignored, and every distinct label (including 'O') counts as an
    item. This is a label-overlap score, not span-level entity matching;
    confirm that this is the intended metric.

    Args:
        reference: Iterable of label sequences (gold annotations), one per
            sentence. Labels must be hashable.
        predicted: Iterable of label sequences (system output), paired
            positionally with ``reference``. If the two inputs differ in
            length, the extra sequences of the longer one are ignored.

    Returns:
        A ``(precision, recall, f1_score)`` tuple of floats. Any ratio whose
        denominator would be zero (no predicted labels, no reference labels,
        or no overlap at all) is reported as 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    true_positives = 0
    false_positives = 0
    false_negatives = 0

    for ref_entities, pred_entities in zip(reference, predicted):
        ref_set = set(ref_entities)
        pred_set = set(pred_entities)

        # Set algebra yields the same counts as testing membership one by one.
        true_positives += len(pred_set & ref_set)
        false_positives += len(pred_set - ref_set)
        false_negatives += len(ref_set - pred_set)

    predicted_total = true_positives + false_positives
    reference_total = true_positives + false_negatives

    # Guard each division: empty inputs or zero overlap must not crash.
    precision = true_positives / predicted_total if predicted_total else 0.0
    recall = true_positives / reference_total if reference_total else 0.0

    if precision + recall:
        f1_score = 2 * ((precision * recall) / (precision + recall))
    else:
        f1_score = 0.0

    return precision, recall, f1_score

# Example usage: two sentences of B-/I-/O labels. The predictions differ from
# the reference only in the last label of the second sentence.
reference = [
    ['B-PER', 'I-PER', 'O', 'B-LOC', 'I-LOC'],
    ['O', 'O', 'B-ORG', 'I-ORG', 'O', 'B-PER'],
]
predicted = [
    ['B-PER', 'I-PER', 'O', 'B-LOC', 'I-LOC'],
    ['O', 'O', 'B-ORG', 'I-ORG', 'O', 'B-LOC'],
]

# Score the predictions against the reference labels
precision, recall, f1_score = muc_evaluation(reference, predicted)

# Report the three scores, one per line
print(
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1_score}",
    sep="\n",
)



Precision: 0.8888888888888888
Recall: 0.8888888888888888
F1 Score: 0.8888888888888888
