In [19]:
import matplotlib.pyplot as plt
import numpy as np
from IPython.core.display import HTML
from itertools import chain
from collections import Counter, defaultdict, namedtuple, OrderedDict
from pomegranate import State, HiddenMarkovModel, DiscreteDistribution
import os
from io import BytesIO
from itertools import chain
import random

In [20]:
def read_data(filename):
    """Read tagged sentence data"""
    with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
        sentence_lines = [l.split("\n") for l in f.read().split("\n\n")]
    return OrderedDict(((s[0], Sentence(*zip(*[l.strip().split("\t")
                        for l in s[1:]]))) for s in sentence_lines if s[0]))

def read_tags(filename):
    """Read a list of word tag classes"""
    with open(filename, 'r') as f:
        tags = f.read().split("\n")
    return frozenset(tags)

Sentence = namedtuple("Sentence", "words tags")

Let's read in the Brown corpus to see if our python code works out:

In [21]:
tag = read_tags("english.txt")
sentence1 = read_data("hindi.txt")
sentence1

OrderedDict([('b100-5507',
              Sentence(words=('वे', 'हो', 'पर', 'एक', 'है', 'इस', 'करता है', 'ठीक कर', 'छह', 'पढ़ा', 'हाथ', 'वे', 'हो', 'पर', 'एक', 'है', 'इस', 'करता है', 'ठीक कर', 'छह', 'पढ़ा', 'हाथ'), tags=('they', 'be', 'at', 'one', 'have', 'this', 'does', 'fix', 'six', 'read', 'hand', 'they', 'be', 'at', 'one', 'have', 'this', 'does', 'fix', 'six', 'read', 'hand'))),
             ('b100-935',
              Sentence(words=('वे', 'हो', 'पर', 'एक', 'है', 'इस', 'करता है', 'ठीक कर', 'छह', 'पढ़ा', 'हाथ', 'आधा', 'किमी ', 'की ', 'लंबाई ', 'पर ', 'देश', 'विशेष', 'जहाँ ', 'केवल ', 'मां', 'के ', 'पट ', 'खुलने ', 'और ', 'बंद ', 'होने ', 'के ', 'दिन ', 'ही ', 'पूजा ', 'किया ', 'जाता ', 'है'), tags=('they', 'be', 'at', 'one', 'have', 'this', 'does', 'fix', 'six', 'read', 'hand', 'half', 'km', 'a', 'length', 'at', 'country', 'special', 'where', 'only', 'mother', 'is', 'door', 'opening', 'and', 'closing', 'done', 'is', 'day', 'only', 'pray', 'done', 'is', 'done'))),
             ('b100-

The class Dataset below incorporates our function above, reads in the Brown corpus and creates a collection of keys, a set of (unique) words, a sequence of words and a mirror sequence of tags as tuples, with N being the number of words in the Brown corpus.

Then it splits all this nice data into a training and test decomposition by using the Subset class defined further below, which mirrors the Dataset class.

In [22]:
class Dataset(namedtuple("_Dataset", "sentences keys vocab X tagset Y training_set testing_set N stream")):
    def __new__(cls, tagfile, datafile, train_test_split=0.8, seed=112890):
        tagset = read_tags(tagfile)
        sentences = read_data(datafile)
        keys = tuple(sentences.keys())
        wordset = frozenset(chain(*[s.words for s in sentences.values()]))
        word_sequences = tuple([sentences[k].words for k in keys])
        tag_sequences = tuple([sentences[k].tags for k in keys])
        N = sum(1 for _ in chain(*(s.words for s in sentences.values())))
        
        # split data into train/test sets
        _keys = list(keys)
        if seed is not None: random.seed(seed)
        random.shuffle(_keys)
        split = int(train_test_split * len(_keys))
        training_data = Subset(sentences, _keys[:split])
        testing_data = Subset(sentences, _keys[split:])
        stream = tuple(zip(chain(*word_sequences), chain(*tag_sequences)))
        return super().__new__(cls, dict(sentences), keys, wordset, word_sequences, tagset,
                               tag_sequences, training_data, testing_data, N, stream.__iter__)

    def __len__(self):
        return len(self.sentences)

    def __iter__(self):
        return iter(self.sentences.items())
    
    
class Subset(namedtuple("BaseSet", "sentences keys vocab X tagset Y N stream")):
    def __new__(cls, sentences, keys):
        word_sequences = tuple([sentences[k].words for k in keys])
        tag_sequences = tuple([sentences[k].tags for k in keys])
        wordset = frozenset(chain(*word_sequences))
        tagset = frozenset(chain(*tag_sequences))
        N = sum(1 for _ in chain(*(sentences[k].words for k in keys)))
        stream = tuple(zip(chain(*word_sequences), chain(*tag_sequences)))
        return super().__new__(cls, {k: sentences[k] for k in keys}, keys, wordset, word_sequences,
                               tagset, tag_sequences, N, stream.__iter__)

    def __len__(self):
        return len(self.sentences)

    def __iter__(self):
        return iter(self.sentences.items())

Let's read in the Brown corpus again, leveraging our classes above now, which order the corpus into efficiently navigable structures:

In [23]:
data = Dataset("english.txt", "hindi.txt", train_test_split=0.8)

print("There are {} sentences in the corpus.".format(len(data)))
print("There are {} sentences in the training set.".format(len(data.training_set)))
print("There are {} sentences in the testing set.".format(len(data.testing_set)))

assert len(data) == len(data.training_set) + len(data.testing_set), \
       "The number of sentences in the training set + testing set should sum to the number of sentences in the corpus"

There are 18 sentences in the corpus.
There are 14 sentences in the training set.
There are 4 sentences in the testing set.


X: Independent variable
Y: Dependent variable

In [24]:
for i in range(2):    
    print("Sentence {}:".format(i + 1), data.X[i])
    print()
    print("Labels {}:".format(i + 1), data.Y[i])
    print()

Sentence 1: ('वे', 'हो', 'पर', 'एक', 'है', 'इस', 'करता है', 'ठीक कर', 'छह', 'पढ़ा', 'हाथ', 'वे', 'हो', 'पर', 'एक', 'है', 'इस', 'करता है', 'ठीक कर', 'छह', 'पढ़ा', 'हाथ')

Labels 1: ('they', 'be', 'at', 'one', 'have', 'this', 'does', 'fix', 'six', 'read', 'hand', 'they', 'be', 'at', 'one', 'have', 'this', 'does', 'fix', 'six', 'read', 'hand')

Sentence 2: ('वे', 'हो', 'पर', 'एक', 'है', 'इस', 'करता है', 'ठीक कर', 'छह', 'पढ़ा', 'हाथ', 'आधा', 'किमी ', 'की ', 'लंबाई ', 'पर ', 'देश', 'विशेष', 'जहाँ ', 'केवल ', 'मां', 'के ', 'पट ', 'खुलने ', 'और ', 'बंद ', 'होने ', 'के ', 'दिन ', 'ही ', 'पूजा ', 'किया ', 'जाता ', 'है')

Labels 2: ('they', 'be', 'at', 'one', 'have', 'this', 'does', 'fix', 'six', 'read', 'hand', 'half', 'km', 'a', 'length', 'at', 'country', 'special', 'where', 'only', 'mother', 'is', 'door', 'opening', 'and', 'closing', 'done', 'is', 'day', 'only', 'pray', 'done', 'is', 'done')



In [25]:
# key = 'b100-87000'
# print("Sentence: {}".format(key))
# print("words:\n\t{!s}".format(data.sentences[key].words))
# print("tags:\n\t{!s}".format(data.sentences[key].tags))

Use Dataset.stream() to enumerate (word, tag) samples for the entire corpus.



Lets Enumerate first 7

In [26]:
words = [word for i, (word, tag) in enumerate(data.training_set.stream())]
tags = [tag for i, (word, tag) in enumerate(data.training_set.stream())]
words[0:7], tags[0:7]

(['सभी ', 'पर्व ', 'ओर ', 'त्योहार ', 'परस्पर ', 'प्रेम ', 'भाईचारा '],
 ['everyone', 'festivals', 'and', 'festival', 'mutual', 'love', 'brotherhood'])

#  POS Tagger using BOW Model

In [27]:
def pair_counts(tags, words):
    d = defaultdict(lambda: defaultdict(int))
    for tag, word in zip(tags, words):
        d[tag][word] += 1
    return d
        
word_counts = pair_counts(words, tags)

Let's produce a dictionary where words (keys) are associated with their most frequent Translated tag:

In [28]:
mfc_table = dict((word, max(tags.keys(), key=lambda key: tags[key])) for word, tags in word_counts.items())

In [29]:
i = 0
for key, value in mfc_table.items():
    print(key, value)
    i += 1
    if i > 3: break

सभी  everyone
पर्व  festivals
ओर  and
त्योहार  festival


Python namedtuple supports a type of container-like dictionary that, like dictionaries, contains keys that are hashed to particular values. But it supports both access from key values as well as iteration, the functionality that dictionaries lack.

Let's write a class that takes in a table in its constructor and adds <MISSING> English tags if the word is missing from the training set (possible that a word is in the test set but missing from the training set). It also has a viterbi method that takes in the table and builds a sequence of states that we will use in our Hidden Markov Model.

In [30]:
FakeState = namedtuple('FakeState', 'name')

class MFCTagger:
    missing = FakeState(name = '<MISSING>')
    
    def __init__(self, table):
        self.table = defaultdict(lambda: MFCTagger.missing)
        self.table.update({word: FakeState(name=tag) for word, tag in table.items()})
        
    def viterbi(self, seq):
        """This method simplifies predictions by matching the Pomegranate viterbi() interface"""
        return 0., list(enumerate(["<start>"] + [self.table[w] for w in seq] + ["<end>"]))

In [31]:
# Using Most frequent POS tag
mfc_model = MFCTagger(mfc_table)

In [32]:
def replace_unknown(sequence):
    return [w if w in data.training_set.vocab else 'nan' for w in sequence]

def simplify_decoding(X, model):    
    _, state_path = model.viterbi(replace_unknown(X))
    return [state[1].name for state in state_path[1:-1]]

In [33]:
for key in data.testing_set.keys[:2]:
    print("Sentence Key: {}\n".format(key))
    print("Sentence: {}\n".format(data.sentences[key].words))
    print("Predicted labels:\n-----------------")
    print(simplify_decoding(data.sentences[key].words, mfc_model))
    print()
    print("Actual labels:\n--------------")
    print(data.sentences[key].tags)
    print("\n")

Sentence Key: b100-27528

Sentence: ('इराक  ', 'के', 'विदेश', 'मंत्री', 'अमरीका', 'के', 'उस', 'प्रस्ताव', 'मजाक', 'उड़ाया', 'जिसमें', 'अमरीका', 'संयुक्त', 'राष्ट्र', 'के', 'प्रतिबंधों', 'इराकी', 'नागरिकों', 'के', 'लिए', 'कम', 'हानिकारक', 'बनाने', 'के', 'लिए', 'कहा', 'आतंकवाद', 'एक', 'ऐसी', 'समस्या', 'जिसने', 'न', 'केवल', 'भारत', 'अपितु', 'पूरे', 'विश्व', 'को', 'अपने', 'लपेटे', 'ले', 'रखा', 'है')

Predicted labels:
-----------------
['Iraq', 'for', 'foreign', 'minister', 'america', 'for', 'that', 'proposal', 'mock', 'made', 'in', 'america', 'united', 'nations', 'for', 'sanctions', 'Iraq', 'citizens', 'for', 'them', 'less', 'harmful', 'make', 'for', 'them', 'said', 'Terrorism', 'one', 'that', 'problem', 'which', 'not', 'only', 'India', 'but', 'whole', 'world', 'for', 'our', 'engulfed', 'take', 'kept', 'is']

Actual labels:
--------------
('Iraq', 'for', 'foreign', 'minister', 'america', 'for', 'that', 'proposal', 'mock', 'made', 'in', 'america', 'united', 'nations', 'for', 'sanctions', 'I

Pretty good! Let's evaluate the accuracy of the translator

In [34]:
def accuracy(X, Y, model):
    
    correct = total_predictions = 0
    for observations, actual_tags in zip(X, Y):
        
        # The model.viterbi call in simplify_decoding will return None if the HMM
        # raises an error (for example, if a test sentence contains a word that
        # is out of vocabulary for the training set). Any exception counts the
        # full sentence as an error (which makes this a conservative estimate).
        try:
            most_likely_tags = simplify_decoding(observations, model)
            correct += sum(p == t for p, t in zip(most_likely_tags, actual_tags))
        except:
            pass
        total_predictions += len(observations)
    return correct / total_predictions

In [35]:
mfc_training_acc = accuracy(data.training_set.X, data.training_set.Y, mfc_model)
print("training accuracy mfc_model: {:.2f}%".format(100 * mfc_training_acc))

mfc_testing_acc = accuracy(data.testing_set.X, data.testing_set.Y, mfc_model)
print("testing accuracy mfc_model: {:.2f}%".format(100 * mfc_testing_acc))

training accuracy mfc_model: 92.75%
testing accuracy mfc_model: 93.43%


# Hidden Markov Model

Unigram Counts

In [36]:
def unigram_counts(sequences):
    return Counter(sequences)

tags = [tag for i, (word, tag) in enumerate(data.training_set.stream())]
tag_unigrams = unigram_counts(tags)
tag_unigrams

Counter({'everyone': 14,
         'festivals': 18,
         'and': 27,
         'festival': 32,
         'mutual': 12,
         'love': 6,
         'brotherhood': 6,
         'harmonize': 6,
         'meet': 6,
         'message': 6,
         'gives': 12,
         'does': 16,
         'other': 6,
         'words': 6,
         'in': 14,
         'get': 12,
         'we': 6,
         'virtue': 6,
         'sympathy': 6,
         'cooperation': 6,
         'humanity': 6,
         'it': 43,
         'sense': 6,
         'worth': 6,
         'noting': 12,
         'is': 71,
         'That': 6,
         'any': 13,
         'or': 32,
         'indigenous': 6,
         'be': 34,
         'foreign': 11,
         'poor': 6,
         'rich': 6,
         'class': 6,
         'whether': 6,
         'above': 6,
         'characteristic': 6,
         'superiority': 6,
         'must': 6,
         'have': 10,
         'Christmas': 7,
         'December': 6,
         "'s": 2,
         'that': 10,
     

Bigram Counts

In [37]:
def bigram_counts(sequences):
    return Counter(sequences)

tags = [tag for i, (word, tag) in enumerate(data.stream())]
o = [(tags[i],tags[i+1]) for i in range(0,len(tags)-2,2)]
tag_bigrams = bigram_counts(o)
tag_bigrams 

Counter({('they', 'be'): 4,
         ('at', 'one'): 4,
         ('have', 'this'): 4,
         ('does', 'fix'): 4,
         ('six', 'read'): 4,
         ('hand', 'they'): 2,
         ('be', 'at'): 2,
         ('one', 'have'): 2,
         ('this', 'does'): 2,
         ('fix', 'six'): 2,
         ('read', 'hand'): 2,
         ('hand', 'half'): 2,
         ('km', 'a'): 4,
         ('length', 'at'): 4,
         ('country', 'special'): 4,
         ('where', 'only'): 4,
         ('mother', 'is'): 4,
         ('door', 'opening'): 4,
         ('and', 'closing'): 4,
         ('done', 'is'): 7,
         ('day', 'only'): 4,
         ('pray', 'done'): 4,
         ('is', 'done'): 4,
         ('half', 'km'): 3,
         ('a', 'length'): 3,
         ('at', 'country'): 3,
         ('special', 'where'): 3,
         ('only', 'mother'): 3,
         ('is', 'door'): 3,
         ('opening', 'and'): 3,
         ('closing', 'done'): 3,
         ('is', 'day'): 3,
         ('only', 'pray'): 3,
         ('done', 

Beginnings of the sentences count 

In [38]:
def starting_counts(sequences):
    return Counter(sequences)

tags = [tag for i, (word, tag) in enumerate(data.stream())]
starts_tag = [i[0] for i in data.Y]
tag_starts = starting_counts(starts_tag)
tag_starts

Counter({'they': 2,
         'half': 1,
         'everyone': 1,
         'Iraq': 5,
         'Christmas': 6,
         'earth': 1,
         'one': 1,
         'for': 1})

Endings of the sentences count

In [39]:
def ending_counts(sequences):    
    return Counter(sequences)

end_tag = [i[len(i)-1] for i in data.Y]
tag_ends = ending_counts(end_tag)
tag_ends

Counter({'hand': 1,
         'done': 2,
         'is': 2,
         'it': 6,
         'has': 5,
         'any': 1,
         'said': 1})

In [40]:
end_tag = [i[len(i)-2] for i in data.Y]
tag_ends = ending_counts(end_tag)
tag_ends

Counter({'read': 1,
         'is': 2,
         'important': 1,
         'have': 6,
         'kept': 5,
         'without': 1,
         'them': 1,
         'festival': 1})

Let's create our Hidden Markov Model and peek into most popular words.

tag_words_count contains words associated to each English tag, arranged by frequency so that we can eventually evaluate emission probabilities, which are probabilities of observable states (words) given hidden states (English translation tags).

In [41]:
hmm_model = HiddenMarkovModel(name="base-hmm-tagger")

tags = [tag for i, (word, tag) in enumerate(data.stream())]
words = [word for i, (word, tag) in enumerate(data.stream())]

tags_count = unigram_counts(tags)
tag_words_count = pair_counts(tags, words)

starting_tag_list = [i[0] for i in data.Y]
#ending_tag_list = [i[-1] if len(i)==1 else i[-2] for i in data.Y]
#ending_tag_list = [i[-1] for i in data.Y]
ending_tag_list = [i[len(i)-1] for i in data.Y]

starting_tag_count = starting_counts(starting_tag_list) #the number of times a tag occured at the start
ending_tag_count = ending_counts(ending_tag_list)       #the number of times a tag occured at the end

tag_words_count

defaultdict(<function __main__.pair_counts.<locals>.<lambda>()>,
            {'they': defaultdict(int, {'वे': 6}),
             'be': defaultdict(int, {'हो': 13, 'हो ': 21, 'क्यों ': 7}),
             'at': defaultdict(int, {'पर': 6, 'पर ': 7}),
             'one': defaultdict(int, {'एक': 13}),
             'have': defaultdict(int, {'है': 6, 'होती ': 7}),
             'this': defaultdict(int, {'इस': 6, 'इसे': 2}),
             'does': defaultdict(int, {'करता है': 6, 'हे ': 7, 'है': 7}),
             'fix': defaultdict(int, {'ठीक कर': 6}),
             'six': defaultdict(int, {'छह': 6}),
             'read': defaultdict(int, {'पढ़ा': 6}),
             'hand': defaultdict(int, {'हाथ': 6}),
             'half': defaultdict(int, {'आधा': 7}),
             'km': defaultdict(int, {'किमी ': 7}),
             'a': defaultdict(int, {'की ': 7}),
             'length': defaultdict(int, {'लंबाई ': 7}),
             'country': defaultdict(int, {'देश': 7}),
             'special': defaultdict(int, {'

In [42]:
to_pass_states = []
dist = []
for tag, words_dict in tag_words_count.items():
    total = float(sum(words_dict.values()))
    distribution = {word: count/total for word, count in words_dict.items()}
    dist.append(distribution)
    tag_emissions = DiscreteDistribution(distribution)
    tag_state = State(tag_emissions, name=tag)
    to_pass_states.append(tag_state)

Let's convert word frequencies by English tag to probabilities by dividing by the total number of words per English tag, yielding the distribution of words.

We'll define HMM emission probabilities using that distribution.

In [43]:
dist

[{'वे': 1.0},
 {'हो': 0.3170731707317073,
  'हो ': 0.5121951219512195,
  'क्यों ': 0.17073170731707318},
 {'पर': 0.46153846153846156, 'पर ': 0.5384615384615384},
 {'एक': 1.0},
 {'है': 0.46153846153846156, 'होती ': 0.5384615384615384},
 {'इस': 0.75, 'इसे': 0.25},
 {'करता है': 0.3, 'हे ': 0.35, 'है': 0.35},
 {'ठीक कर': 1.0},
 {'छह': 1.0},
 {'पढ़ा': 1.0},
 {'हाथ': 1.0},
 {'आधा': 1.0},
 {'किमी ': 1.0},
 {'की ': 1.0},
 {'लंबाई ': 1.0},
 {'देश': 1.0},
 {'विशेष': 1.0},
 {'जहाँ ': 1.0},
 {'केवल ': 0.30434782608695654,
  'ही ': 0.30434782608695654,
  'केवल': 0.34782608695652173,
  'ही': 0.043478260869565216},
 {'मां': 1.0},
 {'के ': 0.28,
  'जाता ': 0.09333333333333334,
  'का ': 0.18666666666666668,
  'है': 0.22666666666666666,
  'की ': 0.09333333333333334,
  'हैं': 0.09333333333333334,
  'के': 0.013333333333333334,
  'जाता': 0.013333333333333334},
 {'पट ': 1.0},
 {'खुलने ': 1.0},
 {'और ': 0.4666666666666667,
  'ओर ': 0.4666666666666667,
  'ओर': 0.03333333333333333,
  'तथा': 0.03333333333333333

In [44]:
to_pass_states

[{
     "class" : "State",
     "distribution" : {
         "class" : "Distribution",
         "dtype" : "str",
         "name" : "DiscreteDistribution",
         "parameters" : [
             {
                 "\u0935\u0947" : 1.0
             }
         ],
         "frozen" : false
     },
     "name" : "they",
     "weight" : 1.0
 }, {
     "class" : "State",
     "distribution" : {
         "class" : "Distribution",
         "dtype" : "str",
         "name" : "DiscreteDistribution",
         "parameters" : [
             {
                 "\u0939\u094b" : 0.3170731707317073,
                 "\u0939\u094b " : 0.5121951219512195,
                 "\u0915\u094d\u092f\u094b\u0902 " : 0.17073170731707318
             }
         ],
         "frozen" : false
     },
     "name" : "be",
     "weight" : 1.0
 }, {
     "class" : "State",
     "distribution" : {
         "class" : "Distribution",
         "dtype" : "str",
         "name" : "DiscreteDistribution",
         "parameters" : [


In [45]:
start_prob={}

for tag in tags:
    start_prob[tag] = starting_tag_count[tag] / tags_count[tag]

for tag_state in to_pass_states :
    hmm_model.add_transition(hmm_model.start, tag_state, start_prob[tag_state.name]) 

In [46]:
end_prob={}

for tag in tags:
    end_prob[tag] = ending_tag_count[tag]/tags_count[tag]
    
for tag_state in to_pass_states :
    hmm_model.add_transition(tag_state, hmm_model.end, end_prob[tag_state.name])

We now add the transition probabilities for our model, which uses our POS bigrams to enumerate what the probabilities are for transiting from one POS tag to another.

In [47]:
transition_prob_pair={}

for key in tag_bigrams.keys():
    transition_prob_pair[key] = tag_bigrams.get(key)/tags_count[key[0]]
    
for tag_state in to_pass_states:
    for next_tag_state in to_pass_states:
        if((tag_state.name,next_tag_state.name) in transition_prob_pair):
            hmm_model.add_transition(tag_state, next_tag_state, transition_prob_pair[(tag_state.name, next_tag_state.name)])

In [48]:
hmm_model.bake()

We can now compare the HMM model accuracy with the BOW model accuracy

In [49]:
hmm_training_acc = accuracy(data.training_set.X, data.training_set.Y, hmm_model)
print("training accuracy basic hmm model: {:.2f}%".format(100 * hmm_training_acc))

hmm_testing_acc = accuracy(data.testing_set.X, data.testing_set.Y, hmm_model)
print("testing accuracy basic hmm model: {:.2f}%".format(100 * hmm_testing_acc))

training accuracy basic hmm model: 72.21%
testing accuracy basic hmm model: 100.00%


Decoding Example for a random sample

In [50]:
for key in data.testing_set.keys[:3]:
    print("Sentence Key: {}\n".format(key))
    print("Sentence: {}\n".format(data.sentences[key].words))
    print("Predicted labels:\n-----------------")
    print(simplify_decoding(data.sentences[key].words, hmm_model))
    print()
    print("Actual labels:\n--------------")
    print(data.sentences[key].tags)
    print("\n")

Sentence Key: b100-27528

Sentence: ('इराक  ', 'के', 'विदेश', 'मंत्री', 'अमरीका', 'के', 'उस', 'प्रस्ताव', 'मजाक', 'उड़ाया', 'जिसमें', 'अमरीका', 'संयुक्त', 'राष्ट्र', 'के', 'प्रतिबंधों', 'इराकी', 'नागरिकों', 'के', 'लिए', 'कम', 'हानिकारक', 'बनाने', 'के', 'लिए', 'कहा', 'आतंकवाद', 'एक', 'ऐसी', 'समस्या', 'जिसने', 'न', 'केवल', 'भारत', 'अपितु', 'पूरे', 'विश्व', 'को', 'अपने', 'लपेटे', 'ले', 'रखा', 'है')

Predicted labels:
-----------------
['Iraq', 'for', 'foreign', 'minister', 'america', 'for', 'that', 'proposal', 'mock', 'made', 'in', 'america', 'united', 'nations', 'for', 'sanctions', 'Iraq', 'citizens', 'for', 'them', 'less', 'harmful', 'make', 'for', 'them', 'said', 'Terrorism', 'one', 'that', 'problem', 'which', 'not', 'only', 'India', 'but', 'whole', 'world', 'for', 'our', 'engulfed', 'take', 'kept', 'has']

Actual labels:
--------------
('Iraq', 'for', 'foreign', 'minister', 'america', 'for', 'that', 'proposal', 'mock', 'made', 'in', 'america', 'united', 'nations', 'for', 'sanctions', '