## chartEntry Datastructure

In [1]:
from __future__ import print_function

class chartEntry:
    """
    This class implements the data structure "Entry" described in the baseline
    algorithm. The pseudocode for the description is:
    
    Entry(word, start-position, end-position, log-probability, back-pointer)
    
    We define the __lt__ and __eq__ operators to allow comparisons between two entries when 
    trying to push them into the heap. These operators respect the sort_acc_to variable and 
    provide a boolean value accordingly. 

    It supports operations based on two ideas - i) sorting by start_pos
                                                ii) sorting by log_prob. 

    Any sort_acc_to value other than 'start_pos' selects comparison by log_prob.
    The comparison key is always chosen by the left-hand operand. Equality follows
    the same key, so two distinct entries compare equal when their keys match.
    Comparing against an object that is not a chartEntry returns NotImplemented
    (so `entry == None` is simply False instead of raising AttributeError).
        
    EXAMPLE USAGE: 
        p = chartEntry('Anmol', 0, 4, 0.2, -1, sort_acc_to='start_pos')
        print(p)
        p.get_item('log_prob')
        e1 = chartEntry('Anmol', 0, 4, 0.2, -1)
        e2 = chartEntry("Shreeashish", 5, 10, 0.5, 0)
        e3 = chartEntry('Amir Ali', 11, 16, 0.4, 1)
        
    This work is a part of the Assignment 1 of CMPT 825 Natural Language Processing
    taught by Prof. Anoop Sarkar. 
    
    AUTHOR: Anmol Sharma, GroupNLP
    INSTITUTION: Simon Fraser University
    """
    
    def __init__(self, word, start_pos, end_pos, log_prob, back_ptr, sort_acc_to='start_pos'):
        """
        Store the five fields of an entry plus the comparison mode.

        word        -- the word covered by this entry
        start_pos   -- start position of the word
        end_pos     -- end position of the word
        log_prob    -- log-probability associated with the entry
        back_ptr    -- back-pointer; may be a number, None or any other object
        sort_acc_to -- 'start_pos' to compare by start position, anything else
                       to compare by log_prob
        """
        self.instance = {
            'word': word,
            'start_pos': start_pos,
            'end_pos': end_pos,
            'log_prob': log_prob,
            'back_ptr': back_ptr,
        }
        self.__sort_type = sort_acc_to

    def _sort_key_name(self):
        """Return the name of the field this entry is compared by."""
        return 'start_pos' if self.__sort_type == 'start_pos' else 'log_prob'
        
    def __repr__(self):
        back_ptr = self.instance['back_ptr']
        # Numeric back-pointers keep the original "%f" rendering; anything else
        # (None, another entry, ...) falls back to repr() instead of raising.
        try:
            back_ptr_text = "%f" % (back_ptr,)
        except TypeError:
            back_ptr_text = repr(back_ptr)
        return "chartEntry(%s, %d, %d, %f, %s)" % (self.instance['word'],
                                                   self.instance['start_pos'],
                                                   self.instance['end_pos'],
                                                   self.instance['log_prob'],
                                                   back_ptr_text)
    
    def __lt__(self, other_obj):
        """Return True when this entry's sort key is smaller than other_obj's."""
        if not isinstance(other_obj, chartEntry):
            return NotImplemented
        key = self._sort_key_name()
        return self.instance[key] < other_obj.instance[key]
    
    def __eq__(self, other_obj):
        """Return True when both entries have the same value for the sort key."""
        if not isinstance(other_obj, chartEntry):
            return NotImplemented
        key = self._sort_key_name()
        return self.instance[key] == other_obj.instance[key]

    def __ne__(self, other_obj):
        """Inverse of __eq__; defined explicitly so Python 2 agrees with Python 3."""
        outcome = self.__eq__(other_obj)
        return outcome if outcome is NotImplemented else not outcome
    
    def get_item(self, key):
        """Return the stored field named key, or the string "Undefined Key" if absent."""
        return self.instance[key] if key in self.instance else "Undefined Key"
    

## Start building the heap

In [2]:
import heapq as heapq
class Heap:
    """
    A class wrapper for heapq datastructure implementation of python. Python's default heapq 
    implementation requires a list as initialized heap, however it doesn't provide
    any safeguards against the fact that the underlying list may be changed by some function. 
    
    To provide safeguard mechanism, this class wraps the push and pop functions of heapq
    and keeps its own private copy of any initial items, so the caller's list is neither
    reordered by the heap nor able to corrupt the heap when it is modified later.
    
    EXAMPLE USAGE:
        sort_acc_to='log_prob'
        p = chartEntry('Anmol', 0, 4, 0.2, -1, sort_acc_to)
        print(p)
        p.get_item('log_prob')
        e1 = chartEntry('Anmol', 0, 4, 0.6, -1, sort_acc_to)
        e2 = chartEntry("Shreeashish", 5, 10, 0.5, 0, sort_acc_to)
        e3 = chartEntry('Amir Ali', 11, 16, 0.1, 1, sort_acc_to)
        heap1 = Heap()
        heap1.push(e1)
        heap1.push(e2)
        heap1.push(e3)
        heap1.pop()
        
    This work is a part of the Assignment 1 of CMPT 825 Natural Language Processing
    taught by Prof. Anoop Sarkar. 
    
    AUTHOR: Anmol Sharma, GroupNLP
    INSTITUTION: Simon Fraser University
    """
    
    def __init__(self, ls=None):
        """
        Build a heap from the optional iterable ls (empty heap when ls is None).

        The items are copied into a new list before heapify runs, so the
        caller's sequence is left untouched.
        """
        # Copy instead of aliasing: heapify reorders its argument in place.
        self.__heap = list(ls) if ls is not None else []
        heapq.heapify(self.__heap)
        
    def push(self, item):
        """Insert item while maintaining the heap invariant."""
        heapq.heappush(self.__heap, item)
    
    def pop(self):
        """Remove and return the smallest item; raises IndexError on an empty heap."""
        return heapq.heappop(self.__heap)
    
    def __len__(self):
        """Number of items currently stored in the heap."""
        return len(self.__heap)
    
    def __repr__(self):
        """Render the underlying list in its internal heap order."""
        return "{}".format(self.__heap)

### Testing

In [3]:
# Every entry built in this cell is compared by log-probability, not start position.
sort_acc_to='log_prob'

# A standalone entry used to exercise __repr__ and get_item.
p = chartEntry(word='Anmol', start_pos=0, end_pos=4, log_prob=0.2, back_ptr=-1,
               sort_acc_to=sort_acc_to)
print(p)
# Not the last expression of the cell, so the returned value is not displayed.
p.get_item('log_prob')

# Three entries whose log_prob decreases (0.6, 0.5, 0.1) while start_pos increases.
e1 = chartEntry(word='Anmol', start_pos=0, end_pos=4, log_prob=0.6, back_ptr=-1,
                sort_acc_to=sort_acc_to)
e2 = chartEntry(word="Shreeashish", start_pos=5, end_pos=10, log_prob=0.5, back_ptr=0,
                sort_acc_to=sort_acc_to)
e3 = chartEntry(word='Amir Ali', start_pos=11, end_pos=16, log_prob=0.1, back_ptr=1,
                sort_acc_to=sort_acc_to)

chartEntry(Anmol, 0, 4, 0.200000, -1.000000)


In [4]:
# Print the three test entries on one line; each is rendered by chartEntry.__repr__.
print(e1, e2, e3)

chartEntry(Anmol, 0, 4, 0.600000, -1.000000) chartEntry(Shreeashish, 5, 10, 0.500000, 0.000000) chartEntry(Amir Ali, 11, 16, 0.100000, 1.000000)


In [5]:
# Start from an empty heap (no initial list supplied).
heap1 = Heap()

In [6]:
# Heap.__repr__ renders the underlying list; nothing has been pushed yet.
print(heap1)

[]


Testing the heap property: because the entries were created with `sort_acc_to='log_prob'`, the entry with the lowest log-prob is the first to be popped, regardless of the order in which the entries are pushed onto the heap. 

In [7]:
# Push in the order e1, e2, e3, i.e. with decreasing log_prob (0.6, 0.5, 0.1).
heap1.push(e1)
heap1.push(e2)
heap1.push(e3)

In [8]:
# First pop: the entry with the smallest log_prob, e3 (0.1).
heap1.pop()

chartEntry(Amir Ali, 11, 16, 0.100000, 1.000000)

In [9]:
# Second pop: the next smallest log_prob, e2 (0.5).
heap1.pop()

chartEntry(Shreeashish, 5, 10, 0.500000, 0.000000)

In [10]:
# Third pop: the remaining entry, e1 (log_prob 0.6); heap1 is empty afterwards.
heap1.pop()

chartEntry(Anmol, 0, 4, 0.600000, -1.000000)

Repeating the test with the entries pushed in the reverse order (e3, e2, e1): the entry with the lowest log-prob is still the first to be popped, so the pop order does not depend on the push order. 

In [11]:
# Second, independent empty heap for the reverse-insertion-order check.
heap2 = Heap()

In [12]:
# Push in the order e3, e2, e1, i.e. with increasing log_prob (0.1, 0.5, 0.6).
heap2.push(e3)
heap2.push(e2)
heap2.push(e1)

In [13]:
# First pop: the entry with the smallest log_prob, e3 (0.1).
heap2.pop()

chartEntry(Amir Ali, 11, 16, 0.100000, 1.000000)

In [14]:
# Second pop: the next smallest log_prob, e2 (0.5).
heap2.pop()

chartEntry(Shreeashish, 5, 10, 0.500000, 0.000000)

In [15]:
# Third pop: the remaining entry, e1 (log_prob 0.6); heap2 is empty afterwards.
heap2.pop()

chartEntry(Anmol, 0, 4, 0.600000, -1.000000)