In [1]:
import numpy as np
from itertools import permutations

In [2]:
train_path_en = "Treebanks/english/train/wsj_train.only-projective.first-1k.conll06"

In [3]:
train_path_en.split('/')[-1]

'wsj_train.only-projective.first-1k.conll06'

In [4]:
class Token:
    
    def __init__(self):
        self.id = None
        self.form = None
        self.lemma = None
        self.pos = None
        self.xpos = None
        self.morph = None
        self.head = None
        self.deprel = None
        self.x = None
        self.y = None

In [5]:
class Sentence:
    
    def __init__(self, token_items):        
        # create ROOT token
        root = Token()
        root.id = '0'
        root.form = 'ROOT'
        root.lemma = '_'
        root.pos = 'ROOT'
        root.xpos = '_'
        root.morph = '_'
        root.head = '_'
        root.deprel = '_'
        root.x = '_'
        root.y = '_'
        
        # initialize tokens with ROOT
        self.tokens = [root]
        
        # add each token in token_items to sentence
        for token in token_items:
            self.tokens.append(token)
                
    def potential_arcs(self):
        '''
            arc_list = list of (head_int, token(dependency)_int) tuples
        '''
        #arc_list = list(permutations(self.tokens[1:], 2))
        arc_list = [(arc[0].id, arc[1].id) for arc in list(permutations(self.tokens[1:], 2))]
        for token in self.tokens[1:]:
            arc_list.append((self.tokens[0].id, token.id))
        return set(arc_list)
        
    def gold_arcs(self):
        '''
            arc_list = dictionary (key=dependent, val=head)
        '''
        arc_list = {}
        for token in self.tokens[1:]:
            #if token.form == "ROOT":
                #continue
            #else:
            head = token.head; dep = token.id
            arc_list[dep] = head
        return arc_list

In [6]:
class Reader:
    
    def __init__(self, filepath):
        self.filepath = filepath
        self.sentences = []
    
    def read_file(self):
        f = open(self.filepath)
        sentences = []
    
        # init token_items
        token_items = []
    
        for line in f:
            # init current sentence and token
            token = Token()
        
            # if not at end of sentence
            if line != '\n':
                items = line.split('\t')
                #print(items)
            
                # add token data
                token.id = items[0]
                token.form = items[1]
                token.lemma = items[2]
                token.pos = items[3]
                token.xpos = items[4]
                token.morph = items[5]
                token.head = items[6]
                token.deprel = items[7]
                token.x = items[8]
                token.y = items[9]
            
                # add Token to Sentence
                token_items.append(token)
        
            # add sentence and reset token_items
            else:
                sentences.append(Sentence(token_items))
                token_items = []
                
        f.close()
        self.sentences = sentences

In [7]:
class Writer:
    
    def __init__(self, filepath, sentences):
        self.filepath = filepath
        self.sentences = sentences
        
    def write_file(self):
        target_filename = self.filepath.split('/')[-1]
        with open(target_filename+'.pred', 'w') as f:
            for sentence in self.sentences:
                for token in sentence.tokens:
                    if token.form == 'ROOT':
                        continue
                    else:
                        line = ""
                        line+=token.id+'\t'
                        line+=token.form+'\t'
                        line+=token.lemma+'\t'
                        line+=token.pos+'\t'
                        line+=token.xpos+'\t'
                        line+=token.morph+'\t'
                        line+=token.head+'\t'
                        line+=token.deprel+'\t'
                        line+=token.x+'\t'
                        line+=token.y
                    
                        f.write(str(line))
                f.write('\n')
            f.close()

In [8]:
train_1k_sentences = Reader(train_path_en)
train_1k_sentences.read_file()
train_1k_sentences.sentences

[<__main__.Sentence at 0x1065e2f10>,
 <__main__.Sentence at 0x1065e2a30>,
 <__main__.Sentence at 0x1065e2af0>,
 <__main__.Sentence at 0x1065d7ca0>,
 <__main__.Sentence at 0x1065d7190>,
 <__main__.Sentence at 0x1065d70a0>,
 <__main__.Sentence at 0x1065d7070>,
 <__main__.Sentence at 0x1065b2f70>,
 <__main__.Sentence at 0x1065b2d60>,
 <__main__.Sentence at 0x1065fb490>,
 <__main__.Sentence at 0x1065fbbb0>,
 <__main__.Sentence at 0x1066073d0>,
 <__main__.Sentence at 0x106607790>,
 <__main__.Sentence at 0x1066120a0>,
 <__main__.Sentence at 0x1066123a0>,
 <__main__.Sentence at 0x1066129d0>,
 <__main__.Sentence at 0x106612dc0>,
 <__main__.Sentence at 0x10661c730>,
 <__main__.Sentence at 0x10661ce50>,
 <__main__.Sentence at 0x1066281f0>,
 <__main__.Sentence at 0x106628430>,
 <__main__.Sentence at 0x106628df0>,
 <__main__.Sentence at 0x106633220>,
 <__main__.Sentence at 0x106633490>,
 <__main__.Sentence at 0x106633a00>,
 <__main__.Sentence at 0x106633e20>,
 <__main__.Sentence at 0x10663d310>,
 

In [9]:
from difflib import Differ
 
with open(train_path_en) as file_1, open(train_path_en.split('/')[-1]+'.pred') as file_2:
    differ = Differ()
 
    for line in differ.compare(file_1.readlines(), file_2.readlines()):
        print(line)

  1	In	in	IN	_	_	43	ADV	_	_

  2	an	an	DT	_	_	5	NMOD	_	_

  3	Oct.	oct.	NNP	_	_	5	NMOD	_	_

  4	19	0	CD	_	_	3	NMOD	_	_

  5	review	review	NN	_	_	20	NMOD	_	_

  6	of	of	IN	_	_	5	NMOD	_	_

  7	``	``	``	_	_	9	P	_	_

  8	The	the	DT	_	_	9	NMOD	_	_

  9	Misanthrope	misanthrope	NN	_	_	6	PMOD	_	_

  10	''	''	''	_	_	9	P	_	_

  11	at	at	IN	_	_	9	NMOD	_	_

  12	Chicago	chicago	NNP	_	_	15	NMOD	_	_

  13	's	's	POS	_	_	12	NMOD	_	_

  14	Goodman	goodman	NNP	_	_	15	NMOD	_	_

  15	Theatre	theatre	NNP	_	_	11	PMOD	_	_

  16	(	(	-LRB-	_	_	20	P	_	_

  17	``	``	``	_	_	20	P	_	_

  18	Revitalized	revitalize	VBN	_	_	19	NMOD	_	_

  19	Classics	classic	NNS	_	_	20	SBJ	_	_

  20	Take	take	VBP	_	_	1	PMOD	_	_

  21	the	the	DT	_	_	22	NMOD	_	_

  22	Stage	stage	NN	_	_	20	OBJ	_	_

  23	in	in	IN	_	_	20	ADV	_	_

  24	Windy	windy	NNP	_	_	25	NMOD	_	_

  25	City	city	NNP	_	_	23	PMOD	_	_

  26	,	,	,	_	_	20	P	_	_

  27	''	''	''	_	_	20	P	_	_

  28	Leisure	leisure	NN	_	_	20	NMOD	_	_

  29	&	and	CC	_	_	28	COORD	_	_

  30	Arts	ar

# Decoder (Eisner's Algorithm)

In [10]:
test_matrix = np.array([[-10000, 9, 10, 9], 
               [-10000, -10000, 20, 3], 
               [-10000, 30, -10000, 30],
               [-10000, 11, 0, -10000]])
test_matrix[0, 2]

10

In [11]:
# Some constants
L, R = 0, 1
I, C = 0, 1
DIRECTIONS = (L, R)
COMPLETENESS = (I, C)
NEG_INF = -float('inf')


class Span(object):
    def __init__(self, left_idx, right_idx, head_side, complete):
        self.data = (left_idx, right_idx, head_side, complete)

    @property
    def left_idx(self):
        return self.data[0]

    @property
    def right_idx(self):
        return self.data[1]

    @property
    def head_side(self):
        return self.data[2]

    @property
    def complete(self):
        return self.data[3]

    def __str__(self):
        return "({}, {}, {}, {})".format(
            self.left_idx,
            self.right_idx,
            "L" if self.head_side == L else "R",
            "C" if self.complete == C else "I",
        )

    def __repr__(self):
        return self.__str__()

    def __hash__(self):
        return hash(self.data)

    def __eq__(self, other):
        return isinstance(other, Span) and hash(other) == hash(self)

In [12]:
def eisner(weight):
    """
    `N` denotes the length of sentence.

    :param weight: size N x N
    :return: the projective tree with maximum score
    """
    N = weight.shape[0]

    btp = {}  # Back-track pointer
    dp_s = {}

    # Init
    for i in range(N):
        for j in range(i + 1, N):
            for dir in DIRECTIONS:
                for comp in COMPLETENESS:
                    dp_s[Span(i, j, dir, comp)] = NEG_INF
    #print(dp_s)

    # base case
    for i in range(N):
        for dir in DIRECTIONS:
            dp_s[Span(i, i, dir, C)] = 0.
            btp[Span(i, i, dir, C)] = None

    rules = [
        # span_shape_tuple := (span_direction, span_completeness),
        # rule := (span_shape, (left_subspan_shape, right_subspan_shape))
        ((L, I), ((R, C), (L, C))),
        ((R, I), ((R, C), (L, C))),
        ((L, C), ((L, C), (L, I))),
        ((R, C), ((R, I), (R, C))),
    ]

    for size in range(1, N):
        for i in range(0, N - size):
            j = i + size
            for rule in rules:
                ((dir, comp), ((l_dir, l_comp), (r_dir, r_comp))) = rule

                if comp == I:
                    edge_w = weight[i, j] if (dir == R) else weight[j, i]
                    k_start, k_end = (i, j)
                    offset = 1
                else:
                    edge_w = 0.
                    k_start, k_end = (i + 1, j + 1) if dir == R else (i, j)
                    offset = 0

                span = Span(i, j, dir, comp)
                for k in range(k_start, k_end):
                    l_span = Span(i, k, l_dir, l_comp)
                    r_span = Span(k + offset, j, r_dir, r_comp)
                    s = edge_w + dp_s[l_span] + dp_s[r_span]
                    if s > dp_s[span]:
                        dp_s[span] = s
                        btp[span] = (l_span, r_span)

    # recover tree
    return back_track(btp, Span(0, N - 1, R, C), {})


def back_track(btp, span, edge_set):
    if span.complete == I:
        if span.head_side == L:
            edge = (span.right_idx, span.left_idx)
        else:
            edge = (span.left_idx, span.right_idx)
        edge_set[str(edge[1])] = str(edge[0])

    if btp[span] is not None:
        l_span, r_span = btp[span]

        back_track(btp, l_span, edge_set)
        back_track(btp, r_span, edge_set)
    else:
        return

    return edge_set

In [13]:
eisner(test_matrix)

{'2': '0', '1': '2', '3': '2'}

In [14]:
test_list = []
test_list.append((2, 1))
test_list

[(2, 1)]

In [15]:
np.zeros([4, 4])[1, 1]

0.0

In [16]:
tree = []

def Eisner(edge_scores):
    # Get length of sentence
    #n = len(sentence.tokens)
    n=4
    
    # Initialize matrices with zeros
    O_r = np.zeros([n, n], dtype=np.float32)
    O_l = np.zeros([n, n], dtype=np.float32)
    C_r = np.zeros([n, n], dtype=np.float32)
    C_l = np.zeros([n, n], dtype=np.float32)
    
    # Initialize backtracking matrices
    b_Or = np.zeros([n, n], dtype=np.float32)
    b_Ol = np.zeros([n, n], dtype=np.float32)
    b_Cr = np.zeros([n, n], dtype=np.float32)
    b_Cl = np.zeros([n, n], dtype=np.float32)
    
    for m in np.arange(1, n):
        for s in np.arange(0, n-m):
            t = s+m
            
            # O_r
            q=s
            max_q_score = -10000; max_q = q
            while q < t:
                curr_q_score = C_l[s][q] + C_r[q+1][t] + edge_scores[t][s]
                #print("O_r step: " + str((C_l[s][q], C_r[q+1][t], edge_scores[t][s])))
                if curr_q_score > max_q_score:
                    max_q_score = curr_q_score; max_q = q
                q += 1
            #print("O_r max_q: " + str(max_q_score))
            O_r[s][t] = max_q_score; b_table[s][t] = (max_q, "Open", "R")
            
            # O_l
            q=s
            max_q_score = -10000; max_q = q
            while q < t:
                curr_q_score = C_l[s][q] + C_r[q+1][t] + edge_scores[s][t]
                #print("O_l step: " + str((C_l[s][q], C_r[q+1][t], edge_scores[s][t])))
                if curr_q_score > max_q_score:
                    max_q_score = curr_q_score; max_q = q
                q += 1
            #print("O_l max_q: " + str(max_q_score))
            O_l[s][t] = max_q_score; b_table[s][t] = (max_q, "Open", "L")
            
            # C_r
            q=s
            max_q_score = -10000; max_q = q
            while q < t:
                curr_q_score = C_r[s][q] + O_r[q][t]
                #print("C_r step: " + str((C_r[s][q], O_r[q][t])))
                if curr_q_score > max_q_score:
                    max_q_score = curr_q_score; max_q = q
                q += 1
            #print("C_r max_q: " + str(max_q_score))
            C_r[s][t] = max_q_score; b_table[s][t] = (max_q, "Closed", "R")
                
            # C_l
            q=s+1
            max_q_score = -10000; max_q = q
            while q <= t:
                curr_q_score = O_l[s][q] + C_l[q][t]
                #print("C_l step: " + str((O_l[s][q], C_l[q][t])))
                if curr_q_score > max_q_score:
                    max_q_score = curr_q_score; max_q = q
                q += 1
            #print("C_l max_q: " + str(max_q_score))
            C_l[s][t] = max_q_score; b_table[s][t] = (max_q, "Closed", "L")
            
            #print("s: " + str(s))
            #print("")

    print(O_r)
    print(O_l)
    print(C_r)
    print(C_l)
    print("")
    
    for line in b_table:
        print(line)
    
    d_index = np.argmax(C_l[0])
    tree.append((0, d_index))
    #return C_l
    #return backtrack(b_Or, b_Ol, b_Cr, b_Cl, 0, d_index, n)
    return C_l[0][n-1]

def backtrack(C_r, C_l, b_Cr, b_Cl, h_index, d_index, n):
    if h_index > d_index:
        direction = "L"
    else:
        direction = "R"
    for n in np.arange(0, n):
        if direction == "L":
            next_index = np.argmax(C_l[h_index])
            tree.append((h_index, next_index))
            h_index = b_Cl[h_index][next_index]
        else:
            next_index = np.argmax(C_r[h_index])
            h_index = b_Cr[h_index][next_index]

In [17]:
Eisner(test_matrix)

NameError: name 'b_table' is not defined

# Feature Templates, Extraction, and Mapping

In [None]:
### Feature templates

unigram_features = ["hform, hpos", "hform", "hpos", "dform, dpos", "dform", "dpos"]
bigram_features = ["hform, hpos, dform, dpos", 
                   "hpos, dform, dpos", 
                   "hform, dform, dpos", 
                   "hform, hpos, dform", 
                   "hform, hpos, dpos", 
                   "hform, dform", 
                   "hpos, dpos"]

# Combine features with direction of edge and distance between head and dependent

In [18]:
test_sentence = train_1k_sentences.sentences[0]
vars(test_sentence.tokens[1])

{'id': '1',
 'form': 'In',
 'lemma': 'in',
 'pos': 'IN',
 'xpos': '_',
 'morph': '_',
 'head': '43',
 'deprel': 'ADV',
 'x': '_',
 'y': '_\n'}

In [19]:
test_sentence.gold_arcs()

{'1': '43',
 '2': '5',
 '3': '5',
 '4': '3',
 '5': '20',
 '6': '5',
 '7': '9',
 '8': '9',
 '9': '6',
 '10': '9',
 '11': '9',
 '12': '15',
 '13': '12',
 '14': '15',
 '15': '11',
 '16': '20',
 '17': '20',
 '18': '19',
 '19': '20',
 '20': '1',
 '21': '22',
 '22': '20',
 '23': '20',
 '24': '25',
 '25': '23',
 '26': '20',
 '27': '20',
 '28': '20',
 '29': '28',
 '30': '29',
 '31': '20',
 '32': '43',
 '33': '34',
 '34': '43',
 '35': '34',
 '36': '35',
 '37': '34',
 '38': '34',
 '39': '38',
 '40': '41',
 '41': '39',
 '42': '34',
 '43': '0',
 '44': '43',
 '45': '43',
 '46': '45',
 '47': '48',
 '48': '46',
 '49': '43'}

In [20]:
test_data = train_1k_sentences.sentences[:1]

In [21]:
next((x for x in test_data[0].tokens if x.id == '1'), None)

<__main__.Token at 0x105838f10>

In [34]:
### Feature extraction
test_features = ["dform", "dpos", "dform, dpos"]

class FeatureMapping:
    
    '''
        IMPORTANT: m features for each n token
        AKA n*m feature vectors
        .map -> dictionary
            keys = feature_name, values = feature_vector_id
                ex. feature_name = 'hform=likes'
                    feature_vector_id = 1
    '''
    
    def __init__(self, feature_list, sentences):
        self.sentences = sentences
        self.feature_list = feature_list
        self.feature_dict = {"dform": "form", "dpos": "pos", "dform, dpos": "form, pos"}
        self.map = {}
        #self.feature_vectors = []
        self.frozen = False
    
    def get_featureVal(self, feature_name, token):
        '''
            Get full feature name from feature_name and token
        '''
        full_feature = feature_name+"="
        # Unigram features
        if feature_name in ["dform", "dpos"]:
            feature_val = getattr(token, feature_name[1:])
            if feature_val == "_":
                full_feature += "_NULL_"
            else:
                full_feature += feature_val
        # Bigram features
        if feature_name == "dform, dpos":
            form_val = "_NULL_" if token.form == "_" else token.form
            pos_val = "_NULL_" if token.pos == "_" else token.pos
            full_feature += form_val+"+"+pos_val
        return full_feature
    
    def create_mapping(self):
        '''
            Create feature mapping for all sentences
        '''
        i=0
        for sentence in self.sentences:
            sentence_arcs = sentence.potential_arcs()
            for arc in sentence_arcs:
                # Get current arc
                #print(arc)
                head = next((x for x in sentence.tokens if x.id == arc[0]), None)
                dep = next((x for x in sentence.tokens if x.id == arc[1]), None)
                for feature_name in self.feature_list:
                    full_feature = self.get_featureVal(feature_name, dep)
                    # Check if full_feature already exists
                    if full_feature not in self.map.keys():
                        self.map[full_feature] = i
                        i += 1
    
    def extract(self, sentence):
        '''
            Extract arc-wise features for one sentence
        '''
        sentence_arcs = sentence.potential_arcs()
        arc_vectors = {}
        for arc in sentence_arcs:
            feature_vector = []
            dep = next((x for x in sentence.tokens if x.id == arc[1]), None)
            for feature_name in self.feature_list:
                full_feature = self.get_featureVal(feature_name, dep)
                feature_vector.append(self.map[full_feature])
            arc_vectors[arc] = feature_vector
        return arc_vectors

In [35]:
test_mapping = FeatureMapping(test_features, train_1k_sentences.sentences)
test_mapping.create_mapping()
test_mapping.map

{'dform=,': 0,
 'dpos=,': 1,
 'dform, dpos=,+,': 2,
 'dform=In': 3,
 'dpos=IN': 4,
 'dform, dpos=In+IN': 5,
 'dform=Windy': 6,
 'dpos=NNP': 7,
 'dform, dpos=Windy+NNP': 8,
 'dform=The': 9,
 'dpos=DT': 10,
 'dform, dpos=The+DT': 11,
 'dform=Christina': 12,
 'dform, dpos=Christina+NNP': 13,
 'dform=Revitalized': 14,
 'dpos=VBN': 15,
 'dform, dpos=Revitalized+VBN': 16,
 'dform=Celimene': 17,
 'dform, dpos=Celimene+NNP': 18,
 'dform=of': 19,
 'dform, dpos=of+IN': 20,
 'dform=Goodman': 21,
 'dform, dpos=Goodman+NNP': 22,
 'dform=attributed': 23,
 'dform, dpos=attributed+VBN': 24,
 'dform=Arts': 25,
 'dpos=NNS': 26,
 'dform, dpos=Arts+NNS': 27,
 'dform=in': 28,
 'dform, dpos=in+IN': 29,
 'dform=``': 30,
 'dpos=``': 31,
 'dform, dpos=``+``': 32,
 'dform=.': 33,
 'dpos=.': 34,
 'dform, dpos=.+.': 35,
 "dform='s": 36,
 'dpos=POS': 37,
 "dform, dpos='s+POS": 38,
 'dform=)': 39,
 'dpos=-RRB-': 40,
 'dform, dpos=)+-RRB-': 41,
 'dform=the': 42,
 'dform, dpos=the+DT': 43,
 'dform=by': 44,
 'dform, d

In [37]:
#test_mapping.extract(train_1k_sentences.sentences[1])

{('5', '3'): [102, 103, 104],
 ('1', '2'): [79, 7, 80],
 ('3', '4'): [105, 7, 106],
 ('5', '1'): [107, 7, 108],
 ('1', '4'): [105, 7, 106],
 ('4', '3'): [102, 103, 104],
 ('3', '5'): [33, 34, 35],
 ('4', '5'): [33, 34, 35],
 ('0', '2'): [79, 7, 80],
 ('3', '1'): [107, 7, 108],
 ('4', '1'): [107, 7, 108],
 ('2', '4'): [105, 7, 106],
 ('1', '3'): [102, 103, 104],
 ('1', '5'): [33, 34, 35],
 ('0', '4'): [105, 7, 106],
 ('2', '3'): [102, 103, 104],
 ('5', '2'): [79, 7, 80],
 ('2', '5'): [33, 34, 35],
 ('2', '1'): [107, 7, 108],
 ('0', '3'): [102, 103, 104],
 ('0', '1'): [107, 7, 108],
 ('5', '4'): [105, 7, 106],
 ('0', '5'): [33, 34, 35],
 ('3', '2'): [79, 7, 80],
 ('4', '2'): [79, 7, 80]}

# Weight Vector

In [24]:
test_weights = np.ones(len(test_mapping.map), dtype=np.float32)
#test_weights[0] = 1
#test_weights[4] = 1
#test_weights[5] = 1
test_weights

array([1., 1., 1., ..., 1., 1., 1.], dtype=float32)

# Evaluation

In [25]:
def get_UAS(pred_arcs, gold_arcs):
    

IndentationError: expected an indented block (3552507006.py, line 2)

# Perceptron

In [26]:
def edge_scores(weights, features, sentence):
    features.create_mapping()
    arcs = sentence.potential_arcs()
    scores = np.zeros([len(sentence.tokens), len(sentence.tokens)], dtype=np.float32)
    feature_vectors = features.extract(sentence)
    for arc in arcs:
        head, dep = int(arc[0]), int(arc[1])
        # Calculating arc scores
        scores[head][dep] = sum(weights[feature_vectors[arc]])
    return scores

In [27]:
edge_scores(test_weights, test_mapping, test_data[0])

array([[0., 3., 3., ..., 3., 3., 3.],
       [0., 0., 3., ..., 3., 3., 3.],
       [0., 3., 0., ..., 3., 3., 3.],
       ...,
       [0., 3., 3., ..., 0., 3., 3.],
       [0., 3., 3., ..., 3., 0., 3.],
       [0., 3., 3., ..., 3., 3., 0.]], dtype=float32)

In [38]:
def perceptron(data, features, epochs):
    weights = np.zeros(len(features.map), dtype=np.float32)
    for i in np.arange(epochs):
        print("Epoch: " + str(i+1))
        correct_arcs = 0; total_arcs = 0
        sentence_count = 1
        for sentence in data:
            print(sentence_count)
            arc_scores = edge_scores(weights, features, sentence)
            predicted = eisner(arc_scores); gold = sentence.gold_arcs()
            #print(predicted)
            #print("")
            #print(gold)
            feature_vectors = features.extract(sentence)
            # Compare predicted tree with gold tree looping over tokens
            for token in sentence.tokens[1:]:
                total_arcs += 1
                dep = token.id
                #print(dep)
                predicted_head = predicted[dep]
                #print("Predicted head: " + predicted_head)
                gold_head = gold[dep]
                #print("Gold head: " + gold_head)
                predicted_arc = (predicted_head, dep)
                gold_arc = (gold_head, dep)
                #print("")
                #print("Predicted: " + (str(predicted_arc)))
                #print("Gold: " + str(gold_arc))
                if predicted_arc[0] != gold_arc[0]:
                    predicted_arc_vector = feature_vectors[predicted_arc]
                    gold_arc_vector = feature_vectors[gold_arc]
                    #print(predicted_arc_vector, gold_arc_vector)
                    for gold_arc_index in gold_arc_vector:
                        weights[gold_arc_index] += 1
                    for predicted_arc_index in predicted_arc_vector:
                        weights[predicted_arc_index] -= 1
                else:
                    correct_arcs += 1
            sentence_count += 1
                    
#            for predicted_arc in predicted:
#                # Add to total
#                total_arcs += 1
#                predicted_head, dep = str(predicted_arc[0]), str(predicted_arc[1])
#                predicted_arc = (predicted_head, dep)
#                # If predicted head is incorrect, update weights
#                if predicted_head != gold_head:
#                    gold_arc = (gold_head, dep)
#                    gold_arc_vector = feature_vectors[gold_arc]
#                    print(gold_arc, gold_arc_vector)
#                    predicted_arc_vector = feature_vectors[predicted_arc]
#                    for gold_arc_index in gold_arc_vector:
#                        weights[gold_arc_index] += 1
#                    for predicted_arc_index in predicted_arc_vector:
#                        weights[predicted_arc_index] -= 1
#                # Otherwise, add to correct predictions for UAS
#                else:
#                    correct_arcs += 1
                    
        UAS = correct_arcs / total_arcs
        print("UAS: " + str(UAS))

In [39]:
perceptron(train_1k_sentences.sentences, test_mapping, 1)

Epoch: 1
1
2
3
4
5


KeyboardInterrupt: 