In [1]:
import xml.etree.ElementTree as ET
import numpy as np
from copy import deepcopy
from pprint import pprint
from keras.preprocessing import sequence as seq
import glob

  from ._conv import register_converters as _register_converters
Using Theano backend.


In [2]:
def _check_sentence(xml_tree, accept_tags=["forest","tree"]):
    
    """
    Funkcja sprawdza poprawnosc wypowiedzenia i arumentu: 
    - czy istnieje dla niego poprawne drzewo - wypowiedzenie jest poprawne jesli base_answer na polu "type" ma wartosc "FULL".
    - arumentem powinno byc drzewo o tagu korzenia rownym "forest" lub "tree".
    [W oryginalnych plikach z lasami jest to "forest", natomiast gdy z lasu tworzone sa pojedyncze drzewa,
    to maja one tag "tree"]
    
    xml_tree - las drzew lub drzewo [xml.etree.ElementTree.ElementTree]
    """
    
    if type(xml_tree) != ET.ElementTree:
        raise AssertionError("Argument xml_tree is not not ElementTree")
    
    
    if type(accept_tags) == str:
        accept_tags = [accept_tags]
    
    
    if not xml_tree.getroot().tag in accept_tags:
        raise AssertionError('Argument in not in [' + ",".join(accept_tags) + '] - it has tag "' + xml_tree.getroot().tag + '"' )
    
    
    base_answer_type = xml_tree.getroot().find('.//answer-data//base-answer').attrib["type"]
    correct = base_answer_type == "FULL"

    if not correct:
        raise AssertionError("Sentence is not correct: Node <base-answer> has type value " + base_answer_type  + " instead of 'FULL'")
        
    pass


def number_of_trees_in_forest(forest):

    """
    Funkcja zwraca liczbe drzew w lesie forest.
    
    forest - las drzew [xml.etree.ElementTree.ElementTree]
    """
    
    _check_sentence(forest,"forest")
    
    return int(forest.find("stats").attrib["trees"])
    
    
def get_node(tree, node_id):
    return(tree.find(".//node[@nid='" + str(node_id) + "']"))

def is_ambigous(node):
    if len(node.findall("children"))>1:
        return(True)
    else:
        return(False)
    
def is_terminal(node):
    if node.find("terminal"):
        return(True)
    else:
        return(False)    
    
def transform_abmigous_node_to_disjunctive_nodes(node, root, max_id_of_conjunctive_nodes):
 
    """
    Funkcja modyfikuje node przez referencje!
    """

    number_of_disjunctive_nodes = 0

    childrens = node.findall("children")

    disjunctive_children = ET.Element('children')

    node.set("type","conjunctive_node_with_disjunctive_children")
    
    for children in list(childrens):

        new_disjunctive_node = ET.SubElement(root, 'node')
        new_disjunctive_node.set("type", "disjunctive")
        number_of_disjunctive_nodes += 1

        new_id = str(max_id_of_conjunctive_nodes+number_of_disjunctive_nodes)
        new_disjunctive_node.set("nid",new_id)

        disjuntive_child = ET.SubElement(disjunctive_children, 'child')
        disjuntive_child.set("nid",new_id)

        new_disjunctive_node.set("chosen",children.attrib.get("chosen","false"))
        category = ET.SubElement(new_disjunctive_node, 'nonterminal')
        category = ET.SubElement(category, 'category')
        category.text = "disjunctive_node"
        new_disjunctive_node.append(deepcopy(children))

        node.remove(children)

    node.append(disjunctive_children)
    
    
def get_maximum_node_id(tree):
    return(np.max([int(node.attrib["nid"]) for node in tree.findall("node")]))

def transform_to_graph_with_con_and_dis_nodes(tree):
    
    max_id_of_conjunctive_nodes = get_maximum_node_id(tree)
    root = tree.getroot()

    for node_id in range(max_id_of_conjunctive_nodes+1):

        node = get_node(tree, node_id)
        node.set("type", "terminal" if is_terminal(node) else "conjunctive")

        if is_ambigous(node):

            transform_abmigous_node_to_disjunctive_nodes(node, root, get_maximum_node_id(tree))

def terminals(tree):

    terminal_nodes = [x for x in tree.findall("node[terminal]")]

    terminals = [[(x.attrib["nid"],
                   x.find("terminal//orth").text.replace(" ", ""),  # zdarzaja sie przypadki ze token zawiera w sobie spacje i potem wyglada to jakby bylo wiecej tokenow i sie dlugosc nie zgadza
                   x.find("terminal//base").text, 
                   x.find("terminal//f").text)]  for x in terminal_nodes]

    ids = [x[0][0] for x in terminals]

    return terminals, ids 



def get_ids(tree):
    
    _ , ids = terminals(tree) 
    
    while "0" not in ids:

        for nid in ids:
            t = time.time()
            parents = tree.findall(".//children/child[@nid='"+str(nid)+"']....")

            for parent in parents:
                childs = parent.findall("children/child")
                childs_ids = [child.attrib["nid"] for child in childs]
                if np.all([child_id in ids for child_id in childs_ids]) and parent.attrib["nid"] not in ids:
                    ids.append(parent.attrib["nid"])

    return(ids)

def type_of_node(node):
    return(node.attrib["type"])


def token(terminal):
    assert is_terminal(terminal)
    return(terminal.find("terminal//orth").text.replace(" ", "")) # zdarzaja sie sytuacje, ze w tokenie jest spacja co psuje strukture


def get_head(tree, node_id):
    
    node = get_node(tree, node_id)
    
    if type_of_node(node) == "terminal":
        return(token(node))
    

    if type_of_node(node) == "disjunctive":      
        try:
            head_child_id = node.find("children/child[@head='true']").attrib["nid"]
            return(get_head(tree, head_child_id))
        except:
            return("__head_unknown__")
    
    
    children = node.findall("children")
    childs = node.findall("children/child")
    if len(children)==1 and type_of_node(get_node(tree,childs[0].attrib["nid"])) != "disjunctive": # wierzcholek jest koniunktywny i ma dzieci koninktywne
        try:
            head_child_id = node.find("children/child[@head='true']").attrib["nid"]
            return(get_head(tree, head_child_id))
        except:
            return("__head_unknown__")
    
        
    else: #mamy wierzcholek koniunktywny, ktorego dzieci sa dysjunktywne
        
        child_ids = [child.attrib["nid"] for child in node.findall("children/child")]
        heads = [get_head(tree, child_id) for child_id in child_ids]
        if len(set(heads))==1: #wszystkie opcje maja taka sama glowe
            return(heads[0])# to glowa wierzcholka dysjunktywnego jest wyznaczona, bo niezalezo od opcji
        else:
            return("__node_with_undetermined_head__")
        #MOZNA TEZ ROZWAZYC CZY NIE POWINNA BYC TO SREDNIA Z EMBEDDINGOW MOZLIWYCH HEAD'OW
    

    
def get_children_rule(tree, node):
    
    if is_terminal(node):
        return("__terminal__")
    
    
    children = node.find("children")
    
    if "rule" in children.attrib.keys():
        return(children.attrib["rule"])
    else:
        children_rules = [get_children_rule(tree,get_node(tree,child.attrib["nid"])) for child in children.findall("child")]
        if len(set(children_rules))==1:
            return(children_rules[0])
        else:
            return("__node_with_undetermined_children_rule__")

def get_info(tree, nid):
    
    node = get_node(tree, nid)
    
    if type_of_node(node) == "terminal":
        
        infos = [get_children_rule(tree, node),
                 node.find("terminal//base").text, 
                 node.find("terminal//f").text]
    else:
        
        
        children_rule = get_children_rule(tree, node)
        
        infos = [x.text for x in node.find("nonterminal").getchildren()]
        categories = infos[0]
        attributes = ":".join(infos[1:]) if len(infos)>1 else "None"
        infos = [children_rule, categories, attributes]
        
        # JESLI NA ZBIORZE TESTOWYM POJAWIA SIE KOMBINACJA ATRYBUTOW, KTOREJ NIE BYLO W ZBIORZE TRENINGOWYM, 
        # TO PRZYPISAC W JEJ MIEJSCE NAJBARDZIEJ PODOBNA 
        # - TYLKO TRZEBA MERYTORYCZNIE WLASCIWIE OKRESLIC PODOBIENSTWO
        
    return(infos)


def get_children_positions_in_graph(tree, node_id, ids):
    
    node = get_node(tree,node_id)
    
    if is_terminal(node):
        return([-1])
    
    else:
        
        children_ids = [child.attrib["nid"] for child in node.findall("children/child")]
        children_positions = [ids.index(child_id) for child_id in children_ids]
        return(children_positions)
    

def is_chosen(node):
    if node.attrib["chosen"]=="true":
        return(1)
    else:
        return(0)

def get_labels(tree, ids):
    labels = [-1 if type_of_node(get_node(tree,node_id)) != "disjunctive" else is_chosen(get_node(tree,node_id)) for node_id in ids]
    return(labels)
    

def get_rule(tree, node_id):
    
    node = get_node(tree, node_id)
    
    node_type = type_of_node(node)
    
    if node_type == "conjunctive" or node_type == "conjunctive_node_with_disjunctive_children":
        
        rule = node.find(".//category").text
    
    elif node_type == "terminal":
        
        rule = "terminal"
    
    elif node_type == "disjunctive":
        
        rule = "disjunctive_node" # to siedzi w: node.find(".//category").text
    
    return(rule)


def get_representation(tree, words2ids, rules2ids):
    
    ids = get_ids(tree)
    children_matrix = seq.pad_sequences([get_children_positions_in_graph(tree,x,ids) for x in ids],value=-1, padding='post')
    labels = get_labels(tree, ids)
    heads = [words2ids.get(get_head(tree,x),-1) for x in ids]
    rules = [rules2ids.get(get_rule(tree,x),-1) for x in ids]
    node_ids = [int(nid) for nid in get_ids(tree)]
    
    types = [int(type_of_node(get_node(tree,i)) == "conjunctive_node_with_disjunctive_children")+
             2*int(type_of_node(get_node(tree,i)) == "disjunctive")
             for i in ids]
    return([heads, 
            rules,
             children_matrix,
             labels,
             list(range(len(children_matrix))),
           types,
           node_ids])


In [3]:
def bidirectional_grapf_representation(x):
    
    dis_positions = list(np.where(np.array(x[5])==2)[0])
    dis_positions.sort()
    backward_positions = dis_positions

    for pos in backward_positions:
        parent_pos = np.where(x[2]==pos)[0]
        if len(parent_pos) > 0:
            new = list(np.setdiff1d(parent_pos,backward_positions))
            if len(new) > 0:
                backward_positions.extend(new)
                backward_positions.sort()

    backward_positions.sort(reverse=True)
    backward_positions = backward_positions[1:]
    
    #print(backward_positions)
    
    parent_downward = []
    n = len(x[2])
    for pos in backward_positions:
        p = np.where(x[2]==pos)[0][0]
        posi_in_downward = np.where(np.array(backward_positions)==p)[0]
        if len(posi_in_downward)>0:
            parent_downward.append(n+posi_in_downward[0])
        else:
            parent_downward.append(n-1)

    is_downward = np.concatenate(([0]*n, [1]*len(parent_downward)))
    chd2 = -1*np.ones((len(parent_downward),len(x[2][0])))
    chd2[:,0] = parent_downward

    new_x = []
    new_x.append(np.concatenate([x[0],np.array(x[0])[np.array(backward_positions)]]))
    new_x.append(np.concatenate([x[1],np.array(x[1])[np.array(backward_positions)]]))
    new_x.append(np.concatenate((x[2],chd2)))
    new_x.append(np.concatenate([x[3],np.array(x[3])[np.array(backward_positions)]]))
    new_x.append(np.concatenate([x[4],range(len(x[4]), len(x[4])+len(backward_positions))]))
    new_x.append(np.concatenate([x[5],np.array(x[5])[np.array(backward_positions)]]))
    new_x.append(is_downward)
    new_x.append(np.concatenate([x[6],np.array(x[6])[np.array(backward_positions)]]))
    
    return(new_x)

In [4]:
class BiTreeLSTM(object):  

    def __init__(self, h_dim, nc, w2v_model_path, file_with_rules, 
                 rules_emb_dim, max_phrase_length, emb_dropout_rate, h_dropout_rate, l, srng,
                train_only_downward = True, load_params=None): 

        '''

        - dropout stanu ukrytego (LSTM_1)
        - dropout embeddinga (LSTM_1)
        - regularyzacja l2 (LSTM_1)
        - indywidualna obsluga lisci - struktura taka sama, macierze te same, ale uczymy: h_aggregated_0, hidden_state_0, cell_state_0, zamiast brac w te miejsca 0


        nh :: dimension of hidden state
        nc :: number of classes
        '''

        train_only_downward = int(train_only_downward)

        w2vecs = pickle.load(open(w2v_model_path,"rb"))
        self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
        self.words2ids = w2vecs["words2ids"]

        emb_dim = w2vecs["vectors"].shape[1]
        del w2vecs

        
        r = open(file_with_rules,"r")
        rules = [x.split() for x in r.readlines()]
        r.close()
        unique_rules = set()
        for i in range(len(rules)):
            for j in range(len(rules[i])):
                unique_rules.add(rules[i][j])
        
        unique_rules.add("terminal")
        unique_rules.add("disjunctive_node")
        
        number_of_uniue_rules = len(unique_rules)
 
        r = 0.05

        self.rules2ids = dict(zip(unique_rules,range(number_of_uniue_rules)))
        self.emb_rules = theano.shared(r * np.random.uniform(-1,1,(number_of_uniue_rules+1, rules_emb_dim)).astype(theano.config.floatX))
        
   

        self.W_i = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim+rules_emb_dim, h_dim) ).astype(theano.config.floatX))
        self.U_i = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
        self.b_i = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))

        self.W_f = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim+rules_emb_dim, h_dim) ).astype(theano.config.floatX))
        self.U_f = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
        self.b_f = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))
        
        self.W_o = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim+rules_emb_dim, h_dim) ).astype(theano.config.floatX))
        self.U_o = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
        self.b_o = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))

        self.W_u = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim+rules_emb_dim, h_dim) ).astype(theano.config.floatX))
        self.U_u = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
        self.b_u = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))

        self.W_y   = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, nc)).astype(theano.config.floatX))
        self.b_y   = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))



        self.W_i_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim+rules_emb_dim, h_dim) ).astype(theano.config.floatX))
        self.U_i_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
        self.b_i_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))

        self.W_f_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim+rules_emb_dim, h_dim) ).astype(theano.config.floatX))
        self.U_f_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
        self.b_f_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))
        
        self.W_o_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim+rules_emb_dim, h_dim) ).astype(theano.config.floatX))
        self.U_o_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
        self.b_o_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))

        self.W_u_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim+rules_emb_dim, h_dim) ).astype(theano.config.floatX))
        self.U_u_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
        self.b_u_dis = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))

        
        self.W_downward= theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim+rules_emb_dim, h_dim) ).astype(theano.config.floatX))
        self.U_downward= theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
        self.b_downward = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))



        self.h_aggregated_0 = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))
        self.cell_state_0 = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))
        self.hidden_state_0 = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))



        self.srng = srng
        self.h_dropout_rate = h_dropout_rate
        self.emb_dropout_rate = emb_dropout_rate
        self.l = l


        if load_params:
            load_params = pickle.load(open(load_params,"rb"))
            if type(load_params)==list:
                load_params = dict(load_params)
            for key in load_params.keys():
                if key not in ['emb', 'emb_rules', 
                               'W_i', 'U_i', 'b_i', 
                               'W_f', 'U_f', 'b_f', 
                               'W_o', 'U_o', 'b_o', 
                               'W_u', 'U_u', 'b_u', 
                               'W_i_dis', 'U_i_dis', 'b_i_dis', 
                               'W_f_dis', 'U_f_dis', 'b_f_dis', 
                               'W_o_dis', 'U_o_dis', 'b_o_dis', 
                               'W_u_dis', 'U_u_dis', 'b_u_dis', 
                               'W_y', 'b_y', 
                               'W_downward','U_downward','b_downward',
                               'h_aggregated_0', 'cell_state_0', 'hidden_state_0']:
                    setattr(self, key, load_params[key])
                else:
                    setattr(self, key, theano.shared(load_params[key]))
        
        

        def one_step(word_id, rule_id, word_children_positions, y_true, k, node_type, direction, hidden_states, cell_states, learning_rate):

            x = T.concatenate( [self.emb[word_id], self.emb_rules[rule_id] ])
            # dla downward rozwazyc doklejene h z przechodzenia w gore (z liczeniem gradientu lub bez)

            #dropout:
            mask1 = self.srng.binomial(n=1, p=1-self.emb_dropout_rate, size=(emb_dim+rules_emb_dim,), dtype='floatX')
            x = x * mask1


            tmp = word_children_positions>=0.0
            number_of_children = tmp.sum(dtype = theano.config.floatX) 
                  
            #idx_tmp = tmp.nonzero()                                    
      
            h_aggregated = ifelse(T.gt(number_of_children, 0.0), 
                                  ifelse(T.eq(node_type,1),
                                           hidden_states[word_children_positions].mean(axis=0),
                                           hidden_states[word_children_positions].sum(axis=0)), 
                                  self.h_aggregated_0)

   
            i = ifelse(T.eq(node_type,1),
                         T.nnet.sigmoid(	T.dot(x, self.W_i_dis) + T.dot(h_aggregated, self.U_i_dis) + self.b_i_dis),
                         T.nnet.sigmoid(	T.dot(x, self.W_i) + T.dot(h_aggregated, self.U_i) + self.b_i))             

            o = ifelse(T.eq(node_type,1),
                         T.nnet.sigmoid(	T.dot(x, self.W_o_dis) + T.dot(h_aggregated, self.U_o_dis) + self.b_o_dis),
                         T.nnet.sigmoid(	T.dot(x, self.W_o) + T.dot(h_aggregated, self.U_o) + self.b_o))             

            u = ifelse(T.eq(node_type,1),
                         T.tanh(	T.dot(x, self.W_u_dis) + T.dot(h_aggregated, self.U_u_dis) + self.b_u_dis),
                         T.tanh(	T.dot(x, self.W_u) + T.dot(h_aggregated, self.U_u) + self.b_u))            

            f_c = ifelse(T.gt(number_of_children, 0.0), 
                 ifelse(T.eq(node_type,1),
                (T.nnet.sigmoid( T.dot(x, self.W_f_dis ) + T.dot(hidden_states[word_children_positions], self.U_f_dis)  + self.b_f_dis )*cell_states[word_children_positions]).sum(axis=0),          
                (T.nnet.sigmoid( T.dot(x, self.W_f ) + T.dot(hidden_states[word_children_positions], self.U_f)  + self.b_f )*cell_states[word_children_positions]).sum(axis=0)),
                T.nnet.sigmoid( T.dot(x, self.W_f ) + T.dot(self.hidden_state_0, self.U_f)  + self.b_f ) * self.cell_state_0
            )


            c = i*u + f_c

            h = ifelse(T.eq(direction,1), 
                       T.nnet.sigmoid(T.dot(x, self.W_downward) + T.dot(h_aggregated, self.U_downward) + self.b_downward), 
                       o * T.tanh(c))
            
            #dropout:
            mask = self.srng.binomial(n=1, p=1-self.h_dropout_rate, size=(h_dim,), dtype='floatX')
            h = h * mask

            current_cell_state = cell_states[k]
            cell_states_new = T.set_subtensor(current_cell_state, c)

            current_hidden_state = hidden_states[k]
            hidden_states_new = T.set_subtensor(current_hidden_state, h)


            y_prob = T.nnet.softmax(T.dot(h,self.W_y) + self.b_y)[0]

            cross_entropy = ifelse(T.eq(node_type,2), 
                                   ifelse(T.eq(train_only_downward,1), direction*(-T.log(y_prob[y_true])),-T.log(y_prob[y_true])),
                                   0.0)						      

            return cross_entropy, hidden_states_new, cell_states_new  


        y = T.vector('y',dtype=dataType)
        learning_rate = T.scalar('lr',dtype=theano.config.floatX)
        words = T.vector(dtype=dataType)
        rules = T.vector(dtype=dataType)
        children_positions = T.matrix(dtype=dataType)
        words_indexes = T.vector(dtype=dataType)
        node_types = T.vector(dtype=dataType)
        directions = T.vector(dtype=theano.config.floatX)

        [cross_entropy_vector, _, _] , _ = theano.scan(fn=one_step, \
                                 sequences = [words, rules, children_positions, y, words_indexes, node_types, directions],
                                 outputs_info = [None,
                                                 T.zeros((T.shape(words)[0]+1,h_dim), dtype = theano.config.floatX),
                                                 T.zeros((T.shape(words)[0]+1,h_dim), dtype = theano.config.floatX)],
                                 non_sequences = learning_rate)#,
                                 #n_steps = words.shape[0])

        cost = T.mean(cross_entropy_vector) + self.l * (self.emb_rules**2).sum() 
        
        updates = OrderedDict([
            (self.W_i, self.W_i-learning_rate*T.grad(cost, self.W_i)),
            (self.W_f, self.W_f-learning_rate*T.grad(cost, self.W_f)),
            (self.W_o, self.W_o-learning_rate*T.grad(cost, self.W_o)),
            (self.W_u, self.W_u-learning_rate*T.grad(cost, self.W_u)),
            (self.W_y, self.W_y-learning_rate*T.grad(cost, self.W_y)),

            (self.U_i, self.U_i-learning_rate*T.grad(cost, self.U_i)),
            (self.U_f, self.U_f-learning_rate*T.grad(cost, self.U_f)),
            (self.U_o, self.U_o-learning_rate*T.grad(cost, self.U_o)),
            (self.U_u, self.U_u-learning_rate*T.grad(cost, self.U_u)),

            #(self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #SPROBOWAC TU 0.1 ZAMIAST LR, A DLA POLSKICH BEZ AKTUALIZACJI EMB
            (self.emb_rules, self.emb_rules-learning_rate*T.grad(cost, self.emb_rules)),
            (self.b_i, self.b_i-learning_rate*T.grad(cost,self.b_i)),
                        (self.b_f, self.b_f-learning_rate*T.grad(cost,self.b_f)),
                        (self.b_o, self.b_o-learning_rate*T.grad(cost,self.b_o)),
                        (self.b_u, self.b_u-learning_rate*T.grad(cost,self.b_u)),
                        (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y)),

            (self.W_i_dis, self.W_i_dis-learning_rate*T.grad(cost, self.W_i_dis)),
            (self.W_f_dis, self.W_f_dis-learning_rate*T.grad(cost, self.W_f_dis)),
            (self.W_o_dis, self.W_o_dis-learning_rate*T.grad(cost, self.W_o_dis)),
            (self.W_u_dis, self.W_u_dis-learning_rate*T.grad(cost, self.W_u_dis)),

            (self.U_i_dis, self.U_i_dis-learning_rate*T.grad(cost, self.U_i_dis)),
            (self.U_f_dis, self.U_f_dis-learning_rate*T.grad(cost, self.U_f_dis)),
            (self.U_o_dis, self.U_o_dis-learning_rate*T.grad(cost, self.U_o_dis)),
            (self.U_u_dis, self.U_u_dis-learning_rate*T.grad(cost, self.U_u_dis)),

            #(self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #SPROBOWAC TU 0.1 ZAMIAST LR, A DLA POLSKICH BEZ AKTUALIZACJI EMB
            #(self.emb_rules, self.emb_rules-learning_rate*T.grad(cost, self.emb_rules)),
            (self.b_i_dis, self.b_i_dis-learning_rate*T.grad(cost,self.b_i_dis)),
                        (self.b_f_dis, self.b_f_dis-learning_rate*T.grad(cost,self.b_f_dis)),
                        (self.b_o_dis, self.b_o_dis-learning_rate*T.grad(cost,self.b_o_dis)),
                        (self.b_u_dis, self.b_u_dis-learning_rate*T.grad(cost,self.b_u_dis)),            
            
            (self.W_downward, self.W_downward-learning_rate*T.grad(cost, self.W_downward)),
            (self.U_downward, self.U_downward-learning_rate*T.grad(cost, self.U_downward)),
            (self.b_downward, self.b_downward-learning_rate*T.grad(cost, self.b_downward)),
            
            
            (self.h_aggregated_0, self.h_aggregated_0-learning_rate*T.grad(cost,self.h_aggregated_0)),
            (self.cell_state_0, self.cell_state_0-learning_rate*T.grad(cost,self.cell_state_0)),
            (self.hidden_state_0, self.hidden_state_0-learning_rate*T.grad(cost,self.hidden_state_0))

            ])

        self.train = theano.function( inputs  = [words, rules, children_positions, y, words_indexes, node_types, directions, learning_rate],
                                      outputs = [],
                                      updates = updates,
                                      allow_input_downcast=True,
                                      mode='FAST_RUN'
                                      )


        def one_step_classify(word_id, rule_id, word_children_positions, k, node_type, direction, hidden_states, cell_states):

            x = T.concatenate( [self.emb[word_id], self.emb_rules[rule_id] ])

            x = (1-self.emb_dropout_rate) * x

            tmp = word_children_positions>=0.0
            number_of_children = tmp.sum(dtype = theano.config.floatX) 
            #idx_tmp = tmp.nonzero()                                                                   # indeksy realne dzieci - czyli te, gdzie nie ma -1        

            h_aggregated = ifelse(T.gt(number_of_children, 0.0), 
                                  ifelse(T.eq(node_type,1),
                                           hidden_states[word_children_positions].mean(axis=0),
                                           hidden_states[word_children_positions].sum(axis=0)), 
                                  self.h_aggregated_0)


            i = ifelse(T.eq(node_type,1),
                         T.nnet.sigmoid(	T.dot(x, self.W_i_dis) + T.dot(h_aggregated, self.U_i_dis) + self.b_i_dis),
                         T.nnet.sigmoid(	T.dot(x, self.W_i) + T.dot(h_aggregated, self.U_i) + self.b_i))             

            o = ifelse(T.eq(node_type,1),
                         T.nnet.sigmoid(	T.dot(x, self.W_o_dis) + T.dot(h_aggregated, self.U_o_dis) + self.b_o_dis),
                         T.nnet.sigmoid(	T.dot(x, self.W_o) + T.dot(h_aggregated, self.U_o) + self.b_o))             

            u = ifelse(T.eq(node_type,1),
                         T.tanh(	T.dot(x, self.W_u_dis) + T.dot(h_aggregated, self.U_u_dis) + self.b_u_dis),
                         T.tanh(	T.dot(x, self.W_u) + T.dot(h_aggregated, self.U_u) + self.b_u))            

            f_c = ifelse(T.gt(number_of_children, 0.0), 
                 ifelse(T.eq(node_type,1),
                    (T.nnet.sigmoid( T.dot(x, self.W_f_dis ) + T.dot(hidden_states[word_children_positions], self.U_f_dis)  + self.b_f_dis )*cell_states[word_children_positions]).sum(axis=0),          
                    (T.nnet.sigmoid( T.dot(x, self.W_f ) + T.dot(hidden_states[word_children_positions], self.U_f)  + self.b_f )*cell_states[word_children_positions]).sum(axis=0)),
                T.nnet.sigmoid( T.dot(x, self.W_f ) + T.dot(self.hidden_state_0, self.U_f)  + self.b_f ) * self.cell_state_0
            )

            c = i*u + f_c

            h = ifelse(T.eq(direction,1), 
                       T.nnet.sigmoid(T.dot(x, self.W_downward) + T.dot(h_aggregated, self.U_downward) + self.b_downward), 
                       o * T.tanh(c))# podczas uczenia zerowalismy 1-dropout_rate procent wspolrzednych, wiec trzeba to uzgodnić
            h = h * (1-self.h_dropout_rate)

            current_cell_state = cell_states[k]
            cell_states_new = T.set_subtensor(current_cell_state, c)

            current_hidden_state = hidden_states[k]
            hidden_states_new = T.set_subtensor(current_hidden_state, h)


            y_prob = ifelse(T.eq(node_type,2),T.nnet.softmax(T.dot(h,self.W_y) + self.b_y)[0],-1*T.ones(nc))            

            return  y_prob, hidden_states_new, cell_states_new


        [y_probs_classify, _ , _ ], _ = theano.scan(
                 fn=one_step_classify, 
                                 sequences = [words, rules, children_positions, words_indexes, node_types, directions],
                 outputs_info = [None,
                                 T.zeros((T.shape(words)[0]+1,h_dim), dtype = theano.config.floatX),
                                 T.zeros((T.shape(words)[0]+1,h_dim), dtype = theano.config.floatX)])

        predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]), 
                                     sequences = [words_indexes])
        
        probs, _ = theano.scan(lambda i: y_probs_classify[i], 
                                     sequences = [words_indexes])

        self.classify = theano.function(inputs=[words, rules, children_positions, words_indexes, node_types, directions], 
                                     outputs=predictions,
                                     allow_input_downcast=True,
                                     mode='FAST_RUN' 
                                     )

        self.predict_proba = theano.function(inputs=[words, rules, children_positions,words_indexes, node_types, directions], 
                             outputs=probs,
                             allow_input_downcast=True,
                             mode='FAST_RUN' 
                             )

        self.calculate_loss = theano.function(inputs=[words, rules, children_positions, y, words_indexes, node_types, directions, learning_rate], 
                     outputs=cost,
                     allow_input_downcast=True,
                     mode='FAST_RUN' 
                     )
        
    def save_model(self,path):
        params = [ (k, v.get_value())  if type(v)==theano.tensor.sharedvar.TensorSharedVariable else (k,v) for k, v in list(self.__dict__.items())]
        params = dict(params)
        pickle.dump(params,open(path,"wb"))

        

In [5]:
import numpy as np
import random
import time
import os
import itertools
import pickle
import  csv
from collections import Counter, OrderedDict

from keras.preprocessing import sequence as seq

import os    
#os.environ['THEANO_FLAGS'] = "optimizer = None"

import theano
from theano import tensor as T
from theano.ifelse import ifelse
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 

s = {'lr':0.1,
         'nepochs':80,
         'seed':345,
         'nc':2,        # number of y classes
         'h_dim': 100,
         'h_dropout_rate': 0,
         'emb_dropout_rate': 0,
         'time_without_improvement': 10,
         'batch_size': 1,
         'w2v_DIM': str(300),
         "rules_emb_dim": 20
         }  

dataType = 'int64'
  
np.random.seed(s['seed']) 

#ile_with_filtered_embeddings = "embeddings/filtered_nkjp+wiki-forms-restricted-300-cbow-ns.pkl"
#2vecs = pickle.load(open(file_with_filtered_embeddings,"rb"))

rnn = BiTreeLSTM( h_dim = s['h_dim'],
            nc = s['nc'],
        w2v_model_path = "embeddings/filtered_train_and_test_w2v_allwiki_nkjpfull_300.pkl",
            max_phrase_length = -1, # w tej wersji modelu zmienna nie jest wykorzystywana
        emb_dropout_rate = s['emb_dropout_rate'],
        h_dropout_rate = s['h_dropout_rate'],
        l = 0.0001,
        srng = RandomStreams(12345),
        file_with_rules =  "/home/norbert/Doktorat/SyntacticTreesDisambiguation/Składnica_preprocessed_training_data/rules.txt",
        rules_emb_dim = s["rules_emb_dim"],
        load_params= "/home/norbert/Doktorat/SyntacticTreesDisambiguation/Model/model_bidirectional_forest_params_4.pkl"
    )




In [6]:
from copy import deepcopy

In [7]:
data_folder = "/home/norbert/Doktorat/SyntacticTreesDisambiguation/Składnica_raw_data/Test/*.xml"
files = glob.glob(data_folder,recursive=True)
data_test = []
print(len(list(files)))
forests = []
for i, file in enumerate(files):
    
    if i % 10 == 0:
        print(i)
    forest = ET.parse(file)
    forests.append(deepcopy(forest))
    transform_to_graph_with_con_and_dis_nodes(forest)
    try:
        data_test.append(bidirectional_grapf_representation(get_representation(forest,rnn.words2ids,rnn.rules2ids)))
    except:
        forests.pop()


2000
0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
510
520
530
540
550
560
570
580
590
600
610
620
630
640
650
660
670
680
690
700
710
720
730
740
750
760
770
780
790
800
810
820
830
840
850
860
870
880
890
900
910
920
930
940
950
960
970
980
990
1000
1010
1020
1030
1040
1050
1060
1070
1080
1090
1100
1110
1120
1130
1140
1150
1160
1170
1180
1190
1200
1210
1220
1230
1240
1250
1260
1270
1280
1290
1300
1310
1320
1330
1340
1350
1360
1370
1380
1390
1400
1410
1420
1430
1440
1450
1460
1470
1480
1490
1500
1510
1520
1530
1540
1550
1560
1570
1580
1590
1600
1610
1620
1630
1640
1650
1660
1670
1680
1690
1700
1710
1720
1730
1740
1750
1760
1770
1780
1790
1800
1810
1820
1830
1840
1850
1860
1870
1880
1890
1900
1910
1920
1930
1940
1950
1960
1970
1980
1990


In [63]:
1270 - po wypisaniu tego stanal na 10 godzin

30

In [8]:
len(data_test), len(forests)

(1881, 1881)

In [9]:
pickle.dump(zip(data_test, forests),open("/home/norbert/Doktorat/SyntacticTreesDisambiguation/Składnica_preprocessed_test_data/bi_data_as_graphs_and_forests.pkl","wb"))

In [8]:
data = pickle.load(open("/home/norbert/Doktorat/SyntacticTreesDisambiguation/Składnica_preprocessed_test_data/bi_data_as_graphs_and_forests.pkl","rb"))

EOFError: Ran out of input

In [13]:
from sklearn.metrics import f1_score

In [16]:
f1 = []
i = 0
for forest, graph in zip(forests, data_test):
    
    nodes_ids_predicted = disambiguate(graph, rnn)
    
    nodes_ids_in_positive_tree = [int(x.attrib["nid"]) for x in forest.findall("node[@chosen='true']")]
    
    terimals_ids = [int(x) for x in terminals(forest)[1]]
    #print(terimals_ids)
    
    nodes_ids_predicted = np.setdiff1d(nodes_ids_predicted,terimals_ids)
    nodes_ids_in_positive_tree = np.setdiff1d(nodes_ids_in_positive_tree,terimals_ids)
    #WYRZUCIC TERMINALE
    
    
    #print(sorted(nodes_ids_predicted))
    #print(sorted(nodes_ids_in_positive_tree))
    n = get_maximum_node_id(forest)
    labels_true = np.zeros(n+1)
    labels_predicted = np.zeros(n+1)
    labels_true[nodes_ids_in_positive_tree] = 1
    labels_predicted[nodes_ids_predicted] = 1
    f1.append(f1_score(y_pred=labels_predicted, y_true=labels_true))
    if i % 10 == 0:
        print(np.mean(f1))
    i += 1
    #print(f1_score(y_pred=labels_predicted, y_true=labels_true))

0.9090909090909091
0.8598173249188253
0.8552543503537526
0.8791015183057099
0.8745969410858754
0.8487281268195666
0.8494780288528888
0.8515538345174657
0.8466942984248678
0.8465222811511685
0.8575534348341882
0.8561455406913536
0.8497806741207696
0.8445751143994514
0.8410083698772888
0.8366650991223754
0.8371928327664315
0.8351342526144468
0.8389328659685623
0.8353063313607597
0.8319226576445843
0.8312434272477297
0.8348721724430743
0.837360867675997
0.8373480541932835
0.838870742419668
0.8384909938233709
0.8393763320634945
0.83977491922094
0.842852299049042
0.8422009983746388
0.8412328869101674
0.8400903005491964
0.8394335802897647
0.8404747463621586
0.8417767769157899
0.8433889886433233
0.8437165656006619
0.8449342261609358
0.8446834082596792
0.8451271984545841
0.8430845808509184
0.8422549922303173
0.8422686302693498
0.8417420281899259
0.842853665294087
0.8437061414409281
0.843440174459011
0.8442238883226927
0.8419598270697292
0.8429005981554246
0.8428808600981078
0.8429921814384738


In [None]:
8395

In [152]:
k = 0
pred  = rnn.predict_proba(data_test[k][0], data_test[k][1] ,data_test[k][2], data_test[k][4], data_test[k][5], data_test[k][6])
np.column_stack((pred[:,1]*100, data_test[k][7], range(len(pred))))

array([[-100.        ,    5.        ,    0.        ],
       [-100.        ,    9.        ,    1.        ],
       [-100.        ,   13.        ,    2.        ],
       [-100.        ,   17.        ,    3.        ],
       [-100.        ,   20.        ,    4.        ],
       [-100.        ,   30.        ,    5.        ],
       [-100.        ,   35.        ,    6.        ],
       [-100.        ,   37.        ,    7.        ],
       [-100.        ,   41.        ,    8.        ],
       [-100.        ,   46.        ,    9.        ],
       [-100.        ,   49.        ,   10.        ],
       [-100.        ,   23.        ,   11.        ],
       [-100.        ,    4.        ,   12.        ],
       [-100.        ,    8.        ,   13.        ],
       [-100.        ,   12.        ,   14.        ],
       [-100.        ,   16.        ,   15.        ],
       [-100.        ,   19.        ,   16.        ],
       [-100.        ,   29.        ,   17.        ],
       [-100.        ,   34.

In [58]:
downward_nodes = data_test[k][7][data_test[k][6]==1]
downward_nodes

array([ 3, 23, 22])

In [153]:
data_test[k]

[array([31619,    16, 14836,   878,  1428,  2127,  4583,     4,     5,
          384,   242,    -1, 31619,    16, 14836,   878,  1428,  2127,
         4583,  4583,  4583,     4,     5,   384,   242,    -1, 31619,
        14836, 14836, 14836,   878,   878,  1428,  1428,  2127,  4583,
         4583,  4583,  4583,   384,   384,   242,   242, 31619,    16,
           16,   878,   878,   878,   878,  1428,  1428,   878, 14836,
          878, 31619,  1428,  1428,  1428,  2127,  4583,   384,   384,
          242,   384,   242, 31619,   878, 14836,   878,   878,  1428,
        14836,   242,     5,   878,    16,   878, 14836, 31619, 14836,
            5,     5,     5,     5,     5, 31619,  1428, 31619,  1428,
          878,    16,    16,     5,     5,     5,     5,  4583,     5,
            5,     5,  1428, 31619,    16, 31619,  4583,  4583,  4583,
         2127, 31619, 31619,  4583, 31619,  4583,  2127,  2127,  2127,
         2127,  2127,  2127,  2127,  2127,  2127,  4583, 31619,  4583,
      

In [147]:
np.array(np.column_stack((pred[:,1]*100, data_test[k][2], data_test[k][5], data_test[k][7], range(len(pred)))),dtype=int)

array([[-100,   -1,   -1,   -1,   -1,   -1,   -1,   -1,    0,    5,    0],
       [-100,   -1,   -1,   -1,   -1,   -1,   -1,   -1,    0,    9,    1],
       [-100,   -1,   -1,   -1,   -1,   -1,   -1,   -1,    0,   13,    2],
       [-100,   -1,   -1,   -1,   -1,   -1,   -1,   -1,    0,   16,    3],
       [-100,   -1,   -1,   -1,   -1,   -1,   -1,   -1,    0,   37,    4],
       [-100,    0,   -1,   -1,   -1,   -1,   -1,   -1,    0,    4,    5],
       [-100,    0,   -1,   -1,   -1,   -1,   -1,   -1,    0,   19,    6],
       [-100,    0,   -1,   -1,   -1,   -1,   -1,   -1,    0,   26,    7],
       [-100,    0,   -1,   -1,   -1,   -1,   -1,   -1,    0,   30,    8],
       [-100,    0,   -1,   -1,   -1,   -1,   -1,   -1,    0,   34,    9],
       [-100,    1,   -1,   -1,   -1,   -1,   -1,   -1,    0,    8,   10],
       [-100,    2,   -1,   -1,   -1,   -1,   -1,   -1,    0,   12,   11],
       [-100,    3,   -1,   -1,   -1,   -1,   -1,   -1,    0,   15,   12],
       [-100,    4,   -1,

In [11]:
def disambiguate(x, model):

    pred  = rnn.predict_proba(x[0], x[1] ,x[2], x[4], x[5], x[6])

    
    start = np.where(x[7]==0)[0][0]
    chosen = np.zeros(start+1)
    chosen[start] = 1
    for i in range(start,-1,-1):
        #print(i,chosen[i])
        children_pos = np.array(x[2][i],dtype=int)

        if np.all(children_pos==-1):
            #chosen[i] = 1
            continue
        else:
            children_pos = children_pos[children_pos>-1]
            #print(children_pos)
            if chosen[i] == 1:
                if x[5][i] == 1:
                    #print(children_pos[np.argmax(pred[children_pos,1])])
                    chosen[children_pos[np.argmax(pred[children_pos,1])]] = 1
                else:
                    chosen[children_pos] = 1
                    
                #print(chosen[children_pos])
    #print(np.array(np.column_stack((range(start+1),100*pred[:,1][:start+1], chosen[:start+1], x[2][:start+1], x[7][:start+1])),dtype=int))
    #print(chosen)
    chosen = x[7][:start+1][chosen==1] #id wybranych wierzcholkow
    chosen = np.setdiff1d(chosen, x[7][:start+1][x[5][:start+1]==2])
                  
    return(chosen)

In [166]:
np.set_printoptions(threshold=np.nan)

In [177]:
nodes_ids_predicted = disambiguate(data_test[k],rnn)

118 1.0
[117  25]
[1. 1.]
117 1.0
[116 108 114 115]
[1. 0. 0. 0.]
116 1.0
[ 43 103  59 113]
[1. 1. 1. 1.]
115 0.0
[112  59  60 100]
114 0.0
[112  59 113]
113 1.0
[111]
[1.]
112 0.0
[110]
111 1.0
[ 97 105 106 107]
[0. 1. 0. 0.]
110 0.0
[ 55  66  79 104  88 109 102  86]
109 0.0
[ 26 101]
108 0.0
[ 43 103  59  60 100]
107 0.0
[37 21 99]
106 0.0
[36 21 98]
105 1.0
[35 21 93]
[1. 1. 1.]
104 0.0
[26 92]
103 1.0
[91]
[1.]
102 0.0
[26 90]
101 0.0
[51 57 87 58 89]
100 0.0
[85 96]
99 0.0
[84 95]
98 0.0
[83 94]
97 0.0
[35 21 82]
96 0.0
[81]
95 0.0
[81]
94 0.0
[81]
93 1.0
[81]
[1.]
92 0.0
[13 80]
91 1.0
[13 80]
[1. 1.]
90 0.0
[54 77]
89 0.0
[76 33]
88 0.0
[26 76 32]
87 0.0
[75 33]
86 0.0
[26 75 32]
85 0.0
[74]
84 0.0
[74]
83 0.0
[74]
82 0.0
[74]
81 1.0
[22 73]
[1. 1.]
80 1.0
[53 72 78]
[0. 0. 1.]
79 0.0
[26 44 71]
78 1.0
[27 71]
[1. 1.]
77 0.0
[69 32]
76 0.0
[13 68]
75 0.0
[70 67]
74 0.0
[22 64]
73 1.0
[63 65]
[1. 0.]
72 0.0
[27 52]
71 1.0
[50 56]
[1. 0.]
70 0.0
[48]
69 0.0
[48]
68 0.0
[27 46]
67 

In [160]:
sorted(nodes_ids_predicted)

[0, 1, 23, 52]

In [None]:
[0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 36]
[0, 1, 7, 8, 12, 14, 15, 17, 18, 19, 21, 22, 23, 36]

In [159]:
file = files[0]
print(file)
forest = ET.parse(file)
ET.dump(forest)

/home/norbert/Doktorat/SyntacticTreesDisambiguation/Składnica_raw_data/Test/NKJP_1M_3102000009__morph_10-p__morph_10.12-s.xml
<forest grammar_no="1505562921" sent_id="NKJP_1M_3102000009/morph_10-p/morph_10.12-s">
  <text>Wania za namową swego ojca postanowił przenieść się do naszej szkoły.</text>
  <startnode from="0" to="12">wypowiedzenie</startnode>
  <stats cputime="0.5484242260000001" inferences="612744" nodes="81" trees="276" />
    <answer-data>
        <base-answer type="FULL" username="none">
            <comment>AUTO</comment>
        </base-answer>
        <extra-answer type="FULL" username="paulinar">
            <comment>AUTO</comment>
        </extra-answer>
        <extra-answer type="FULL" username="sebastianz">
            <comment>AUTO</comment>
        </extra-answer>
    </answer-data>
  <node chosen="true" from="0" nid="0" subtrees="276" to="12">
    <nonterminal>
      <category>wypowiedzenie</category>
    </nonterminal>
    <children chosen="true" rule="w">
     

In [39]:
nodes_ids_in_positive_tree = [int(x.attrib["nid"]) for x in forest.findall("node[@chosen='true']")]
labels_positive = np.zeros_like(data_test[k][0])
labels_positive[nodes_ids_chosen_in_positive_tree] = 1

In [40]:
start = np.where(x[7]==0)[0][0]
labels_positive[:start][x[7]]

IndexError: index 23 is out of bounds for axis 1 with size 23

In [41]:
nodes_ids_chosen_in_positive_tree

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21]

In [42]:
x = data_test[k]

In [44]:
n = get_maximum_node_id(forest)
labels_true = np.zeros(n+1)
labels_predicted = np.zeros(n+1)
labels_true[nodes_ids_in_positive_tree] = 1
labels_predicted[nodes_ids_predicted] = 1

In [45]:
labels_true, labels_predicted

(array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.,
        0., 0., 0., 1., 1.]),
 array([1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 1.,
        1., 1., 1., 1., 0.]))