In [20]:
import xml.etree.ElementTree as ET
import numpy as np
from copy import deepcopy
from pprint import pprint
#import warnings

%pprint

Pretty printing has been turned ON


In [2]:
# Parse one Składnica XML file (sentence morph_2.52-s) into an ElementTree.
# NOTE(review): the next cell rebinds `tree` to a different sentence, so the
# object loaded here is not used by any later cell.
tree = ET.parse('../Składnica-frazowa-171220/NKJP_1M_0401000002/morph_2-p/morph_2.52-s.xml')
tree

<xml.etree.ElementTree.ElementTree object at 0x7f37392b0630>

In [3]:
# Parse sentence morph_4.67-s. This binding of `tree` is the forest that later
# cells pass to get_random_tree(), number_of_nodes() and get_positive_tree().
tree = ET.parse('../Składnica-frazowa-171220/NKJP_1M_0401000002/morph_4-p/morph_4.67-s.xml')
tree

<xml.etree.ElementTree.ElementTree object at 0x7f37201b1390>

In [52]:
def _check_sentence(xml_tree, accept_tags=("forest", "tree")):
    """
    Validate a parsed sentence before it is processed further.

    Two things are checked:
    - the root tag of ``xml_tree`` is one of ``accept_tags`` (the original
      corpus files hold a forest and use the tag "forest"; single trees
      extracted from a forest use the tag "tree"),
    - the sentence has a correct parse, i.e. the ``type`` attribute of the
      ``answer-data//base-answer`` element equals "FULL".

    Parameters
    ----------
    xml_tree : xml.etree.ElementTree.ElementTree
        Forest or single tree to validate.
    accept_tags : str or iterable of str
        Root tag(s) that are acceptable to the caller.

    Raises
    ------
    AssertionError
        If the argument has the wrong type, has no root, has an unexpected
        root tag, lacks a ``<base-answer>`` element, or its ``type`` is not
        "FULL".
    """
    # isinstance (rather than an exact type comparison) also accepts subclasses.
    if not isinstance(xml_tree, ET.ElementTree):
        raise AssertionError("Bad argument type")

    if isinstance(accept_tags, str):
        accept_tags = [accept_tags]

    root = xml_tree.getroot()
    if root is None:
        raise AssertionError("Bad argument: the tree has no root element")

    if root.tag not in accept_tags:
        raise AssertionError('Argument in not in [' + ",".join(accept_tags) + '] - it has tag "' + root.tag + '"')

    # A missing element or attribute is reported with the same exception type
    # as every other validation failure instead of AttributeError / KeyError.
    base_answer = root.find('.//answer-data//base-answer')
    if base_answer is None:
        raise AssertionError("Sentence is not correct: Node <base-answer> is missing")

    base_answer_type = base_answer.get("type")
    if base_answer_type != "FULL":
        raise AssertionError("Sentence is not correct: Node <base-answer> has type value " + str(base_answer_type) + " instead of 'FULL'")


def get_random_tree(forest, random_state=None):
    """
    Return a random tree drawn from a packed forest.

    Starting from the node with ``nid="0"``, one ``<children>`` alternative
    is picked at random for every visited node and the nodes it references
    are visited recursively. The result is a new ElementTree whose root has
    the tag "tree" and contains, in this order:

    - copies of all non-``node`` children of the forest root (the forest's
      ``stats`` element is renamed to ``forest-stats``),
    - a ``tree-stats`` element with the ``height`` and ``nodes`` of the new
      tree,
    - the selected ``node`` elements in pre-order, each with exactly the one
      chosen ``children`` element (or none for leaves).

    The forest itself is not modified; everything put in the new tree is a
    copy.

    Parameters
    ----------
    forest : xml.etree.ElementTree.ElementTree
        Forest accepted by ``_check_sentence(forest, "forest")``.
    random_state : optional
        If not None, passed to ``np.random.seed`` before sampling. This
        seeds NumPy's global generator.

    Raises
    ------
    AssertionError
        If the forest is rejected by ``_check_sentence`` or a referenced node
        (including the start node ``nid="0"``) does not exist.
    """
    # validate the forest (raises with a message when it is not usable)
    _check_sentence(forest, "forest")

    # set the seed
    if random_state is not None:
        np.random.seed(random_state)

    root_old = forest.getroot()
    root_new = ET.Element("tree", root_old.attrib)

    # A forest consists of "node" elements plus extra data (sentence text,
    # forest statistics, ...). The extra data is copied here; a copy is
    # required because the tag of "stats" is changed and the original forest
    # must stay intact. Iterating the element replaces getchildren(), which
    # was removed in Python 3.9.
    for feature in root_old:
        if feature.tag != "node":
            feature_copy = deepcopy(feature)
            if feature_copy.tag == "stats":
                feature_copy.tag = "forest-stats"
            root_new.append(feature_copy)

    # The statistics element is created now so that it precedes the nodes in
    # the output; the real values are filled in once the tree is built.
    tree_stats = ET.SubElement(root_new, "tree-stats", {"height": "0", "nodes": "0"})

    # nid -> node index, built once instead of one XPath scan per child.
    # setdefault keeps the first node in document order, like find() did.
    nodes_by_nid = {}
    for candidate in root_old.findall(".//node"):
        nid = candidate.get("nid")
        if nid is not None:
            nodes_by_nid.setdefault(nid, candidate)

    def lookup(nid):
        node = nodes_by_nid.get(nid)
        if node is None:
            raise AssertionError('Forest has no <node> with nid="' + str(nid) + '"')
        return node

    def add_random_children(current_node_old, depth):
        """Copy the node into root_new, recurse into one random alternative
        and return the depth of the deepest node reached (root has depth 1)."""
        current_node_new = ET.SubElement(root_new, current_node_old.tag, current_node_old.attrib)

        # Every node is a terminal or a nonterminal with a description; copy
        # everything except the "children" alternatives. deepcopy keeps the
        # new tree independent of the forest.
        for feature in current_node_old:
            if feature.tag != "children":
                current_node_new.append(deepcopy(feature))

        alternatives = current_node_old.findall("children")
        if len(alternatives) == 0:  # leaf
            return depth

        # The draw is made even for a single alternative so that the sequence
        # of random numbers consumed for a given seed stays unchanged.
        picked_old = alternatives[np.random.choice(len(alternatives), 1)[0]]
        picked_new = ET.SubElement(current_node_new, picked_old.tag, picked_old.attrib)

        deepest = depth
        for child_old in picked_old:
            child_new = ET.SubElement(picked_new, child_old.tag, child_old.attrib)
            reached = add_random_children(lookup(child_new.attrib["nid"]), depth + 1)
            deepest = max(deepest, reached)
        return deepest

    # Start node: the node with nid="0" is assumed to be the root.
    # TODO: confirm that no other node can be the start node.
    height = add_random_children(lookup("0"), 1)

    # The height (number of nodes on the longest root-to-leaf path) is
    # measured while the tree is being built, so it describes the new tree.
    tree_stats.set("height", str(height))
    tree_stats.set("nodes", str(len(root_new.findall("node"))))

    return ET.ElementTree(root_new)
       
 

def number_of_trees_in_forest(forest):
    """
    Return the number of trees packed in ``forest``.

    The forest is first validated with ``_check_sentence``; the count is then
    read from the ``trees`` attribute of the forest's ``stats`` element.

    forest - forest of trees [xml.etree.ElementTree.ElementTree]
    """
    _check_sentence(forest, "forest")

    stats_element = forest.find("stats")
    declared_count = stats_element.attrib["trees"]
    return int(declared_count)
    
    
def get_random_negative_tree(forest, random_state=None):
    """
    Return a random negative (incorrect) tree from ``forest``.

    Random trees are drawn with ``get_random_tree`` until one is found for
    which ``is_positive`` is false.

    When the forest holds only one tree (the correct one) there is nothing to
    draw: a warning is issued and None is returned.

    Parameters
    ----------
    forest : xml.etree.ElementTree.ElementTree
        Forest accepted by ``_check_sentence(forest, "forest")``.
    random_state : optional
        If not None, passed once to ``np.random.seed`` before sampling.
    """
    # Local import: the notebook's top-level `import warnings` is commented out.
    import warnings

    _check_sentence(forest, "forest")

    # The attribute is a string; it has to be converted before it is compared
    # with a number, otherwise the single-tree guard can never fire.
    tree_count = int(forest.find("stats").attrib["trees"])

    if tree_count <= 1:
        warnings.warn("There is only one tree in the forest")
        return None

    # Seed once, outside the loop. Re-seeding on every attempt would make
    # every attempt produce the same tree, so the loop could never end if
    # that tree happened to be the positive one.
    if random_state is not None:
        np.random.seed(random_state)

    while True:
        candidate = get_random_tree(forest)
        if not is_positive(candidate):
            return candidate
    
    

In [53]:
# Draw one random tree from the forest bound to `tree`. No random_state is
# passed, so get_random_tree() does not seed NumPy for this call.
random_tree = get_random_tree(tree)

In [13]:
ET.dump(random_tree)

<tree grammar_no="1505562921" sent_id="NKJP_1M_0401000002/morph_4-p/morph_4.67-s"><text>Papież Formosus zmarł w kwietniu 896 w wyniku nieznanej choroby lub otrucia.</text>
  <startnode from="0" to="13">wypowiedzenie</startnode>
  <forest-stats cputime="10.487813580000001" inferences="7448127" nodes="124" trees="173" />
    <answer-data>
        <base-answer type="FULL" username="none">
            <comment>AUTO</comment>
        </base-answer>
        <extra-answer type="FULL" username="witoldk">
            <comment>AUTO</comment>
        </extra-answer>
        <extra-answer type="FULL" username="piotrb">
            <comment>AUTO</comment>
        </extra-answer>
    </answer-data>
  <tree-stats height="15" nodes="49" /><node chosen="true" from="0" nid="0" subtrees="173" to="13"><nonterminal>
      <category>wypowiedzenie</category>
    </nonterminal>
    <children chosen="true" rule="w"><child from="0" head="true" nid="1" to="12" /><child from="12" head="false" nid="71" to="13" /><

In [187]:
random_tree.write("test.xml")

In [271]:
def terminals(tree):
    """
    Collect the terminal nodes of ``tree``.

    A terminal node is a top-level ``node`` element that has a ``terminal``
    child. For each of them a one-element branch is created holding the tuple
    ``(nid, orth, base, f)``, where the last three values are the texts of
    the ``orth``, ``base`` and ``f`` elements found under ``terminal``.

    Returns a pair ``(branches, ids)``: the list of branches and the list of
    the terminals' ``nid`` values, both in document order.
    """
    branches = []
    ids = []

    for node in tree.findall("node[terminal]"):
        nid = node.attrib["nid"]
        orth = node.find("terminal//orth").text
        base = node.find("terminal//base").text
        tag = node.find("terminal//f").text

        branches.append([(nid, orth, base, tag)])
        ids.append(nid)

    return branches, ids

In [202]:
terminal_nodes, ids = terminals(random_tree)

In [322]:
ET.dump(random_tree)

<tree grammar_no="1505562921" sent_id="NKJP_1M_0401000002/morph_4-p/morph_4.67-s"><text>Papież Formosus zmarł w kwietniu 896 w wyniku nieznanej choroby lub otrucia.</text>
  <startnode from="0" to="13">wypowiedzenie</startnode>
  <forest-stats cputime="10.487813580000001" inferences="7448127" nodes="124" trees="173" />
    <answer-data>
        <base-answer type="FULL" username="none">
            <comment>AUTO</comment>
        </base-answer>
        <extra-answer type="FULL" username="witoldk">
            <comment>AUTO</comment>
        </extra-answer>
        <extra-answer type="FULL" username="piotrb">
            <comment>AUTO</comment>
        </extra-answer>
    </answer-data>
  <tree-stats height="15" nodes="49" /><node chosen="true" from="0" nid="0" subtrees="173" to="13"><nonterminal>
      <category>wypowiedzenie</category>
    </nonterminal>
    <children chosen="true" rule="w"><child from="0" head="true" nid="1" to="12" /><child from="12" head="false" nid="71" to="13" /><

In [337]:
def dependency_tree(tree):
    """
    Group the nodes of a constituency tree into "branches".

    Every branch starts at a terminal (see ``terminals``) or at a nonterminal
    that has more than one child. While the parent of the branch's current
    top node has exactly one child, the parent is appended to the branch as
    ``(nid, <texts of the children of its "nonterminal" element>...)``. When
    the parent has several children the branch is closed with the 1-tuple
    ``(parent_nid,)`` and, unless one already exists, a new branch starting
    at that parent is opened.

    A label from ``get_subtree_label`` is appended to ``labels`` each time a
    branch is closed, and once more for the node that has no parent.
    NOTE(review): labels are appended in the order in which nodes are
    processed, which is not necessarily the order of the branches in
    ``dep_tree`` - confirm that callers may pair them positionally.

    Parameters
    ----------
    tree : xml.etree.ElementTree.ElementTree
        A single tree (flat list of ``node`` elements under the root).

    Returns
    -------
    (dep_tree, labels, n_terminals)
        The list of branches, the list of labels, and the number of branches
        that start at a terminal (they come first in ``dep_tree``).
    """
    dep_tree, ids = terminals(tree)
    n_terminals = len(dep_tree)
    labels = []

    def nonterminal_entry(node):
        # Iterating the element replaces getchildren(), removed in Python 3.9.
        return tuple([node.attrib["nid"]] + [field.text for field in node.find("nonterminal")])

    def label_of(nid):
        return get_subtree_label(tree, tree.find(".//node[@nid='" + str(nid) + "']"))

    # `ids` is extended inside the loop on purpose: parents discovered on the
    # way are processed after the nodes already queued.
    for nid in ids:

        # "...." is ".." twice: child -> its <children> element -> owning node.
        parent = tree.find(".//children/child[@nid='" + str(nid) + "']....")

        if parent is None:
            labels.append(label_of(nid))
            continue

        parent_nid = parent.attrib["nid"]

        # Branches that contain this node and are still open (a closed branch
        # ends with a 1-tuple).
        loc = np.where([str(nid) in [x[0] for x in branch] and len(branch[-1]) >= 2 for branch in dep_tree])[0]

        if parent_nid not in ids:
            ids.append(parent_nid)

        if len(parent.findall("children/child")) == 1:
            # unary parent: the branch simply continues upwards
            dep_tree[loc[0]].append(nonterminal_entry(parent))
        else:
            # branching parent: close this branch with a pointer to the parent
            dep_tree[loc[0]].append((parent_nid,))
            labels.append(label_of(nid))

            if parent_nid not in [branch[0][0] for branch in dep_tree]:
                dep_tree.append([nonterminal_entry(parent)])

    return (dep_tree, labels, n_terminals)

In [338]:
dep_tree, labels, n = dependency_tree(random_tree)
dep_tree

[[('6', 'Papież', 'papież', 'subst:sg:nom:m1'),
  ('5', 'formarzecz', 'papież', 'nom', 'mos', 'poj', '[]'),
  ('4',
   'fno',
   'papież',
   'nom',
   'mos',
   'poj',
   '3',
   '[]',
   'rzecz',
   'bzap',
   'pre',
   'tak',
   'neut',
   'ni'),
  ('3',)],
 [('9', 'Formosus', 'Formosus', 'subst:sg:nom:m1'),
  ('8', 'formarzecz', 'Formosus', 'nom', 'mos', 'poj', '[]'),
  ('7',
   'fno',
   'Formosus',
   'nom',
   'mos',
   'poj',
   '3',
   '[]',
   'rzecz',
   'bzap',
   'pre',
   'tak',
   'neut',
   'ni'),
  ('3',)],
 [('13', 'zmarł', 'zemrzeć', 'praet:sg:m1:perf'),
  ('56',
   'formaczas',
   'os',
   'dk',
   'prze',
   'ozn',
   'mos',
   'poj',
   '3',
   '[subj(np(nom)),xp(caus)]',
   'tak'),
  ('55',
   'fwe',
   'os',
   'dk',
   'prze',
   'ozn',
   'mos',
   'poj',
   '3',
   '[subj(np(nom)),xp(caus)]',
   'tak',
   'neut',
   'ni'),
  ('54',
   'ff',
   'os',
   'dk',
   'prze',
   'ozn',
   'mos',
   'poj',
   '3',
   '[subj(np(nom)),xp(caus)]',
   'tak',
   'neut',
 

In [347]:
def transform_to_dependency_format(tree):
    """
    Convert a tree into a list of ``(token, rule, parent, label)`` tuples.

    The branches produced by ``dependency_tree`` are turned into rows:

    - token/rule: the second fields of all branch entries except the last
      one are collected; branches past the terminal ones get the prefix
      "__nonterminal__" (or the pair "__wypowiedzenie__" when nothing was
      collected). The first collected value is the token, the rest joined
      with "-" is the rule; a single collected value yields
      ("0", "wypowiedzenie").
    - parent: 0 for a one-entry branch whose nid is "0"; otherwise the
      1-based position of the branch that starts at the nid stored in the
      last entry of this branch.
    - label: taken positionally from the labels of ``dependency_tree``.
    """
    branches, labels, n_terminals = dependency_tree(tree)

    tokens = []
    rules = []
    for position, branch in enumerate(branches):
        names = [entry[1] for entry in branch[:-1]]

        if position >= n_terminals:
            if names:
                names = ["__nonterminal__"] + names
            else:
                names = ["__wypowiedzenie__", "__wypowiedzenie__"]

        if len(names) > 1:
            tokens.append(names[0])
            rules.append("-".join(names[1:]))
        else:
            tokens.append("0")
            rules.append("wypowiedzenie")

    branch_starts = [branch[0][0] for branch in branches]

    parent_ids = []
    for branch in branches:
        closing_nid = branch[-1][0]

        if len(branch) == 1 and closing_nid == "0":
            parent_ids.append(0)
        else:
            # "+1" so that the data format matches the Stanford one -
            # tokens are numbered from 1, not from 0
            matches = np.where([closing_nid == start for start in branch_starts])[0]
            parent_ids.append(matches[0] + 1)

    return list(zip(tokens, rules, parent_ids, labels))

In [352]:
# `trees` is created by the corpus-loading cell further down (the one that
# globs `data_folder`), so that cell has to be executed before this one.
# This also rebinds `dep_tree`, previously assigned from dependency_tree().
dep_tree = transform_to_dependency_format(trees[0])
dep_tree

[('Był', 'formaczas-fwe-ff', 31, 1),
 ('bez', 'przyimek', 20, 1),
 ('karty', 'formarzecz-fno', 26, 1),
 ('jazdy', 'formarzecz-fno', 26, 1),
 (',', 'przec', 21, 1),
 ('a', 'spójnik', 21, 1),
 ('instruktor', 'formarzecz-fno-fw', 32, 1),
 ('nie', 'partykuła', 22, 1),
 ('posiada', 'formaczas', 22, 1),
 ('uprawnień', 'formarzecz-fno', 27, 1),
 ('do', 'przyimek', 23, 1),
 ('szkolenia', 'formarzecz-fno', 28, 1),
 ('kierowców', 'formarzecz-fno', 28, 1),
 ('na', 'przyimek', 24, 1),
 ('tę', 'formaprzym-fpt', 29, 1),
 ('kategorię', 'formarzecz-fno', 29, 1),
 ('prawa', 'formarzecz-fno', 30, 1),
 ('jazdy', 'formarzecz-fno', 30, 1),
 ('.', 'znakkonca', 25, 1),
 ('__nonterminal__', 'fpm-fw', 31, 1),
 ('__nonterminal__', 'zdanie', 25, 1),
 ('__nonterminal__', 'fwe-ff', 32, 1),
 ('__nonterminal__', 'fpm', 27, 1),
 ('__nonterminal__', 'fpm', 28, 1),
 ('__wypowiedzenie__', '__wypowiedzenie__', 0, 1),
 ('__nonterminal__', 'fno', 20, 1),
 ('__nonterminal__', 'fno-fw', 32, 1),
 ('__nonterminal__', 'fno', 23

In [349]:
len(dep_tree)

21

In [353]:
ET.dump(trees[0])

<tree grammar_no="1505562921" sent_id="NKJP_1M_1305000000612/morph_1-p/morph_1.27-s"><text>Był bez karty jazdy, a instruktor nie posiada uprawnień do szkolenia kierowców na tę kategorię prawa jazdy.</text>
  <startnode from="0" to="19">wypowiedzenie</startnode>
  <forest-stats cputime="21.942414412999998" inferences="30186492" nodes="403" trees="24585" />
    <answer-data>
        <base-answer type="FULL" username="none">
            <comment>AUTO</comment>
        </base-answer>
        <extra-answer type="FULL" username="paulinar">
            <comment>AUTO</comment>
        </extra-answer>
        <extra-answer type="FULL" username="sebastianz">
            <comment>AUTO</comment>
        </extra-answer>
    </answer-data>
  <node chosen="true" from="0" nid="0" subtrees="24585" to="19"><nonterminal>
      <category>wypowiedzenie</category>
    </nonterminal>
    <children chosen="true" rule="w"><child from="0" head="true" nid="1" to="18" /><child from="18" head="false" nid="261" to=

In [350]:
def write_dependency_format(dep_tree, folder):
    """
    Append one sentence to the four dependency-format files in ``folder``.

    ``dep_tree`` is a sequence of ``(token, rule, parent, label)`` rows. One
    space-separated line is appended to each of ``tokens.txt``,
    ``rules.txt``, ``parents.txt`` and ``labels.txt``, so line *k* of every
    file describes the same sentence. The files are opened in append mode:
    calling the function again adds further lines.

    Parameters
    ----------
    dep_tree : sequence of 4-tuples
        Rows as returned by ``transform_to_dependency_format``.
    folder : str or path-like
        Existing directory that holds (or will hold) the four files.
    """
    import os

    columns = (("tokens.txt", 0), ("rules.txt", 1), ("parents.txt", 2), ("labels.txt", 3))

    # Build every line before touching the disk, so a malformed row cannot
    # leave the four files with a different number of lines.
    lines = [
        (file_name, " ".join(str(row[column]) for row in dep_tree) + "\n")
        for file_name, column in columns
    ]

    for file_name, line in lines:
        # The tokens are Polish text: pin the encoding instead of relying on
        # the platform default.
        with open(os.path.join(folder, file_name), "a", encoding="utf-8") as f:
            f.write(line)

In [270]:
# Appends the sentence held in `dep_tree` to the four files under ./Data.
# The files are opened in append mode, so re-running this cell adds the same
# lines again.
write_dependency_format(dep_tree, "Data")

In [278]:
# Despite the name this is a recursive glob pattern, not a directory: it is
# passed to glob.iglob(..., recursive=True) and matches every .xml file below
# the corpus root.
data_folder = "../Składnica-frazowa-171220/**/*.xml"

In [279]:
import glob

MAX_FORESTS = 5            # stop once this many forests have been converted
MAX_FOREST_TREES = 100000  # forests with at least this many trees are skipped


def _negatives_to_sample(num_trees):
    """Return how many negative trees to draw from a forest of ``num_trees`` trees."""
    if num_trees < 2:
        # a single-tree forest holds only the positive tree
        return 0
    if num_trees < 10:
        return 1
    if num_trees < 20:
        return 3
    if num_trees < 30:
        return 4
    if num_trees < 40:
        return 5
    if num_trees < 10000:
        return 10
    # NOTE(review): forests with 10000 or more trees get no negative samples
    # at all - kept as in the original ladder, confirm that this is intended.
    return 0


j = 1
trees, labels = [], []
skipped = []  # (filename, exception) for every file that could not be used

for filename in glob.iglob(data_folder, recursive=True):

    try:
        forest = ET.parse(filename)
        num_trees = number_of_trees_in_forest(forest)

        if num_trees < MAX_FOREST_TREES:

            print(j)
            print(num_trees)

            # Collect the trees of this sentence first and publish them only
            # when everything succeeded, so a failure half-way does not leave
            # a partial sentence in `trees` / `labels`.
            sentence_trees = [get_positive_tree(forest)]
            sentence_labels = [1]

            for _ in range(_negatives_to_sample(num_trees)):
                negative_tree = get_random_negative_tree(forest)
                if negative_tree is not None:
                    sentence_trees.append(negative_tree)
                    sentence_labels.append(0)

            trees.extend(sentence_trees)
            labels.extend(sentence_labels)

            j += 1

    except Exception as error:
        # Unusable files (wrong root tag, no correct parse, malformed XML, ...)
        # are still skipped, but they are recorded instead of being swallowed
        # silently, and KeyboardInterrupt is no longer caught.
        skipped.append((filename, error))

    if j > MAX_FORESTS:
        break

1
24585
2
240
3
112
4
351
5
213


In [280]:
len(trees)

45

In [351]:
# The loop variable is deliberately not called `tree`: that name is bound to
# the forest parsed at the top of the notebook and is still needed by other
# cells (e.g. get_positive_tree(tree)); rebinding it here would leave it
# pointing at the last generated tree.
for i, sentence_tree in enumerate(trees):
    print(i)
    write_dependency_format(transform_to_dependency_format(sentence_tree), "Data")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44


In [6]:
def _tree_height(xml_tree, node_id=0):
    """
    Compute the height of the (sub)tree rooted at the node ``node_id``.

    The height is the number of nodes on the longest path from that node
    down to a leaf, so a leaf has height 1. For a forest, where a node may
    have several ``children`` alternatives, the maximum over all alternatives
    is taken.

    Parameters
    ----------
    xml_tree : xml.etree.ElementTree.ElementTree or xml.etree.ElementTree.Element
        A tree or a forest, or the root element of one.
    node_id : int or str
        ``nid`` of the node to start from.

    Raises
    ------
    ValueError
        If a referenced node does not exist.
    """
    # Use the argument that was passed in (not a global variable).
    if isinstance(xml_tree, ET.Element):
        root = xml_tree
    else:
        root = xml_tree.getroot()

    # nid -> node, first occurrence in document order wins
    nodes_by_nid = {}
    for candidate in root.findall(".//node"):
        nid = candidate.get("nid")
        if nid is not None:
            nodes_by_nid.setdefault(nid, candidate)

    # Heights are cached per nid: in a packed forest the same node is
    # referenced from many alternatives and must not be recomputed each time.
    heights = {}

    def height_of(nid):
        if nid not in heights:
            node = nodes_by_nid.get(nid)
            if node is None:
                raise ValueError('No <node> with nid="' + nid + '"')
            child_ids = [child.attrib["nid"] for child in node.findall(".//children//child")]
            heights[nid] = 1 + max((height_of(child_id) for child_id in child_ids), default=0)
        return heights[nid]

    return height_of(str(node_id))
        

In [7]:
_tree_height(tree.getroot())

15

In [8]:
def number_of_nodes(tree):
    """
    Count the ``node`` elements that are direct children of the root.

    tree - a tree (ElementTree) or the root element of a tree
    """
    root = tree if type(tree) == ET.Element else tree.getroot()
    node_elements = root.findall("node")
    return len(node_elements)

In [10]:
number_of_nodes(tree)

124

In [233]:
def get_subtree_label(tree, node):
    """
    Return 1 when the subtree rooted at ``node`` is positive, otherwise 0.

    - a node without a ``children`` element (a leaf) is positive,
    - a node whose ``children`` element has ``chosen="false"`` is negative,
    - otherwise the node is positive only if the subtrees of all nodes
      referenced by its ``child`` elements are positive.

    Parameters
    ----------
    tree : xml.etree.ElementTree.ElementTree
        Tree in which the referenced nodes are looked up by ``nid``.
    node : xml.etree.ElementTree.Element
        The ``node`` element to label.
    """
    children = node.find("children")

    if children is None:
        return 1

    if children.attrib["chosen"] == "false":
        return 0

    # The function recurses into itself. The previous body called
    # `is_positive_subtree`, a name that is not defined anywhere in this
    # notebook.
    return int(all(
        get_subtree_label(tree, tree.find(".//node[@nid='" + child.attrib["nid"] + "']"))
        for child in children.findall("child")
    ))
        

In [234]:
# Pick one child of the tree's root by position and label its subtree.
# list(element) replaces Element.getchildren(), removed in Python 3.9.
# NOTE(review): index 14 is an arbitrary position; which node it selects
# depends on the random tree that was drawn.
node = list(random_tree.getroot())[14]
a = get_subtree_label(random_tree, node)
a
    

1

In [157]:
# Attributes of the same root child as above; list(element) replaces
# Element.getchildren(), removed in Python 3.9.
list(random_tree.getroot())[14].attrib

{'chosen': 'true', 'from': '1', 'nid': '9', 'subtrees': '1', 'to': '2'}

In [88]:
[x.attrib["nid"] for x in node.find("children").findall("child")]

['1', '71']

In [104]:
ET.dump(random_tree)

<tree grammar_no="1505562921" sent_id="NKJP_1M_0401000002/morph_4-p/morph_4.67-s"><text>Papież Formosus zmarł w kwietniu 896 w wyniku nieznanej choroby lub otrucia.</text>
  <startnode from="0" to="13">wypowiedzenie</startnode>
  <forest-stats cputime="10.487813580000001" inferences="7448127" nodes="124" trees="173" />
    <answer-data>
        <base-answer type="FULL" username="none">
            <comment>AUTO</comment>
        </base-answer>
        <extra-answer type="FULL" username="witoldk">
            <comment>AUTO</comment>
        </extra-answer>
        <extra-answer type="FULL" username="piotrb">
            <comment>AUTO</comment>
        </extra-answer>
    </answer-data>
  <tree-stats height="15" nodes="49" /><node chosen="true" from="0" nid="0" subtrees="173" to="13"><nonterminal>
      <category>wypowiedzenie</category>
    </nonterminal>
    <children chosen="true" rule="w"><child from="0" head="true" nid="1" to="12" /><child from="12" head="false" nid="71" to="13" /><

In [16]:
def is_positive(tree):
    """
    Check whether ``tree`` is positive, i.e. a correct parse tree.

    A tree is positive when every ``node`` element and every ``children``
    element in it has ``chosen="true"``. An element without the ``chosen``
    attribute counts as not chosen.

    Parameters
    ----------
    tree : xml.etree.ElementTree.ElementTree
        A single tree accepted by ``_check_sentence(tree, "tree")``.

    Returns
    -------
    bool

    Raises
    ------
    AssertionError
        If the tree is rejected by ``_check_sentence`` or has no ``node``
        element.
    """
    _check_sentence(tree, "tree")

    # At least one node must exist (find() returns None when there is none).
    if tree.find("node") is None:
        raise AssertionError('There is not "node" element in the tree')

    # iter() expects a plain tag name. With an XPath-like argument such as
    # ".//children" it matches nothing, and the check is silently skipped.
    for tag in ("node", "children"):
        for element in tree.iter(tag):
            if element.get("chosen") != "true":
                return False

    return True
        
        
    


def get_positive_tree(forest):
    """
    Return the correct (positive) tree from a packed forest.

    Starting from the node with ``nid="0"``, the single ``children``
    alternative marked ``chosen="true"`` is followed for every visited node.
    The result is a new ElementTree whose root has the tag "tree" and
    contains copies of all non-``node`` children of the forest root (the
    forest's ``stats`` element is renamed to ``forest-stats``) followed by
    the selected ``node`` elements in pre-order. The forest itself is not
    modified; everything put in the new tree is a copy.

    Parameters
    ----------
    forest : xml.etree.ElementTree.ElementTree
        Forest accepted by ``_check_sentence(forest, "forest")``.

    Raises
    ------
    AssertionError
        If the forest is rejected by ``_check_sentence``, has no chosen start
        node, has a node with more than one chosen alternative, references a
        node that is missing or not chosen, or the resulting tree fails
        ``is_positive``.
    """
    # validate the forest (raises with a message when it is not usable)
    _check_sentence(forest, "forest")

    root_old = forest.getroot()
    root_new = ET.Element("tree", root_old.attrib)

    # A forest consists of "node" elements plus extra data (sentence text,
    # forest statistics, ...). The extra data is copied here; a copy is
    # required because the tag of "stats" is changed and the original forest
    # must stay intact. Iterating the element replaces getchildren(), which
    # was removed in Python 3.9.
    for feature in root_old:
        if feature.tag != "node":
            feature_copy = deepcopy(feature)
            if feature_copy.tag == "stats":
                feature_copy.tag = "forest-stats"
            root_new.append(feature_copy)

    # nid -> node index, built once instead of one XPath scan per child.
    # setdefault keeps the first node in document order, like find() did.
    nodes_by_nid = {}
    for candidate in root_old.findall(".//node"):
        nid = candidate.get("nid")
        if nid is not None:
            nodes_by_nid.setdefault(nid, candidate)

    def add_positive_children(current_node_old):
        """Copy the node into root_new and recurse into its chosen alternative."""
        current_node_new = ET.SubElement(root_new, current_node_old.tag, current_node_old.attrib)

        # Every node is a terminal or a nonterminal with a description; copy
        # everything except the "children" alternatives. deepcopy keeps the
        # new tree independent of the forest.
        for feature in current_node_old:
            if feature.tag != "children":
                current_node_new.append(deepcopy(feature))

        # there should be at most one chosen alternative
        chosen_alternatives = current_node_old.findall('children[@chosen="true"]')
        if len(chosen_alternatives) > 1:
            raise AssertionError('More than one children has chosen="true"')

        if len(chosen_alternatives) == 0:  # leaf
            return

        chosen_old = chosen_alternatives[0]
        chosen_new = ET.SubElement(current_node_new, chosen_old.tag, chosen_old.attrib)
        for child_old in chosen_old:
            child_new = ET.SubElement(chosen_new, child_old.tag, child_old.attrib)
            child_nid = child_new.attrib["nid"]
            next_node = nodes_by_nid.get(child_nid)
            if next_node is None:
                raise AssertionError('Forest has no <node> with nid="' + child_nid + '"')
            if next_node.get("chosen") != "true":
                raise AssertionError('Node with nid="' + child_nid + '" is referenced by a chosen alternative but is not chosen')
            add_positive_children(next_node)

    # Start node: the node with nid="0" is assumed to be the root.
    # TODO: confirm that no other node can be the start node.
    node_0 = root_old.find('.//node[@nid="0"][@chosen="true"]')
    if node_0 is None:
        raise AssertionError('Forest has no <node> with nid="0" and chosen="true"')

    # build the tree
    add_positive_children(node_0)

    positive_tree = ET.ElementTree(root_new)

    # sanity check of the result
    if not is_positive(positive_tree):
        raise AssertionError("Something gone wrong - tree is not positive")

    return positive_tree
    

In [10]:
pos_tree = get_positive_tree(tree)

In [83]:
get_positive_tree(tree).write("test.xml")