In [6]:
from zss import simple_distance, distance
from zss import Node

class TreeNode:
    """A labelled node of an ordered, rooted tree.

    Instance attributes (set in ``__init__``):
        value: the node label. ``text_to_tree`` in this notebook stores
            strings, and ``None`` for its placeholder root.
        children: list of child ``TreeNode`` objects in insertion order.
    """

    def __init__(self, value):
        self.value = value
        self.children = []

    def add_child(self, child):
        """Append ``child`` as the last child of this node."""
        self.children.append(child)

    def print_tree(self, depth=0):
        """Print the subtree rooted here, one label per line.

        Each level below the starting node is indented by two more spaces;
        ``depth`` is the indentation level used for this node.
        """
        # str() leaves string labels unchanged but lets non-string labels
        # (e.g. a None placeholder root) print instead of raising TypeError.
        print("  " * depth + str(self.value))
        for child in self.children:
            child.print_tree(depth + 1)

    def to_zss_node(self):
        """Return a ``zss.Node`` tree mirroring this subtree.

        Labels and child order are copied as-is; a leaf gets an empty
        children list.
        """
        return Node(self.value, [child.to_zss_node() for child in self.children])

    def count_nodes(self):
        """Return the number of nodes in this subtree, including this node."""
        return 1 + sum(child.count_nodes() for child in self.children)

    def count_edges(self):
        """Return the number of parent-child edges in this subtree."""
        # Every child contributes one edge to this node plus its own subtree's edges.
        return len(self.children) + sum(child.count_edges() for child in self.children)
        


In [69]:
def text_to_tree(sentence):
    """Parse a prefix-notation expression into a ``TreeNode`` tree.

    Grammar handled (after every space in ``sentence`` is removed, including
    spaces inside quoted labels):
      * ``'label'``  - a single-quoted run becomes one node with that label;
      * any other character except ``(``, ``)`` and ``,`` becomes a
        one-character node (unquoted multi-character labels are therefore
        split into one node per character);
      * ``(``        - the most recently created node becomes the parent of
        the tokens that follow;
      * ``)``        - closes the current parent;
      * ``,``        - separator between siblings, ignored outside quotes.

    Args:
        sentence: expression text, e.g. ``"+( 'MG', 'IX' )"``.

    Returns:
        The node that is current when the input ends. For input of the form
        ``op(...)`` this is the ``op`` node. For input containing no
        parentheses (including the empty string) it is a placeholder node
        whose ``value`` is ``None`` and whose children are the parsed tokens.

    Raises:
        ValueError: if ``(`` appears before any token, if ``)`` has no
            matching ``(``, or if a quoted label is not terminated.
    """
    text = sentence.replace(" ", "")
    current = TreeNode(None)  # placeholder parent for top-level tokens
    open_nodes = []           # nodes whose '(' has not been closed yet
    last_node = None          # most recently created node; target of the next '('
    pos = 0
    while pos < len(text):
        ch = text[pos]
        if ch == '(':
            if last_node is None:
                raise ValueError(
                    "'(' at position %d has no preceding node to attach children to" % pos
                )
            open_nodes.append(last_node)
            current = last_node
            pos += 1
        elif ch == ')':
            if not open_nodes:
                raise ValueError("unmatched ')' at position %d" % pos)
            open_nodes.pop()
            # When the outermost group closes, `current` deliberately stays on
            # that group's node so that it is what gets returned.
            if open_nodes:
                current = open_nodes[-1]
            pos += 1
        elif ch == ',':
            # Sibling separator: carries no label of its own.
            pos += 1
        elif ch == "'":
            closing = text.find("'", pos + 1)
            if closing == -1:
                raise ValueError("unterminated quoted label starting at position %d" % pos)
            last_node = TreeNode(text[pos + 1:closing])
            current.add_child(last_node)
            pos = closing + 1
        else:
            last_node = TreeNode(ch)
            current.add_child(last_node)
            pos += 1
    return current

In [65]:
# Drop every space and comma so only labels, quotes and parentheses remain,
# then echo the result as the cell output.
seq2 = seq2.replace(" ", "").replace(",", "")
seq2

"→('FS'+('MG''IX'→('OC'*('VI'0)))'VU'X(0→('WE''BS'))'PX')"

In [66]:
# Sanity check of the slicing used for quoted labels: take the two characters
# at indices 3 and 4 of seq2 and print them.
node = seq2[3:5]
print(node)

FS


In [67]:
# Parse seq2 into a tree; `root` is what the following print cell displays.
# NOTE(review): the saved output under this cell is a column of quote
# characters, but text_to_tree as defined in this notebook prints nothing -
# the output looks stale; re-run to confirm.
root = text_to_tree(seq2)

'
'
'
'
'
'
'
'
'


In [68]:
# Show the parsed tree, one label per line, indented by depth.
root.print_tree()

→
  FS
  +
    MG
    IX
    →
      OC
      *
        VI
        0
  VU
  X
    0
    →
      WE
      BS
  PX


In [6]:
def tree_edit_distance(graph1, graph2):
    """Return the zss tree edit distance between two trees.

    Both arguments must provide ``to_zss_node()``; the converted trees are
    passed, in order, to ``simple_distance`` and its result is returned.
    """
    return simple_distance(graph1.to_zss_node(), graph2.to_zss_node())

In [7]:
def max_edit_distance(graph1, graph2):
    """Return the normalisation bound used for the edit distance.

    The bound is the node count plus the edge count of ``graph1`` added to
    the node count plus the edge count of ``graph2``. Both arguments must
    provide ``count_nodes()`` and ``count_edges()``.

    NOTE(review): this bound counts edges as well as nodes; whether that
    matches the cost model of the distance it normalises should be confirmed.
    """
    return sum(tree.count_nodes() + tree.count_edges() for tree in (graph1, graph2))

In [8]:
def graph_edit_similarity(sentence1, sentence2):
    """Return a similarity score for two expression strings.

    Each string is parsed with ``text_to_tree``; the score is
    ``1 - tree_edit_distance(t1, t2) / max_edit_distance(t1, t2)`` for the
    two resulting trees.
    """
    first_tree, second_tree = text_to_tree(sentence1), text_to_tree(sentence2)
    normalised = tree_edit_distance(first_tree, second_tree) / max_edit_distance(first_tree, second_tree)
    return 1 - normalised

In [3]:
seq2 = '→( \'FS\', +( \'MG\', \'IX\', →( \'OC\', *( \'VI\', 0 ) ) ), \'VU\', X( 0, →( \'WE\', \'BS\' ) ), \'PX\' )'

In [9]:
# Same expression as seq2 but with unquoted labels.
# NOTE(review): text_to_tree in this notebook turns each unquoted character
# into its own node, so labels such as FS would be split - confirm that this
# variant is still wanted.
seq = '→( FS, +( MG, IX, →( OC, *( VI, 0 ) ) ), VU, X( 0, →( WE, BS ) ), PX )'

In [16]:
# Parse seq2 into a tree; `root` is what the following print cell displays.
root = text_to_tree(seq2)

In [17]:
# Show the parsed tree, one label per line, indented by depth.
# NOTE(review): the saved output under this cell lists quote characters as
# separate nodes, which the quote handling in text_to_tree as defined in this
# notebook would not produce - the output looks stale; re-run to confirm.
root.print_tree()

→
  '
  F
  S
  '
  ,
  +
    '
    M
    G
    '
    ,
    '
    I
    X
    '
    ,
    →
      '
      O
      C
      '
      ,
      *
        '
        V
        I
        '
        ,
        0
  ,
  '
  V
  U
  '
  ,
  X
    0
    ,
    →
      '
      W
      E
      '
      ,
      '
      B
      S
      '
  ,
  '
  P
  X
  '
