# Parser to convert the output of a Spark ML model's `toDebugString` into a JSON-format tree

## imports

In [1]:
import json
import pandas as pd
import numpy as np
import ast
import re

## Import vocabulary from the CountVectorizer model

In [2]:
# Read the vocabulary CSV; only its column labels are used afterwards
# (they are turned into the `vocab` array).
data_vocabulary = pd.read_csv('vocabulary.csv', sep=',')

In [3]:
# The vocabulary terms are the CSV's column labels: entry i of `vocab`
# is what parse2 substitutes for "feature i".
vocab = np.array(data_vocabulary.columns)
print(vocab)

['EDDF' 'KLAX' 'LFPG' ... 'KGYH' 'KCGZ' 'KOCK']


In [4]:
# Cast to a NumPy string dtype; the np.char routine used next operates on
# string-dtype arrays.
vocab = vocab.astype(str)

In [5]:
# Remove every space character from each vocabulary entry.
vocab = np.char.replace(vocab, ' ', '')

In [6]:
# Spot-check a single vocabulary entry.
vocab[14]

'KDFW'

## parser 1

In [7]:
def parse1(lines):
    """Parse stripped ``toDebugString`` lines into a list of tree nodes.

    The list is consumed from the front and mutated in place; recursion
    relies on this to resume where the nested call stopped.

    Args:
        lines: list of whitespace-stripped lines (header line already
            removed), each starting with ``If``, ``Else`` or leaf text
            such as ``Predict: 1.0``.

    Returns:
        A list of dicts. Branch nodes look like
        ``{'name': <condition>, 'children': [...]}`` and leaf nodes like
        ``{'name': <line>}``.
    """
    def branch():
        # Drop the leading keyword and the parentheses around the condition,
        # then parse everything nested under this branch.
        condition = ' '.join(lines.pop(0).split()[1:]).replace('(', '').replace(')', '')
        return {'name': condition, 'children': parse1(lines)}

    block = []
    while lines:
        if lines[0].startswith('If'):
            block.append(branch())
            # `lines` may be exhausted here when the input is truncated or an
            # 'If' has no matching 'Else'; check before peeking at lines[0].
            if lines and lines[0].startswith('Else'):
                block.append(branch())
        elif lines[0].startswith('Else'):
            # This 'Else' pairs with an 'If' handled by an enclosing call.
            break
        else:
            # Anything else is a leaf line.
            block.append({'name': lines.pop(0)})
    return block

# Convert Tree to JSON
def tree_json1(tree):
    """Convert a model debug string into a single-element list holding the root node.

    Lines are stripped and collected up to (not including) the first blank
    line. The first collected line (the model header) is skipped and the
    rest is handed to ``parse1``.

    Args:
        tree: the multi-line debug string.

    Returns:
        ``[{'name': 'Root', 'children': [...]}]``
    """
    stripped_lines = []
    for raw in tree.splitlines():
        text = raw.strip()
        if not text:
            # Stop at the first blank (or whitespace-only) line.
            break
        stripped_lines.append(text)

    root = {'name': 'Root', 'children': parse1(stripped_lines[1:])}
    print ('Conversion Success !')
    return [root]

## debug txt to json tree

In [8]:
# Sample debug string (header reports depth=5, numNodes=41) used below as
# input to tree_json1.
debugtxt = """DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d56b94fed0a2, depth=5, numNodes=41, numClasses=2, numFeatures=3159
  If (feature 3 <= 0.5)
   If (feature 9 <= 0.5)
    If (feature 4 <= 11.5)
     If (feature 108 <= 0.5)
      If (feature 31 <= 0.5)
       Predict: 1.0
      Else (feature 31 > 0.5)
       Predict: 0.0
     Else (feature 108 > 0.5)
      If (feature 549 <= 0.5)
       Predict: 0.0
      Else (feature 549 > 0.5)
       Predict: 1.0
    Else (feature 4 > 11.5)
     Predict: 0.0
   Else (feature 9 > 0.5)
    If (feature 533 <= 0.5)
     If (feature 154 <= 0.5)
      Predict: 0.0
     Else (feature 154 > 0.5)
      If (feature 1223 <= 13.5)
       Predict: 0.0
      Else (feature 1223 > 13.5)
       Predict: 1.0
    Else (feature 533 > 0.5)
     If (feature 0 <= 0.5)
      If (feature 9 <= 7.5)
       Predict: 1.0
      Else (feature 9 > 7.5)
       Predict: 0.0
     Else (feature 0 > 0.5)
      Predict: 0.0
  Else (feature 3 > 0.5)
   If (feature 1338 <= 0.5)
    If (feature 398 <= 48.0)
     If (feature 1146 <= 3.5)
      Predict: 0.0
     Else (feature 1146 > 3.5)
      If (feature 41 <= 5.5)
       Predict: 0.0
      Else (feature 41 > 5.5)
       Predict: 1.0
    Else (feature 398 > 48.0)
     If (feature 111 <= 31.5)
      If (feature 23 <= 0.5)
       Predict: 0.0
      Else (feature 23 > 0.5)
       Predict: 1.0
     Else (feature 111 > 31.5)
      Predict: 1.0
   Else (feature 1338 > 0.5)
    If (feature 96 <= 0.5)
     If (feature 884 <= 5.5)
      Predict: 0.0
     Else (feature 884 > 5.5)
      Predict: 1.0
    Else (feature 96 > 0.5)
     If (feature 58 <= 3.5)
      Predict: 0.0
     Else (feature 58 > 3.5)
      Predict: 1.0"""

In [9]:
# Convert the sample debug string into the nested name/children structure.
tree_json = tree_json1(debugtxt)

Conversion Success !


In [10]:
# Display the converted tree.
tree_json

[{'name': 'Root',
  'children': [{'name': 'feature 3 <= 0.5',
    'children': [{'name': 'feature 9 <= 0.5',
      'children': [{'name': 'feature 4 <= 11.5',
        'children': [{'name': 'feature 108 <= 0.5',
          'children': [{'name': 'feature 31 <= 0.5',
            'children': [{'name': 'Predict: 1.0'}]},
           {'name': 'feature 31 > 0.5',
            'children': [{'name': 'Predict: 0.0'}]}]},
         {'name': 'feature 108 > 0.5',
          'children': [{'name': 'feature 549 <= 0.5',
            'children': [{'name': 'Predict: 0.0'}]},
           {'name': 'feature 549 > 0.5',
            'children': [{'name': 'Predict: 1.0'}]}]}]},
       {'name': 'feature 4 > 11.5', 'children': [{'name': 'Predict: 0.0'}]}]},
     {'name': 'feature 9 > 0.5',
      'children': [{'name': 'feature 533 <= 0.5',
        'children': [{'name': 'feature 154 <= 0.5',
          'children': [{'name': 'Predict: 0.0'}]},
         {'name': 'feature 154 > 0.5',
          'children': [{'name': 'feature 1

**Important : il faut convertir les ' en " et supprimer les [] au début et à la fin du texte**

## Parser with airport translation

In [8]:
def parse2(lines, vocab):
    """Parse stripped ``toDebugString`` lines, naming features from ``vocab``.

    Works like ``parse1`` but rewrites ``feature <i>`` in every branch
    condition to ``#airport <vocab[i]>``. The list is consumed from the
    front and mutated in place; recursion relies on this.

    Args:
        lines: list of whitespace-stripped lines (header line already
            removed), each starting with ``If``, ``Else`` or leaf text.
        vocab: indexable sequence mapping a feature index to its label.

    Returns:
        A list of dicts. Branch nodes look like
        ``{'name': <condition>, 'children': [...]}`` and leaf nodes like
        ``{'name': <line>}``.

    Raises:
        ValueError: if the third-from-last token of a branch line is not
            an integer.
        IndexError: if a feature index is outside ``vocab``.
    """
    def branch():
        tokens = lines.pop(0).split()
        # Drop the leading keyword and the parentheses around the condition.
        condition = ' '.join(tokens[1:]).replace('(', '').replace(')', '')
        # In "<kw> (feature <i> <op> <value>)" the index is third from the end.
        feature_index = int(tokens[-3])
        condition = condition.replace(
            f'feature {feature_index}', f'#airport {vocab[feature_index]}'
        )
        return {'name': condition, 'children': parse2(lines, vocab)}

    block = []
    while lines:
        if lines[0].startswith('If'):
            block.append(branch())
            # `lines` may be exhausted here when the input is truncated or an
            # 'If' has no matching 'Else'; check before peeking at lines[0].
            if lines and lines[0].startswith('Else'):
                block.append(branch())
        elif lines[0].startswith('Else'):
            # This 'Else' pairs with an 'If' handled by an enclosing call.
            break
        else:
            # Anything else is a leaf line.
            block.append({'name': lines.pop(0)})
    return block

# Convert Tree to JSON
def tree_json2(tree, vocab):
    """Convert a model debug string into a root node, naming features via ``vocab``.

    Lines are stripped and collected up to (not including) the first blank
    line. The first collected line (the model header) is skipped and the
    rest is handed to ``parse2`` together with ``vocab``.

    Args:
        tree: the multi-line debug string.
        vocab: sequence passed through to ``parse2`` for feature lookup.

    Returns:
        ``[{'name': 'Root', 'children': [...]}]``
    """
    stripped_lines = []
    for raw in tree.splitlines():
        text = raw.strip()
        if not text:
            # Stop at the first blank (or whitespace-only) line.
            break
        stripped_lines.append(text)

    root = {'name': 'Root', 'children': parse2(stripped_lines[1:], vocab)}
    print('Conversion Success!')
    return [root]


In [15]:
# Read the whole debug string; the `with` block closes the file on exit,
# so no explicit close() call is needed.
with open('debugString.txt', 'r') as file:
    debugString = file.read()

In [16]:
# Inspect the raw text that was just read.
debugString

'DecisionTreeClassificationModel: uid=DecisionTreeClassifier_6185177ffeb0, depth=30, numNodes=13641, numClasses=2, numFeatures=3159\n  If (feature 32 <= 0.5)\n   If (feature 162 <= 1.5)\n    If (feature 41 <= 0.5)\n     If (feature 4 <= 0.5)\n      If (feature 156 <= 0.5)\n       If (feature 11 <= 0.5)\n        If (feature 411 <= 0.5)\n         If (feature 435 <= 0.5)\n          If (feature 3 <= 0.5)\n           If (feature 116 <= 0.5)\n            If (feature 38 <= 0.5)\n             If (feature 163 <= 0.5)\n              If (feature 57 <= 0.5)\n               If (feature 31 <= 3.5)\n                If (feature 436 <= 1.5)\n                 If (feature 623 <= 0.5)\n                  If (feature 168 <= 1.5)\n                   If (feature 127 <= 17.5)\n                    If (feature 187 <= 0.5)\n                     If (feature 316 <= 0.5)\n                      If (feature 16 <= 0.5)\n                       If (feature 5 <= 0.5)\n                        If (feature 471 <= 11.0)\n    

In [17]:
# Parse the debug string, replacing feature indices with vocabulary
# entries, then display the resulting tree.
tree_json_airport = tree_json2(debugString, vocab=vocab)
tree_json_airport

Conversion Success!


[{'name': 'Root',
  'children': [{'name': '#airport KMCO <= 0.5',
    'children': [{'name': '#airport YSSY <= 1.5',
      'children': [{'name': '#airport KFLL <= 0.5',
        'children': [{'name': '#airport ZGGG <= 0.5',
          'children': [{'name': '#airport LHBP <= 0.5',
            'children': [{'name': '#airport LTFM <= 0.5',
              'children': [{'name': '#airport GCXO <= 0.5',
                'children': [{'name': '#airport VILH <= 0.5',
                  'children': [{'name': '#airport ZSPD <= 0.5',
                    'children': [{'name': '#airport EDDH <= 0.5',
                      'children': [{'name': '#airport SCEL <= 0.5',
                        'children': [{'name': '#airport LFBO <= 0.5',
                          'children': [{'name': '#airport VVNB <= 0.5',
                            'children': [{'name': '#airport MMMX <= 3.5',
                              'children': [{'name': '#airport VOCB <= 1.5',
                                'children': [{'name'

In [14]:
# with open('data.json', 'w') as file:
#     json.dump(tree_json_airport, file)  # file.write() needs a str, not a list