In [39]:
import pandas as pd
import json
from sklearn.preprocessing import OneHotEncoder
import numpy as np
from stellargraph import StellarGraph
from stellargraph.mapper import PaddedGraphGenerator
from tensorflow import keras


class GraphPredictor:
    """Run a saved Keras model on a single graph stored as a JSON file.

    A graph file is parsed into node and edge tables, the node types are
    one-hot encoded against ``NODE_TYPES``, and the result is wrapped in a
    ``StellarGraph`` that is handed to the model through a
    ``PaddedGraphGenerator``.
    """

    # Vocabulary for the one-hot node features; a node type that is not in
    # this list becomes an all-zero row (``handle_unknown='ignore'``).
    # NOTE(review): presumably this has to match the vocabulary the model was
    # trained with -- confirm before changing the list.
    NODE_TYPES = ['require container', 'function', 'local variable', 'file',
                  'tableconstructor', 'module', 'statement:keyword',
                  'variable container', 'statement:numericfor', 'statement:genericfor',
                  'statement:if', 'global function', 'statement:functioncall',
                  'statement:assign', 'function container', 'statement:localassign',
                  'global variable', 'statement:globalfunction', 'statement:while',
                  'statement:localfunction', 'interface container', 'interface',
                  'require local variable', 'statement:do', 'statement:repeat']

    def __init__(self, model_path):
        """Load the Keras model saved at ``model_path``."""
        self.model = keras.models.load_model(model_path)

    def _node_type_encoder(self):
        """Return the one-hot encoder fitted on ``NODE_TYPES``.

        The encoder only depends on the constant vocabulary, so it is fitted
        on first use and cached on the instance instead of being refitted for
        every graph.
        """
        encoder = getattr(self, '_encoder', None)
        if encoder is None:
            encoder = OneHotEncoder(handle_unknown='ignore')
            encoder.fit(np.array(self.NODE_TYPES).reshape(-1, 1))
            self._encoder = encoder
        return encoder

    def _flow(self, stellar_graph, targets=None):
        """Wrap one graph in a ``PaddedGraphGenerator`` and return its flow."""
        generator = PaddedGraphGenerator(graphs=[stellar_graph])
        return generator.flow(graphs=[stellar_graph], targets=targets)

    def create_StellarGraph(self, graph):
        """Build the directed ``StellarGraph`` and the label of a parsed graph.

        Args:
            graph: dict holding ``encNodeTypes`` (node feature frame indexed
                by node id), ``edges`` (frame with ``from``/``to`` columns),
                ``isSpec`` and ``isTest``.

        Returns:
            Tuple ``(stellar_graph, label)`` where ``label`` is
            ``graph['isSpec'] or graph['isTest']``.
        """
        label = graph['isSpec'] or graph['isTest']
        stellar_graph = StellarGraph(
            nodes=graph['encNodeTypes'],
            edges=graph['edges'],
            source_column='from',
            target_column='to',
            is_directed=True,
        )
        return stellar_graph, label

    def encode_node_types(self, graph):
        """Store the one-hot encoded node types in ``graph['encNodeTypes']``.

        The feature frame reuses the index of ``graph['nodes']`` (the node
        ids), so every feature row stays attached to the node id that the
        edge list refers to, even when ids are not ``0..n-1`` in file order.

        Args:
            graph: dict whose ``nodes`` entry is a frame with a ``type``
                column; it is modified in place.
        """
        nodes = graph['nodes']
        encoded = self._node_type_encoder().transform(nodes['type'].values.reshape(-1, 1))
        graph['encNodeTypes'] = pd.DataFrame(encoded.toarray(), index=nodes.index)

    def read_graph(self, graph_path, require_label=True):
        """Parse a graph JSON file into a ``StellarGraph`` and its label.

        The file must provide ``_filename``, ``nodes`` (records with ``id``
        and ``type``) and ``edges`` (records with ``from`` and ``to``).

        Args:
            graph_path: path of the JSON file to read.
            require_label: when true (the default) ``_isTest`` and ``_isSpec``
                must be present. When false and either flag is missing, the
                label is returned as ``None`` so unlabelled graphs can still
                be scored.

        Returns:
            Tuple ``(stellar_graph, label)`` as built by
            ``create_StellarGraph``.

        Raises:
            KeyError: if a required field is missing from the file.
        """
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(graph_path, encoding='utf-8') as json_file:
            data = json.load(json_file)

        graph = {'filename': data['_filename']}
        has_labels = '_isTest' in data and '_isSpec' in data
        if require_label or has_labels:
            graph['isTest'] = int(data['_isTest'])
            graph['isSpec'] = int(data['_isSpec'])
        else:
            graph['isTest'] = None
            graph['isSpec'] = None
        graph['nodes'] = pd.DataFrame(data['nodes'], columns=['id', 'type']).set_index('id')
        graph['edges'] = pd.DataFrame(data['edges'], columns=['from', 'to'])

        self.encode_node_types(graph)
        return self.create_StellarGraph(graph)

    def predict(self, graph_path):
        """Return the first model output for the graph stored at ``graph_path``.

        The label is not needed for scoring, so the file may omit
        ``_isTest`` / ``_isSpec``.
        """
        stellar_graph, _ = self.read_graph(graph_path, require_label=False)
        return self.model.predict(self._flow(stellar_graph))[0][0]

    def evaluate(self, graph_path):
        """Evaluate the model on one labelled graph file.

        Returns:
            Whatever ``model.evaluate`` returns for the single-graph flow
            (the notebook pairs it with ``model.metrics_names``).

        Raises:
            KeyError: if the file does not carry ``_isTest`` / ``_isSpec``.
        """
        stellar_graph, label = self.read_graph(graph_path)
        return self.model.evaluate(self._flow(stellar_graph, targets=[label]), verbose=0)

In [40]:
# Location passed to GraphPredictor, which loads the saved Keras model from it.
MODEL_PATH = 'model'
graph_predictor = GraphPredictor(MODEL_PATH)

In [52]:
# Score one graph file and report the model output as a percentage.
sample_graph_path = '../data/_TEST/file.lua.json'
result = graph_predictor.predict(sample_graph_path)
percent_text = str(round(result * 100, 2))
print(f'This is {percent_text}% TEST file')

This is 75.98% TEST file


In [55]:
# Evaluate the model on one labelled graph file and list every reported metric.
test_metrics = graph_predictor.evaluate('../data/_TEST/file.lua.json')
metric_names = graph_predictor.model.metrics_names
print("\nTest Metrics:")
for name, val in zip(metric_names, test_metrics):
    print(f"\t{name}: {val:0.4f}")


Test Metrics:
	loss: 0.2747
	acc: 1.0000


In [43]:
# Show the repr of the Keras model object held by the predictor.
graph_predictor.model

<tensorflow.python.keras.engine.functional.Functional at 0x7f3327a73a00>