In [46]:
import pandas as pd
import json
from sklearn.preprocessing import OneHotEncoder
import numpy as np
from stellargraph import StellarGraph
from stellargraph.mapper import PaddedGraphGenerator
from tensorflow import keras


class GraphPredictor:
    NODE_TYPES = ['require container', 'function', 'local variable', 'file',
                  'tableconstructor', 'module', 'statement:keyword', 
                  'variable container', 'statement:numericfor', 'statement:genericfor', 
                  'statement:if', 'global function', 'statement:functioncall', 
                  'statement:assign', 'function container', 'statement:localassign', 
                  'global variable', 'statement:globalfunction', 'statement:while', 
                  'statement:localfunction', 'interface container', 'interface', 
                  'require local variable', 'statement:do', 'statement:repeat']

    def __init__(self, model_path):
        self.model = keras.models.load_model(model_path)
    
    def create_StellarGraph(self, graph):
        nodes = graph['encNodeTypes']
        edges = graph['edges']
        label = graph['isSpec'] or graph['isTest']
        stellGraph = StellarGraph(nodes=nodes, edges=edges, source_column='from', target_column='to', is_directed=True)
        return stellGraph, label
    
    def encode_node_types(self, graph):
        enc = OneHotEncoder(handle_unknown='ignore')
        X = np.array(self.NODE_TYPES).reshape(-1,1)
        enc.fit(X)
        encoded = enc.transform(graph['nodes']['type'].values.reshape(-1,1))
        graph['encNodeTypes'] = pd.DataFrame(encoded.toarray())
    
    def read_graph(self, graph_path):
        graph = {}
        with open(graph_path) as jsonFile:
            jsonData = json.load(jsonFile)
            graph['filename'] = jsonData['_filename']
            graph['isTest'] = int(jsonData['_isTest'])
            graph['isSpec'] = int(jsonData['_isSpec'])
            graph['nodes'] = pd.DataFrame(jsonData['nodes'], columns=['id', 'type'])
            graph['nodes'] = graph['nodes'].set_index('id')
            graph['edges'] = pd.DataFrame(jsonData['edges'], columns=['from', 'to'])
        self.encode_node_types(graph) 
        return self.create_StellarGraph(graph)     
    
    def predict(self, graph_path):
        stellGraph, label = self.read_graph(graph_path)
        generator = PaddedGraphGenerator(graphs=[stellGraph])
        X = generator.flow(graphs=[stellGraph])
        return self.model.predict(X)[0]
    
    def evaluate(self, graph_path):
        stellGraph, label = self.read_graph(graph_path)
        generator = PaddedGraphGenerator(graphs=[stellGraph])
        g = generator.flow(graphs=[stellGraph], targets=[label])
        return self.model.evaluate(g, verbose=0)

In [47]:
graph_predictor = GraphPredictor('model_3classes_undersample')

In [48]:
result = graph_predictor.predict('../data/cgilua/download.lua.json')
#print(f'This is {str(round(result * 100, 2))}% TEST file')
for r in result:
    print(r, end=' ')

0.17748807 0.7894641 0.033047765 

In [22]:
test_metrics = graph_predictor.evaluate('../data/lua-codegen/Graph.lua.json')
print("\nTest Metrics:")
for name, val in zip(graph_predictor.model.metrics_names, test_metrics):
    print("\t{}: {:0.4f}".format(name, val))

ValueError: in user code:

    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1233 test_function  *
        return step_function(self, iterator)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1224 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1217 run_step  **
        outputs = model.test_step(data)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1185 test_step
        self.compiled_loss(
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:152 __call__
        losses = call_fn(y_true, y_pred)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:256 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 1) and (None, 3) are incompatible
