# GraphPredictor
Trieda na jednoduchu manipulaciu s natrenovanym modelom neuronovej siete

In [2]:
import pandas as pd
import json
from sklearn.preprocessing import OneHotEncoder
import numpy as np
from stellargraph import StellarGraph
from stellargraph.mapper import PaddedGraphGenerator
from tensorflow import keras
from graphviz import Digraph


class GraphPredictor:
    """Convenience wrapper around a trained Keras graph-classification model.

    Graphs are exchanged as plain dicts with (at least) the keys produced by
    :meth:`read_graph`: ``filename``, ``nodes``, ``edges``, ``encNodeTypes``
    and the label flags ``is_shebang`` / ``is_spec``.
    """

    # Node types the one-hot encoder is fitted on; any other type is encoded
    # as an all-zero row (``handle_unknown='ignore'``).
    NODE_TYPES = ['require container', 'function', 'local variable', 'file',
                  'tableconstructor', 'module', 'statement:keyword',
                  'variable container', 'statement:numericfor', 'statement:genericfor',
                  'statement:if', 'global function', 'statement:functioncall',
                  'statement:assign', 'function container', 'statement:localassign',
                  'global variable', 'statement:globalfunction', 'statement:while',
                  'statement:localfunction', 'interface container', 'interface',
                  'require local variable', 'statement:do', 'statement:repeat']

    def __init__(self, model):
        """Store the model to use for predictions.

        Args:
            model: Either a path (``str``) that is passed to
                ``keras.models.load_model`` or an already loaded model object.
        """
        if isinstance(model, str):
            self.model = keras.models.load_model(model)
        else:
            self.model = model

    def create_StellarGraph(self, graph):
        """Build a directed ``StellarGraph`` from a loaded graph dict.

        Args:
            graph: Dict with ``encNodeTypes`` (node feature frame) and
                ``edges`` (frame with ``from`` / ``to`` columns).

        Returns:
            The constructed ``StellarGraph``.
        """
        return StellarGraph(
            nodes=graph['encNodeTypes'],
            edges=graph['edges'],
            source_column='from',
            target_column='to',
            is_directed=True,
        )

    def create_label(self, graph):
        """Return the one-hot class label ``[basic, shebang, spec]``.

        ``is_shebang`` takes precedence over ``is_spec`` when both are set.
        """
        if graph['is_shebang']:
            return [0, 1, 0]
        if graph['is_spec']:
            return [0, 0, 1]
        return [1, 0, 0]

    def encode_node_types(self, graph):
        """One-hot encode the node types and store them in ``graph['encNodeTypes']``.

        The graph dict is modified in place; nothing is returned.
        """
        enc = OneHotEncoder(handle_unknown='ignore')
        enc.fit(np.array(self.NODE_TYPES).reshape(-1, 1))
        encoded = enc.transform(graph['nodes']['type'].values.reshape(-1, 1))
        # Index the feature rows by the node ids so they line up with the
        # 'from'/'to' ids used in the edge list. A default RangeIndex only
        # works when the ids happen to be 0..n-1 in row order.
        graph['encNodeTypes'] = pd.DataFrame(
            encoded.toarray(), index=graph['nodes'].index
        )

    def read_graph(self, graph_path):
        """Load a graph from a JSON file and encode its node types.

        Args:
            graph_path: Path to the JSON file describing the graph.

        Returns:
            Dict with the file metadata, the ``nodes`` frame (indexed by
            ``id``), the ``edges`` frame and the ``encNodeTypes`` features.
        """
        with open(graph_path, encoding='utf-8') as json_file:
            json_data = json.load(json_file)

        nodes = pd.DataFrame(json_data['nodes'], columns=['id', 'type', 'text'])
        graph = {
            'filename': json_data['_filename'],
            'path': json_data['_path'],
            'is_test': int(json_data['_isTest']),
            'is_spec': int(json_data['_isSpec']),
            'is_shebang': int(json_data['_isShebang']),
            'nodes': nodes.set_index('id'),
            'edges': pd.DataFrame(json_data['edges'], columns=['from', 'to']),
        }
        self.encode_node_types(graph)

        return graph

    def _load_graphs(self, graphs):
        """Resolve a sequence of paths and/or loaded graph dicts to graph dicts.

        Raises:
            ValueError: If ``graphs`` is empty.
        """
        if len(graphs) == 0:
            raise ValueError('graphs must contain at least one graph or path')
        # Each entry is checked on its own so paths and loaded graphs may be mixed.
        return [self.read_graph(g) if isinstance(g, str) else g for g in graphs]

    def show_graph(self, graph):
        """Return a graphviz ``Digraph`` visualising the graph.

        Args:
            graph: Path to a graph JSON file or an already loaded graph dict.
        """
        graph_raw = self.read_graph(graph) if isinstance(graph, str) else graph

        dot = Digraph(format='png')

        for idx, row in graph_raw['nodes'].iterrows():
            # Format instead of concatenating so a missing type/text value
            # cannot raise a TypeError.
            dot.node(str(idx), f"{row['type']}\n{row['text']}", shape='box')

        for _, row in graph_raw['edges'].iterrows():
            dot.edge(str(row['from']), str(row['to']))

        return dot

    def predict(self, graphs, print_predictions=True):
        """Run the model on the given graphs.

        Args:
            graphs: Non-empty sequence of graph JSON paths and/or loaded graph dicts.
            print_predictions: When true, print the class percentages per
                graph and return ``None``; otherwise return the raw output
                of ``model.predict``.

        Raises:
            ValueError: If ``graphs`` is empty.
        """
        graphs_raw = self._load_graphs(graphs)
        stell_graphs = [self.create_StellarGraph(g) for g in graphs_raw]

        generator = PaddedGraphGenerator(graphs=stell_graphs)
        predicts = self.model.predict(generator.flow(graphs=stell_graphs))

        if not print_predictions:
            return predicts

        for graph_raw, p in zip(graphs_raw, predicts):
            filename = graph_raw['filename']
            basic = round(p[0]*100, 2)
            shebang = round(p[1]*100, 2)
            spec = round(p[2]*100, 2)
            print(f'{filename}:\n\tbasic {basic}%\n\tshebang {shebang}%\n\tspec {spec}%')

        return None

    def evaluate(self, graphs):
        """Evaluate the model on labelled graphs.

        Args:
            graphs: Non-empty sequence of graph JSON paths and/or loaded graph dicts.

        Returns:
            Whatever ``model.evaluate`` returns for the generated test flow.

        Raises:
            ValueError: If ``graphs`` is empty.
        """
        graphs_raw = self._load_graphs(graphs)
        stell_graphs = [self.create_StellarGraph(g) for g in graphs_raw]
        labels = [self.create_label(g) for g in graphs_raw]

        generator = PaddedGraphGenerator(graphs=stell_graphs)
        test_gen = generator.flow(graphs=stell_graphs, targets=np.array(labels))

        return self.model.evaluate(test_gen, verbose=1)

## Usage of GraphPredictor

Konstruktor berie ako parameter cestu k modelu alebo samotny model

In [3]:
# A string argument is treated as a path and loaded with keras.models.load_model.
graph_predictor = GraphPredictor('model')

In [4]:
# JSON graph files used as sample input for the cells below.
graph_paths = [
    '../data/30log/30log.lua.json',
    '../data/bencode/dumptorrent.lua.json',
    '../data/busted/cl_error_messages.lua.json',
]

### Predictions 
Funkcia berie ako parameter list ciest ku grafom alebo nacitane grafy.

In [5]:
# print_predictions defaults to True, so this prints the basic/shebang/spec
# percentages for each graph and returns None.
graph_predictor.predict(graph_paths)

30log.lua:
	basic 88.31%
	shebang 6.2%
	spec 5.49%
dumptorrent.lua:
	basic 28.75%
	shebang 66.13%
	spec 5.12%
cl_error_messages.lua:
	basic 1.09%
	shebang 1.27%
	spec 97.65%


### Evaluation
Funkcia berie ako parameter list ciest ku grafom alebo nacitane grafy.

In [6]:
# Evaluate the model on the sample graphs and list every reported metric.
test_metrics = graph_predictor.evaluate(graph_paths)

print("\nTest Set Metrics:")
metric_names = graph_predictor.model.metrics_names
for metric_name, metric_value in zip(metric_names, test_metrics):
    print("\t{}: {:0.4f}".format(metric_name, metric_value))


Test Set Metrics:
	loss: 0.1872
	accuracy: 1.0000
	precision: 1.0000


### Graph visualization
Funkcia berie ako parameter cestu ku konkretnemu grafu alebo instanciu nacitaneho grafu

In [None]:
# show_graph accepts a path or a loaded graph and returns a graphviz Digraph.
graph_predictor.show_graph(graph_paths[0])