# Evaluation Wrapper

## Imports

In [None]:
import os
import time
import sys
import pandas as pd
from performance_test import *
from kafka_producer import *
#from directNeo4jImporter import *
from node_deletion import *
from correctness_test import *
from partition_changer import *
from heapSize_changer import *
from streams_conf_changer import *

## Parameters

#### General Parameters

In [None]:
# General parameters (candidates for a settings file?)

# name of database

# Node labels used in neo4j, keyed by evaluation mode and node kind
node_labels = {
    'original': {
        'block': 'Block',
        'transaction': 'Transaction',
        'address': 'Address',
    },
    'test': {
        'block': 'Block_test',
        'transaction': 'Transaction_test',
        'address': 'Address_test',
    },
}

# Port is kept as a string
neo4j_port = '7687'

# Parameters which may need to be handed over and retrieved from a settings file
conf_file_path = '../example_streams.conf'
# alternative: conf_file_path = '../streams.conf'

# The directory already ends with a slash, so the file name is appended directly
path_to_neo4j_conf_directory = '/etc/neo4j/'
path_to_streams_conf = f'{path_to_neo4j_conf_directory}streams.conf'

# Kafka topic names per node kind
kafka_topics = {
    'transaction': 'transactions',
    'block': 'blocks',
}

#### Changing Parameters

In [None]:
# Parameters that change between experiment runs

# set to False if test nodes are inserted
evaluate_original = True

# block heights: the end height is derived from the start height
start_block_height = 591106 if evaluate_original else 1
end_block_height = start_block_height + (1 if evaluate_original else 5)

##############

# check correctness of the inserted nodes
check_correctness = False

# matching input on previous transactions (address, not transaction) (?)
match_on_previous_add = True

# bypass the kafka pipeline
bypass_kafka = False

# number of kafka partitions
kafka_partitions_list = [1, 2, 4, 8]

# batching (?)
batching_list = [1, 100, 400, 800]

# neo4j heap size
# TODO: the function for changing the heap size is still missing
heap_size_list = [5, 8, 16, 32]

# values of the current run; to be changed throughout the loop (!)
kafka_partitions = 1
batching_size = 1
heap_size = 5

######

# delete nodes at the end of each run
deleteNodes = True

# Parameter configurations:
# [kafka_partition, batching_value, match_on_previous_add, heap_size, bypass_kafka]
configurations = []


In [None]:
# TODO: create a table of all settings
# Each set of settings (each experiment run) gets its own counter value
# -> experimentRun
# (passed on to checkCorrectness, which can save mismatches)
experimentRun = 1

## Testing Process

### Preparation

In [None]:
# Column layout of the results table: one row per experiment run
columns = [
    # run identification and settings
    'experimentRun',
    'evaluate_original',
    'start_block_height',
    'end_block_height',
    'kafka_partitions',
    'batching',
    'match_on_previous_add',
    'heap_size',
    'bypass_kafka',
    'check_correctness',
    # values returned by the performance test
    'endTimePart1',
    'endTimePart2',
    'totalExecutionTime',
    'timeOutReached',
]

# Start with an empty results table that has only the column headers
results_df = pd.DataFrame(columns=columns)

In [None]:
# Create the directory structure for the results: <result_dir>/<mismatches_dir>
result_dir     = 'results'
mismatches_dir = 'mismatches'

# makedirs creates missing parent directories, so one call covers both
# levels; exist_ok=True makes re-running this cell safe and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(os.path.join(result_dir, mismatches_dir), exist_ok=True)
    

In [None]:
# Collect the original nodes that need to be deleted after the run.
# Default to None so that `deletion_nodes` is always defined, even when
# test nodes are evaluated and nothing is collected here.
deletion_nodes = None
if evaluate_original:

    # TODO: check correctness of the starting height (e.g. by querying neo4j)

    # Collect the nodes for the later deletion step
    deletion_nodes = getDeletionList(
        start_block_height=start_block_height,
        end_block_height=end_block_height,
        label_address=node_labels['original']['address'],
        neo4j_location='server',
        # use the shared port setting instead of repeating the literal
        neo4j_port=neo4j_port,
    )

    ### (!) start loop afterwards

In [None]:
# Show the nodes collected for deletion as the cell output
deletion_nodes

In [None]:
# Change the cypher templates in the streams file (evaluation switched on)
changeStreamsFile(
    path=path_to_streams_conf,
    kafka_topics=kafka_topics,
    evaluate_original=evaluate_original,
    matchOnAddress=match_on_previous_add,
    # True: retrieve the cypher template (rather than a query for direct insertion)
    getTemplate=True,
    node_labels=node_labels,
    evaluation=True,
)

# TODO: insert a sleep timer here

In [None]:
# if partitions change or heap size changes, then stop neo4j


In [None]:
# set kafka partitions
#partitionChanger(partitions)


In [None]:
# change_heap_size

In [None]:
# if partitions change or heap size changes, then start neo4j

# sleep timer, or is there a way to see whether neo4j is ready?

### Insertion & Performance Testing

In [None]:

# Run the performance test. The arguments are passed positionally, so their
# order must stay as it is. The four returned values are printed in the
# next cell.
(
    endTimePart1,
    endTimePart2,
    totalExecutionTime,
    timeOutReached,
) = runPerformanceTest(
    evaluate_original,
    node_labels,
    bypass_kafka,
    start_block_height,
    end_block_height,
    match_on_previous_add,
    kafka_topics,
)

In [None]:
# Print the four values returned by runPerformanceTest
print(endTimePart1, endTimePart2, totalExecutionTime, timeOutReached)

### Correctness Testing


In [None]:
# Correctness is only checked for test nodes, and only when requested
if check_correctness and not evaluate_original:
    checkCorrectness(
        start_block_height,
        end_block_height,
        node_labels,
        neo4j_port,
        experimentRun,
        printMismatches=False,
        saveMismatches=True,
    )


### Result collection

In [None]:
# Collect the settings and measurements of this run as a single results row.
# The order of the values must match the order of `columns`.
data = [[
    experimentRun,
    evaluate_original,
    start_block_height,
    end_block_height,
    kafka_partitions,
    batching_size,
    match_on_previous_add,
    heap_size,
    bypass_kafka,
    check_correctness,
    endTimePart1,
    endTimePart2,
    totalExecutionTime,
    timeOutReached,
]]

new_results_entry = pd.DataFrame(columns=columns, data=data)

# Append the row. ignore_index=True renumbers the rows 0..n-1; previously
# every appended row kept index label 0, so the index consisted of
# duplicates and the trailing sort_index() had nothing meaningful to order.
# The first row is taken as-is to avoid concatenating onto an empty frame
# (deprecated in recent pandas versions).
if results_df.empty:
    results_df = new_results_entry
else:
    results_df = pd.concat([results_df, new_results_entry], ignore_index=True)

### Deletion of inserted nodes

In [None]:
# Delete the nodes inserted during this run.
# Controlled by the deleteNodes flag: set deleteNodes = False to keep them.
if deleteNodes:
    if evaluate_original:
        # original data: delete the nodes collected in `deletion_nodes`
        deleteOriginalEvaluationNodes(
            deletion_nodes=deletion_nodes,
            node_labels=node_labels,
            neo4j_location='server',
            # use the shared port setting instead of repeating the literal
            neo4j_port=neo4j_port,
        )
    else:
        # test data: delete the test evaluation nodes
        deleteTestEvaluationNodes(
            node_labels=node_labels,
            neo4j_location='server',
            neo4j_port=neo4j_port,
        )


## After loop - Evaluation Process cleanup

Restore streams file
- remove insertion time

### Save Evaluation results

In [None]:
# Save the results table (without the index) into the results directory.
# The path is built from result_dir so it stays in sync with the directory
# that was created for the results.
results_df.to_csv(os.path.join(result_dir, 'evaluation_results.csv'), index=False)

### Restore streams file

In [None]:
# Restore the streams file: same call as for the evaluation setup, but with
# fixed flags and evaluation switched off
changeStreamsFile(
    path=path_to_streams_conf,
    kafka_topics=kafka_topics,
    evaluate_original=True,
    matchOnAddress=True,
    getTemplate=True,
    node_labels=node_labels,
    evaluation=False,
)