In [265]:
import pandas as pd
import ast
import builtins
import json
from types import FunctionType

from PaladinEngine.archive.archive import Archive
from PaladinUI.paladin_server.paladin_server import PaladinServer

In [266]:
INPUT_FILE_PATH = "diff_tool_input.json"

In [267]:
def create_archive_from_csv(csv_path):
    dataframe = load_csv_to_dataframe(csv_path)
    archive_from_csv = Archive()
    rows = dataframe.to_records()
    items_for_future_build = []
    for row in rows: #TODO: look for a builtin function instead of loop?
        record_key, record_value, is_build_needed = create_record_key_value(row)
        if is_build_needed:
            items_for_future_build.append((record_key, record_value))
        else:
            archive_from_csv.store_with_original_time(record_key, record_value)
    # Build items
    for record_key, record_value in items_for_future_build:
        new_record_value = build_object_from_archive(archive_from_csv, record_value)
        archive_from_csv.store_with_original_time(record_key, new_record_value)
    return archive_from_csv

def load_csv_to_dataframe(csv_path):
    dataframe = pd.read_csv(csv_path)
    dataframe = dataframe.fillna('')
    return dataframe

def create_record_key_value(row):
    record_key = Archive.Record.RecordKey(int(row.container_id), row.field, row.stub_name, row.kind)
    is_build_needed = False

    if record_key.stub_name == "__AS__" and row.rtype in ['list', 'dict', 'tuple', 'set']:
        value_of_record = ast.literal_eval(row.value)
        type_of_record = row.rtype
        is_build_needed = True
    elif row.rtype == 'function':
        value_of_record = row.value
        type_of_record = FunctionType
    elif row.rtype == 'list':
        value_of_record = ast.literal_eval(row.value)
        type_of_record = list
    elif row.rtype == 'bool':
        value_of_record = True if row.value == 'True' else False
        type_of_record = bool
    elif row.rtype == 'dict':
        value_of_record = ast.literal_eval(row.value)
        type_of_record = dict #TODO: add same for tuple and set
    else:
        value_of_record = getattr(builtins, row.rtype)(row.value)
        type_of_record = type(value_of_record)

    record_value = Archive.Record.RecordValue(
        record_key, type_of_record, value_of_record, row.expression,
        int(row.line_no), int(row.time), row.extra
    )
    return record_key, record_value, is_build_needed

def build_object_from_archive(archive_from_csv, record_value):
    print(f"v.rtype: {record_value.rtype} | v.value: {record_value.value}")
    value_of_record = archive_from_csv.build_object(record_value.value, record_value.time)
    record_value.value = value_of_record
    record_value.rtype = type(value_of_record)
    print(f"record_value.rtype: {record_value.rtype} | value_of_record: {value_of_record}")
    return record_value

In [268]:
def convert_query_result_to_presentable_table(query_result):
    result = query_result['result']['query']
    print(result)
    json_data = json.loads(result)
    json_data.pop('keys')
    result_df = pd.DataFrame.from_dict(json_data, orient="index")
    print(result_df)
    result_df.reset_index(inplace=True)
    times_column_name = 'Time Range'
    result_df = result_df.rename(columns={'index': times_column_name})
    print(result_df)
    return result_df

In [269]:
# def get_parameter_matches_with_previous_query(parameters, csv_path):
#     parameter_matches = {}
#     for parameter in parameters:
#         parameter_matches[parameter] = input(f"Match {parameter} in {csv_path}: ")
#     return parameter_matches
#
# def replace_matched_parameters(previous_query, parameter_matches):
#     matched_query = previous_query
#     for source_parameter, dest_parameter in parameter_matches.items():
#         matched_query = matched_query.replace(source_parameter, dest_parameter)
#     return matched_query
#
# def convert_parameter_to_queryable(parameter):
#     location_separator = '@'
#     if location_separator in parameter:
#         parameter_name, parameter_location = parameter.split(location_separator)
#         queryable = f"[[{parameter_name}]]@{parameter_location}"
#     else:
#         queryable = f"[[{parameter}]]"
#     return queryable

In [270]:
def create_dataframes_from_csvs(csv_files):
    archives = []
    dataframes = []

    for csv_file_info in csv_files:
        csv_archive = create_archive_from_csv(csv_file_info["csv_file_path"])
        archives.append(csv_archive)
        server = PaladinServer.create('', csv_archive)
        raw_result = server.query(csv_file_info["query"], csv_file_info["start_time"], csv_file_info["end_time"])
        presentable_df = convert_query_result_to_presentable_table(raw_result)
        print(presentable_df)
        dataframes.append(presentable_df)

    return dataframes


In [271]:
def main():
    with open(INPUT_FILE_PATH, 'r') as fileobj:
        data = json.load(fileobj)
    csv_files = data["csv_files"]
    dataframes = create_dataframes_from_csvs(csv_files)

    result_merge_condition = data["result_merge_condition"]
    result_rows = pd.merge(
        dataframes[0], dataframes[1],
        how="inner",
        left_on=result_merge_condition["left_on"], right_on=result_merge_condition["right_on"],
        indicator=True
    )
    result_rows = result_rows[~((result_rows['result@12'].isna()) & (result_rows['result@16'].isna()))]
    print(result_rows)

    iteration_merge_condition = data["iteration_merge_condition"]
    iteration_rows = pd.merge(
        dataframes[0], dataframes[1],
        how="outer",
        left_on=iteration_merge_condition["left_on"], right_on=iteration_merge_condition["right_on"],
        indicator=True
    )
    iteration_rows = iteration_rows[((iteration_rows['result@12'].isna()) & (iteration_rows['result@16'].isna()))]
    print(iteration_rows)

    merged = pd.concat([iteration_rows, result_rows], ignore_index=True)
    print(merged)

In [272]:
if __name__ == '__main__':
    main()

{"(0, 1)": {"number@11": [null], "result@12": [null], "i@4": [null]}, "(3, 6)": {"number@11": 13, "result@12": [null], "i@4": [null]}, "(8, 8)": {"number@11": 13, "result@12": [null], "i@4": 3}, "(10, 10)": {"number@11": 13, "result@12": [null], "i@4": 5}, "(12, 12)": {"number@11": 13, "result@12": [null], "i@4": 7}, "(14, 14)": {"number@11": 13, "result@12": [null], "i@4": 9}, "(16, 16)": {"number@11": 13, "result@12": [null], "i@4": 11}, "(18, 18)": {"number@11": 13, "result@12": [null], "i@4": 12}, "(20, 500)": {"number@11": 13, "result@12": true, "i@4": 12}, "keys": ["number@11", "result@12", "i@4"]}
<class 'str'>
{'(0, 1)': {'number@11': [None], 'result@12': [None], 'i@4': [None]}, '(3, 6)': {'number@11': 13, 'result@12': [None], 'i@4': [None]}, '(8, 8)': {'number@11': 13, 'result@12': [None], 'i@4': 3}, '(10, 10)': {'number@11': 13, 'result@12': [None], 'i@4': 5}, '(12, 12)': {'number@11': 13, 'result@12': [None], 'i@4': 7}, '(14, 14)': {'number@11': 13, 'result@12': [None], 'i@4

TypeError: unhashable type: 'list'

In [None]:
"""
Run:
python C:\Avital\Github\paladin_engine\PaladinUI\paladin_cli\paladin_cli.py --run --output-file output.py --csv DiffTool\is_prime_naive.csv --run-debug-server True --port 1234
C:\Avital\Github\paladin_engine\PaladinEngine\tests\test_resources\examples\is_prime\is_prime_naive.py

#TODO:
1) keys id: update the ctor
2) time: 0 is converted to [1,5] (line_no==85)
3) set: not supported

#TODO: 28.11
1) keys: id: update the ctor? ask Oren why it is needed id(v.key)
2) store: write our function which doesn't change time
3) represent: call represent asap (maybe in ctor) instead of in to_table
4) paladin_server.py: debug_info/query/ - check that the Archive is OK

#Questions:
1) Will we always have exactly 2 tables to compare, or can there be more? A: let's start with 2, but can be more
2) How exactly are we supposed to connect each two tables? For example in is_prime,
should we just match the 13 rows of 'square' to the first 13 rows of 'naive'?
3) User input - how? Using python's input(), or maybe read from file?
Maybe should depend on the number of parameters in the query
A: use files

#TODO: 06.12
1) not interactive, use files
2) join two tables using merge/join pandas
3) we get as input the query for creating the match between both tables (total_slices_1 == total_slices_2)
4) represent: create object and check
5) change set to list
6) create more examples, more complex than is_prime
"""