#### Setting

In [6]:
import sys
sys.path.append('../') # To import from parent dir

import pandas as pd
import numpy as np

import os

import time # just for dev purpose
from IPython.display import display # Just for displaying DF nicely

from itertools import product

import matplotlib.pyplot as plt
import matplotlib.dates as dates
from matplotlib.patches import Rectangle
import datetime as dt

from util.util import *
import util.const
import util.visUtil

# UI Hierarchy Based Matrix Profil Routine Discovery

## Preliminaries

Given a UI log the user has to select all context parameters in the UI log and align them to the reference model of Abb and Rehse.

### Alignment Description

In this Jupyter Notebook you achieve this by adding the context parameters to each row in the subsequent notebook-cell.<br>
Each cell is one layer of the reference model. Add the context parameters describing the level into the lists.<br>
Important: For nested UI Groups add a new group below the UI group list. Two UI groups on the same level can be added into the same list. E.g. a PPT slide and a Excel worksheet can be in the same level.<br><br>
 
Example based on SmartRPA Logs:<br>
systems = ["category"]<br>
applications = ["application"]<br>
uiGroup1 = ["workbook","browser_url","title"]<br>
uiGroup2 = ["current_worksheet"] 
uiGroup3 = ["cell_range","cell_range_number"] <br>
uiElement = ["mouse_coord","tag_category","xpath","event_src_path","event_dest_path","tag_name","tag_title","tag_href","tag_innerText","tab_pinned","tab_audible","tab_muted","window_ingognito","tab_moved_from_index","tab_moved_to_index","id"]<br>
actions = ["concept:name","eventQual"]<br>

### Alignment Hints
* Add as many UI group levels as necessary
* With each level the number of attributes outlining the level can increase, i.e., a system is described only by one parameter, e.g., system name, but an application can be at two parameters already, e.g., name and type (Edge, Browser), and an UI element can be defined by multiple parameters.
* If the UI log does not specify a level, e.g., the system or application, level the level empty in the method below.
* If you specify multiple UI group levels, they must be filled top-down. It is penalized more, if for example, the browser window is in UI group level 2 and no UI group is in level 1, which is a gap. If this is the case move the UI group up in the levels.

In [28]:
# ---- Initialize all context parameters that will be used ----
# Note: Read the alignment hints above
systems = ["category"]
applications = ["application"]
uiGroup1 = ["workbook","browser_url","title"]
uiGroup2 = ["current_worksheet"] 
uiGroup3 = ["cell_range","cell_range_number"]
uiElement = ["mouse_coord","tag_category","xpath","event_src_path","event_dest_path","tag_name","tag_title","tag_href",
             "tag_innerText","tab_pinned","tab_audible","tab_muted","window_ingognito","tab_moved_from_index","tab_moved_to_index","id"]
actions = ["concept:name","eventQual"]

# ---- Add all hierarchy levels into a list ----
hierarchy_list = [systems, applications, uiGroup1, uiGroup2, uiGroup3, uiElement, actions]

# ---- Define the path in which all files are stored ----
folder_path = "../logs/smartRPA/validation/"
folder_path = "../logs/smartRPA/percentageComparison/"

UILogValidation_filename = "validation_data.csv"
variableLenValidation_filename = "var_len_validation_data.csv"
percentagData_filename = "validationDataPercentage.csv"

## Implementation - Just Execute - Do not change

In [None]:
# ---- Do not change from here ----

# ---- Initialize all variables that will be used ----
dtypes = util.const.dtype

varLenUILogs = []
UILogs = []
percentageLogs = []

experimentColumns = ["experimentID","EncodingMethod","uiLogName","variationPercentage","percentageMotifsOverLog","motifLength","windowSize",
                     "windowSizeMatch","motifsToBeDiscovered","motifsDiscovered","numberOfOccurrancesToBeDiscovered",
                     "OccurancesDiscovered/TP","topMotifsT","DiscoveryPercentage","alignmentAccuracy","Precision","Recall","F1-Score","ExecutionTime","executionTimeEncoding","executionTimeDiscovery",
                     "motifSpots","discoveredSpots","DiscoveryLoops"]

# ---- Getting the relevant files from the folder and sorting them into different lists for processing ----

for file in os.listdir(folder_path):
    if file.startswith("LenLog"):
        percentageLogs.append(file)

# Check if data created for percentage based comparison is available
try:
    percentageValData = pd.read_csv(folder_path + percentagData_filename)
    percentageValAvailable = True
except FileNotFoundError as e:
    print(f"Could not read {percentagData_filename} from the folder.\n{e}")
    percentageValAvailable = False

In [29]:
experimentResults = pd.DataFrame(columns=experimentColumns)

for i, log in enumerate(percentageLogs):
    file = pd.read_csv(folder_path + log)
    comparisonVariables = percentageValData.loc[percentageValData['uiLogName'] == log]
    # Where the motifs were initially added
    insertSpots = comparisonVariables["motifSpots"]
    # Where are the motifs actually in the dataframe
    inserted_motif_spots = extract_numbers(insertSpots[insertSpots.index[0]])
    
    # How many motifs were added
    numMotifs = comparisonVariables["motifsToBeDiscovered"]
    selected_columns = set(col for col_list in hierarchy_list for col in col_list)
    
    # Keep only the columns that exist in df
    selected_columns = [col for col in selected_columns if col in file.columns]

  file = pd.read_csv(folder_path + log)
  file = pd.read_csv(folder_path + log)


KeyboardInterrupt: 

In [30]:
selected_columns

['tab_moved_from_index',
 'concept:name',
 'tag_innerText',
 'tab_muted',
 'eventQual',
 'tag_title',
 'tab_moved_to_index',
 'window_ingognito',
 'workbook',
 'id',
 'title',
 'tab_audible',
 'tab_pinned',
 'browser_url',
 'cell_range_number',
 'xpath',
 'application',
 'current_worksheet',
 'category',
 'cell_range',
 'event_dest_path',
 'tag_category',
 'tag_href',
 'tag_name',
 'event_src_path',
 'mouse_coord']