In [None]:
# Software Name: DQ_dimensions_performance.ipynb
# SPDX-FileCopyrightText: Copyright (c) 2023 Universidad de Cantabria
# SPDX-License-Identifier: LGPL-3.0 
#
# This software is distributed under the LGPL-3.0 license;
# see the LICENSE file for more details.
#
# Author: Laura MARTIN <lmartin@tlmat.unican.es> et al.

In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

import holoviews as hv
from holoviews import opts
hv.extension('bokeh')
from bokeh.models import HoverTool
from bokeh.plotting import show

# What does the performance of DQ dimensions look like?

In [2]:
# The simulations/median_values/*.csv files are created with the main.m script in Matlab.
# The evaluation metrics (such as average and standard deviation) are also calculated in
# that script because of the greater decimal precision of Matlab.
n = 4  # NOTE(review): not used in this cell; kept in case another cell reads it — confirm


def _load_median(dimension):
    """Read the median-value CSV of one DQ dimension.

    Parameters
    ----------
    dimension : str
        File-name prefix, e.g. "accuracy" for
        simulations/median_values/accuracy_median.csv.

    Returns
    -------
    pandas.DataFrame
        The ';'-delimited file contents plus a 'num_entities' column that is a
        copy of the row index.
    """
    frame = pd.read_csv(f"simulations/median_values/{dimension}_median.csv", delimiter=";")
    frame['num_entities'] = frame.index
    return frame


# Each file is read exactly once; the previous `for i in range(n)` loop re-read
# the same four files on every iteration without using the loop variable.
accuracy = _load_median("accuracy")
completeness = _load_median("completeness")
precision = _load_median("precision")
timeliness = _load_median("timeliness")

In [3]:
# Accuracy: requesting and processing times as 'x' markers, total time as a line.
fig_title = "Accuracy performance"

accuracy_requesting = hv.Points(
    (accuracy['num_entities'], accuracy['requesting']), label="Requesting time"
).opts(color='#E59866', marker='x', size=7, line_width=3)

accuracy_processing = hv.Points(
    (accuracy['num_entities'], accuracy['processing']), label="Processing time"
).opts(color='#85C1E9', marker='x', size=7, line_width=3)

# Only the 'total' column is passed to the curve (no explicit x column).
# The y axis shows times, so it is labelled "Time" (it was wrongly labelled "Temperature").
# NOTE(review): the time unit is not visible here — add it to the label once confirmed.
accuracy_total = hv.Curve(accuracy['total'], label="Total time").opts(
    color='#52BE80', tools=['box_select', 'lasso_select', 'tap'],
    title=fig_title, ylabel="Time", height=600, responsive=True,
    show_grid=True, show_legend=True)

show(hv.render(accuracy_requesting * accuracy_processing * accuracy_total))

In [4]:
# Completeness: requesting and processing times as 'x' markers, total time as a line.
fig_title = "Completeness performance"

completeness_requesting = hv.Points(
    (completeness['num_entities'], completeness['requesting']), label="Requesting time"
).opts(color='#E59866', marker='x', size=7, line_width=3)

completeness_processing = hv.Points(
    (completeness['num_entities'], completeness['processing']), label="Processing time"
).opts(color='#85C1E9', marker='x', size=7, line_width=3)

# Only the 'total' column is passed to the curve (no explicit x column).
# The y axis shows times, so it is labelled "Time" (it was wrongly labelled "Temperature").
# NOTE(review): the time unit is not visible here — add it to the label once confirmed.
completeness_total = hv.Curve(completeness['total'], label="Total time").opts(
    color='#52BE80', tools=['box_select', 'lasso_select', 'tap'],
    title=fig_title, ylabel="Time", height=600, responsive=True,
    show_grid=True, show_legend=True)

show(hv.render(completeness_requesting * completeness_processing * completeness_total))

In [5]:
# Precision: requesting and processing times as 'x' markers, total time as a line.
fig_title = "Precision performance"

precision_requesting = hv.Points(
    (precision['num_entities'], precision['requesting']), label="Requesting time"
).opts(color='#E59866', marker='x', size=7, line_width=3)

precision_processing = hv.Points(
    (precision['num_entities'], precision['processing']), label="Processing time"
).opts(color='#85C1E9', marker='x', size=7, line_width=3)

# Only the 'total' column is passed to the curve (no explicit x column).
# The y axis shows times, so it is labelled "Time" (it was wrongly labelled "Temperature").
# NOTE(review): the time unit is not visible here — add it to the label once confirmed.
precision_total = hv.Curve(precision['total'], label="Total time").opts(
    color='#52BE80', tools=['box_select', 'lasso_select', 'tap'],
    title=fig_title, ylabel="Time", height=600, responsive=True,
    show_grid=True, show_legend=True)

show(hv.render(precision_requesting * precision_processing * precision_total))

In [6]:
# Timeliness: requesting and processing times as 'x' markers, total time as a line.
fig_title = "Timeliness performance"

timeliness_requesting = hv.Points(
    (timeliness['num_entities'], timeliness['requesting']), label="Requesting time"
).opts(color='#E59866', marker='x', size=7, line_width=3)

timeliness_processing = hv.Points(
    (timeliness['num_entities'], timeliness['processing']), label="Processing time"
).opts(color='#85C1E9', marker='x', size=7, line_width=3)

# Only the 'total' column is passed to the curve (no explicit x column).
# The y axis shows times, so it is labelled "Time" (it was wrongly labelled "Temperature").
# NOTE(review): the time unit is not visible here — add it to the label once confirmed.
timeliness_total = hv.Curve(timeliness['total'], label="Total time").opts(
    color='#52BE80', tools=['box_select', 'lasso_select', 'tap'],
    title=fig_title, ylabel="Time", height=600, responsive=True,
    show_grid=True, show_legend=True)

show(hv.render(timeliness_requesting * timeliness_processing * timeliness_total))