# Figures

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math

In [10]:
def organize_data(data, technique, mappingsEG_TS):
    """Return the log10 of the measurements in a transposed results table.

    The first row of ``data`` is dropped, the remaining columns are relabelled
    with ``technique`` and every cell is converted to ``float`` before
    ``log10`` is applied.

    Parameters
    ----------
    data : pandas.DataFrame
        Transposed table as produced by the calling cells. The first row is
        discarded (presumably it holds the label column of the original CSV;
        verify against the input files).
    technique : list of str
        Column labels for the result; must have one entry per column of
        ``data``.
    mappingsEG_TS : list of str
        Not used by the computation. Kept so existing call sites keep working;
        the row labels of the result come from the index of ``data``.

    Returns
    -------
    pandas.DataFrame
        A new float-typed frame with the log10 values. Missing values stay
        NaN (they are deliberately *not* replaced by 0, whose log10 would be
        ``-inf``). ``data`` itself is left unmodified.

    Raises
    ------
    ValueError
        If a cell cannot be converted to ``float`` or ``technique`` does not
        match the number of columns.
    """
    # Work on a copy: the row slice may share memory with the caller's frame,
    # and element-wise chained assignment on it is unreliable in pandas.
    ordered_data = data.iloc[1:].copy()
    ordered_data.columns = technique

    # The transposed CSV is typically object-typed; make it numeric once and
    # let NumPy apply log10 to the whole table instead of cell by cell.
    numeric = ordered_data.astype(float)
    return np.log10(numeric)


## Plots for KGC systems and triplestores

In [26]:
# Dataset sizes, annotation techniques (columns) and systems (rows) of the
# overall timing tables.
scales = ['1K', '10K', '100K', '1M']
technique = ['Std. Reif.', 'RDF-star', 'Named Graphs', 'N-Ary Rel.']
mappingsEG_TS = ['Oxigraph', 'Fuseki', 'GraphDB', 'Morph-KGC', 'SPARQL-Anything']

# Lower-cased technique names are used as column labels of every table.
technique_columns = [name.lower() for name in technique]

# One log10-scaled table per dataset size, keyed by the scale label.
data_list = {}
for scale in scales:
    csv_path = "data/mq-" + scale + ".csv"
    file = np.transpose(pd.read_csv(csv_path))
    data_list[scale] = organize_data(file, technique_columns, mappingsEG_TS)
    print(scale, '\n', data_list[scale], '\n')

1K 
                 std. reif.  rdf-star named graphs n-ary rel.
Oxigraph         -0.136677  -0.30103          NaN   0.056905
Fuseki           -0.508638 -0.920819    -0.522879  -0.275724
GraphDB          -0.585027 -0.886057          NaN  -0.619789
Morph-KGC         0.152288  0.071882     0.053078   0.089905
SPARQL-Anything   0.158362  0.130334     0.143015   0.136721 

10K 
                 std. reif.  rdf-star named graphs n-ary rel.
Oxigraph          1.059185  0.577492          NaN    1.80618
Fuseki            0.704151 -0.154902     0.671173   1.431203
GraphDB           0.383815 -0.017729          NaN   0.742725
Morph-KGC         0.365488  0.240549     0.190332   0.243038
SPARQL-Anything   0.559907  0.485721     0.556303   0.545307 

100K 
                 std. reif.  rdf-star named graphs n-ary rel.
Oxigraph           2.33644  1.582518          NaN   3.802958
Fuseki            2.744661  0.898176     2.026043   2.817129
GraphDB           1.487563  0.963788          NaN   2.477888
Mo

In [27]:
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names; use whichever this install provides.
style_name = 'seaborn-v0_8-whitegrid'
if style_name not in plt.style.available:
    style_name = 'seaborn-whitegrid'
plt.style.use(style_name)

barWidth = 0.11

# (legend label, fill colour) per system, in the row order of each table.
system_styles = [
    ('Oxigraph', '#FC9F5B'),
    ('Fuseki', '#FBD1A2'),
    ('GraphDB', '#ECE4B7'),
    ('Morph-KGC', '#7DCFB6'),
    ('SPARQL-Anything', '#33CA7F'),
]

for scale in data_list:
    table = data_list[scale]
    rows = table.values.tolist()
    # Left-most bar position of each technique group.
    group_starts = np.arange(len(table.columns))

    fig, ax = plt.subplots()

    # One bar series per system, shifted by one bar width each.
    for position, (label, color) in enumerate(system_styles):
        ax.bar(group_starts + barWidth * position, rows[position], width=barWidth,
               color=color, label=label, edgecolor="gray")

    # Bars are centre-aligned, so the middle of a group lies half the span
    # between its first and last bar to the right of the first bar.
    ax.set_xticks(group_starts + barWidth * (len(system_styles) - 1) / 2)
    ax.set_xticklabels(technique, fontsize=12)
    # The top tick (5) is labelled 'TO' instead of a number.
    ax.set_yticks(np.arange(0, 6))
    ax.set_yticklabels(('0', '1', '2', '3', '4', 'TO'), fontsize=14)
    ax.set_ylim(top=5, bottom=-1)
    ax.set_ylabel("Time (log$_{10}$(s))", fontsize=15)
    ax.set_title(scale, fontsize=20)
    ax.legend(mappingsEG_TS, loc='lower center', ncol=6, bbox_to_anchor=[0.5, -0.26], fontsize=13)
    # Called without a visibility flag, this toggles the x-axis grid lines
    # relative to what the active style enabled.
    ax.grid(axis='x')

    fig.savefig("./figures/time_" + scale.lower() + ".png", bbox_inches='tight', dpi=700)

    # Close the figure so repeated runs do not accumulate open figures and
    # the cell does not end with an empty figure as its output.
    plt.close(fig)

    print("Finished: 'figures/time_" + scale + "'")

Finished: 'figures/time_1K'
Finished: 'figures/time_10K'
Finished: 'figures/time_100K'
Finished: 'figures/time_1M'


<Figure size 640x480 with 0 Axes>

## Plots for details on queries in triplestores

In [48]:
# Dataset sizes, query identifiers (columns) and triplestores (rows) of the
# per-query timing tables.
scales = ['1K', '10K', '100K', '1M']
query = [f"Q{number}" for number in range(1, 13)]
mappingsEG_TS = ['Oxigraph', 'Fuseki', 'GraphDB']

# Lower-cased query identifiers are used as column labels of every table.
query_columns = [name.lower() for name in query]

# One log10-scaled table per dataset size, keyed by the scale label.
data_list = {}
for scale in scales:
    csv_path = "data/q-" + scale + ".csv"
    file = np.transpose(pd.read_csv(csv_path, sep='\t'))
    data_list[scale] = organize_data(file, query_columns, mappingsEG_TS)
    print(scale, '\n', data_list[scale], '\n')

1K 
                 q1        q2        q3        q4        q5        q6  \
Oxigraph -0.274905 -0.308919       NaN -0.199283  0.047664       NaN   
Fuseki   -0.853872 -1.004365 -0.876148 -0.876148 -0.477556 -0.886057   
GraphDB  -0.632644 -0.882729       NaN -0.882729 -0.560667       NaN   

                q7        q8        q9       q10       q11       q12  
Oxigraph -0.238824 -0.317855       NaN -0.321482  -0.38405  -0.06148  
Fuseki   -0.823909 -0.737549 -0.931814 -0.906578 -0.917215 -0.636388  
GraphDB  -0.534617 -0.694649       NaN -0.598599 -0.847712 -0.595166   

10K 
                 q1        q2        q3        q4        q5        q6  \
Oxigraph  0.620344  0.566673       NaN  0.711301  1.742136       NaN   
Fuseki    -0.03105 -0.181115 -0.002177 -0.120331  1.174525 -0.137272   
GraphDB   0.302547  -0.03292       NaN -0.025028  0.963126       NaN   

                q7        q8        q9       q10       q11       q12  
Oxigraph  0.628389  0.498448       NaN   0.58872  0.45

In [49]:
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names; use whichever this install provides.
style_name = 'seaborn-v0_8-whitegrid'
if style_name not in plt.style.available:
    style_name = 'seaborn-whitegrid'
plt.style.use(style_name)

barWidth = 0.23

# (legend label, fill colour) per triplestore, in the row order of each table.
system_styles = [
    ('Oxigraph', '#FC9F5B'),
    ('Fuseki', '#FBD1A2'),
    ('GraphDB', '#ECE4B7'),
]

for scale in data_list:
    table = data_list[scale]
    rows = table.values.tolist()
    # Left-most bar position of each query group.
    group_starts = np.arange(len(table.columns))

    fig, ax = plt.subplots()

    # One bar series per triplestore, shifted by one bar width each.
    for position, (label, color) in enumerate(system_styles):
        ax.bar(group_starts + barWidth * position, rows[position], width=barWidth,
               color=color, label=label, edgecolor="gray")

    # Bars are centre-aligned, so the middle of a group lies half the span
    # between its first and last bar to the right of the first bar.
    ax.set_xticks(group_starts + barWidth * (len(system_styles) - 1) / 2)
    # The groups are queries, so label them with the query identifiers (the
    # technique names from the earlier cell do not match these columns).
    ax.set_xticklabels(query, fontsize=10)
    # The top tick (5) is labelled 'TO' instead of a number.
    ax.set_yticks(np.arange(0, 6))
    ax.set_yticklabels(('0', '1', '2', '3', '4', 'TO'), fontsize=14)
    ax.set_ylim(top=5, bottom=-1)
    ax.set_ylabel("Time (log$_{10}$(s))", fontsize=15)
    ax.set_title(scale, fontsize=20)
    ax.legend(mappingsEG_TS, loc='lower center', ncol=6, bbox_to_anchor=[0.5, -0.26], fontsize=13)
    # Called without a visibility flag, this toggles the x-axis grid lines
    # relative to what the active style enabled.
    ax.grid(axis='x')

    fig.savefig("./figures/time_queries_" + scale.lower() + ".png", bbox_inches='tight', dpi=700)

    # Close the figure so repeated runs do not accumulate open figures and
    # the cell does not end with an empty figure as its output.
    plt.close(fig)

    print("Finished: 'figures/time_queries_" + scale + "'")

Finished: 'figures/time_queries_1K'
Finished: 'figures/time_queries_10K'
Finished: 'figures/time_queries_100K'
Finished: 'figures/time_queries_1M'


<Figure size 640x480 with 0 Axes>