In [2]:
import os
from glob import glob

import pandas as pd
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load every saved dataset found under ../datasets into one DataFrame and tag
# each row with a `source` label derived from the entry's name.
frames = []
for path in sorted(glob("../datasets/*")):  # sorted: glob returns entries in arbitrary order
  frame = Dataset.load_from_disk(path).to_pandas()
  # The label is the text before the first "_" of the entry name; basename
  # copes with both "/" and "\" separators, unlike splitting on "/".
  frame["source"] = os.path.basename(path).split("_")[0]
  frames.append(frame)

if not frames:
  # Fail here with a clear message instead of a confusing attribute error below.
  raise FileNotFoundError("no datasets found under ../datasets/")

# Concatenate once: calling pd.concat inside the loop re-copies all previous rows
# on every iteration. ignore_index stays at its default so index values are kept.
dataset_df = pd.concat(frames)

# comment_len: number of whitespace-separated tokens in each description's comment.
dataset_df["comment_len"] = dataset_df.target_description.apply(lambda td: len(td["comment"].split()))
# axioms: total number of entries summed over all fact lists of the description.
dataset_df["axioms"] = dataset_df.target_description.apply(lambda td: sum(len(f) for f in td["facts"].values()))

In [4]:
# Average comment length (in whitespace-separated tokens) for each source.
dataset_df.groupby("source")["comment_len"].mean()

source
arco         34.64
dbpedia      17.59
dul          55.16
foodon       35.03
frbr         10.17
go           22.67
helis         9.50
schemaorg    16.31
Name: comment_len, dtype: float64

In [5]:
# Average number of fact entries ("axioms") per description for each source.
dataset_df.groupby("source")["axioms"].mean()

source
arco         3.88
dbpedia      3.26
dul          5.42
foodon       1.77
frbr         7.97
go           0.62
helis        1.44
schemaorg    1.18
Name: axioms, dtype: float64

In [10]:
def _mean_pm_std(values):
  r"""Render one group's values as the LaTeX math string ``$<mean> \pm <std>$``.

  Both numbers are formatted with two decimals. A raw f-string is used so that
  ``\pm`` is a literal backslash sequence rather than an invalid escape.
  """
  return rf"${values.mean():.2f} \pm {values.std():.2f}$"


# Per-source summary table as LaTeX source. `desc` is an intentionally empty
# placeholder column; print() is used so the raw LaTeX text can be copied.
print(
  dataset_df
  .assign(desc="")
  .groupby("source")
  .agg({
    "desc": lambda _: "",
    "comment_len": _mean_pm_std,
    "axioms": _mean_pm_std,
  })
  .to_latex()
)

\begin{tabular}{llll}
\toprule
 & desc & comment_len & axioms \\
source &  &  &  \\
\midrule
arco &  & $34.64 \pm 44.43$ & $3.88 \pm 2.68$ \\
dbpedia &  & $17.59 \pm 15.65$ & $3.26 \pm 8.76$ \\
dul &  & $55.16 \pm 86.71$ & $5.42 \pm 7.09$ \\
foodon &  & $35.03 \pm 18.99$ & $1.77 \pm 0.93$ \\
frbr &  & $10.17 \pm 0.38$ & $7.97 \pm 5.17$ \\
go &  & $22.67 \pm 28.49$ & $0.62 \pm 1.16$ \\
helis &  & $9.50 \pm 4.24$ & $1.44 \pm 0.99$ \\
schemaorg &  & $16.31 \pm 16.27$ & $1.18 \pm 0.41$ \\
\bottomrule
\end{tabular}



In [19]:
# Inspect one DUL entry: print its target IRI, then one line per fact relation
# with that relation's values joined by commas.
sample = dataset_df.loc[dataset_df["source"] == "dul"].iloc[2]

print(sample["target"])
for relation, values in sample["target_description"]["facts"].items():
  print(relation, ", ".join(values))


http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Role
disjoint union of 
disjoint with Role
equivalent to 
is domain of has task a Task, is role defined in a Description, is role of a Object
is range of Description defines role, Object has role, Task is task of
subclass of classifies only Object, Concept, has part only Role
superclass of 


In [13]:
# Inspect one FoodOn entry: print its target IRI, then one line per fact relation
# with that relation's values joined by commas.
# NOTE(review): the output saved under this cell shows a DUL.owl#Role entry and
# this cell's execution count (13) precedes the previous cell's (19), so the
# saved output looks stale -- re-run via Restart & Run All to confirm.
sample = dataset_df.loc[dataset_df["source"] == "foodon"].iloc[14]

print(sample["target"])
for relation, values in sample["target_description"]["facts"].items():
  print(relation, ", ".join(values))


http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Role
disjoint union of 
disjoint with Role
equivalent to 
is domain of has task a Task, is role defined in a Description, is role of a Object
is range of Description defines role, Object has role, Task is task of
subclass of classifies only Object, Concept, has part only Role
superclass of 
