Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
---
---
% 2022
% Editor's letter for the March 2022 Data Engineering Bulletin special issue.
% NOTE(review): the key "schelterspecialissue" is used again by the 2021 letter
% later in this file; one of the two entries needs a distinct key.
@article{schelterspecialissue,
  author  = {Sebastian Schelter},
  title   = {Letter from the Special Issue Editor},
  journal = {IEEE Data Engineering Bulletin (Special issue on Directions Towards GDPR-Compliant Data Systems and Applications)},
  year    = {2022},
  volume  = {45},
  number  = {1},
  month   = mar,
  url     = {http://sites.computer.org/debull/A22mar/p2.pdf}
}
% ESWC 2022 proceedings volume (LNCS 13261), record taken from dblp.
@proceedings{DBLP:conf/esws/2022,
  editor    = {Groth, Paul and
               Vidal, Maria{-}Esther and
               Suchanek, Fabian M. and
               Szekely, Pedro A. and
               Kapanipathi, Pavan and
               Pesquita, Catia and
               Skaf{-}Molli, Hala and
               Tamper, Minna},
  title     = {The Semantic Web - 19th International Conference, {ESWC} 2022, Hersonissos,
               Crete, Greece, May 29 - June 2, 2022, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {13261},
  publisher = {Springer},
  year      = {2022},
  isbn      = {978-3-031-06980-2},
  doi       = {10.1007/978-3-031-06981-9},
  url       = {https://2022.eswc-conferences.org/accepted-papers/},
  timestamp = {Fri, 03 Jun 2022 16:42:29 +0200},
  biburl    = {https://dblp.org/rec/conf/esws/2022.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CIDR 2022 paper on table understanding, record taken from dblp.
@inproceedings{DBLP:conf/cidr/HulsebosGGDGD22,
  author    = {Hulsebos, Madelon and
               Gathani, Sneha and
               Gale, James and
               Dillig, Isil and
               Groth, Paul and
               Demiralp, {\c{C}}agatay},
  title     = {Making Table Understanding Work in Practice},
  booktitle = {12th Conference on Innovative Data Systems Research, {CIDR} 2022,
               Chaminade, CA, USA, January 9-12, 2022},
  publisher = {www.cidrdb.org},
  year      = {2022},
  url       = {https://www.cidrdb.org/cidr2022/papers/a48-hulsebos.pdf},
  timestamp = {Mon, 20 Jun 2022 12:03:52 +0200},
  biburl    = {https://dblp.org/rec/conf/cidr/HulsebosGGDGD22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CIDR 2022 paper on ArgusEyes, record taken from dblp.
@inproceedings{DBLP:conf/cidr/SchelterGGSK022a,
  author    = {Schelter, Sebastian and
               Grafberger, Stefan and
               Guha, Shubha and
               Sprangers, Olivier and
               Karlas, Bojan and
               Zhang, Ce},
  title     = {Screening Native Machine Learning Pipelines with ArgusEyes},
  booktitle = {12th Conference on Innovative Data Systems Research, {CIDR} 2022,
               Chaminade, CA, USA, January 9-12, 2022},
  publisher = {www.cidrdb.org},
  year      = {2022},
  url       = {https://www.cidrdb.org/cidr2022/papers/a1-schelter.pdf},
  timestamp = {Fri, 15 Jul 2022 13:31:19 +0200},
  biburl    = {https://dblp.org/rec/conf/cidr/SchelterGGSK022a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICDE 2022 workshop paper introducing the GitSchemas dataset.
% Empty volume/number fields were dropped (they only trigger empty-field warnings).
@inproceedings{gitschemas,
  author    = {Döhmen, Till and Hulsebos, Madelon and Beecks, Christian and Schelter, Sebastian},
  title     = {{GitSchemas}: A Dataset for Automating Relational Data Preparation Tasks},
  booktitle = {2022 IEEE 38th International Conference on Data Engineering Workshops (ICDEW)},
  year      = {2022},
  pages     = {74--78},
  url       = {https://madelonhulsebos.github.io/assets/GitSchemas.pdf},
  doi       = {10.1109/ICDEW55742.2022.00016}
}
% SIGMOD 2022 paper on the Serenade session-based recommender (ACM DL export, cleaned).
@inproceedings{10.1145/3514221.3517901,
  author    = {Kersbergen, Barrie and Sprangers, Olivier and Schelter, Sebastian},
  title     = {Serenade - Low-Latency Session-Based Recommendation in e-Commerce at Scale},
  booktitle = {Proceedings of the 2022 International Conference on Management of Data},
  series    = {SIGMOD/PODS '22},
  location  = {Philadelphia, PA, USA},
  year      = {2022},
  month     = jun,
  pages     = {150--159},
  numpages  = {10},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450392495},
  doi       = {10.1145/3514221.3517901},
  url       = {https://ssc.io/pdf/modds003.pdf},
  abstract  = {Session-based recommendation predicts the next item with which a user will interact, given a sequence of her past interactions with other items. This machine learning problem targets a core scenario in e-commerce platforms, which aim to recommend interesting items to buy to users browsing the site. Session-based recommenders are difficult to scale due to their exponentially large input space of potential sessions. This impedes offline precomputation of the recommendations, and implies the necessity to maintain state during the online computation of next-item recommendations. We propose VMIS-kNN, an adaptation of a state-of-the-art nearest neighbor approach to session-based recommendation, which leverages a prebuilt index to compute next-item recommendations with low latency in scenarios with hundreds of millions of clicks to search through. Based on this approach, we design and implement the scalable session-based recommender system Serenade, which is in production usage at bol.com, a large European e-commerce platform. We evaluate the predictive performance of VMIS-kNN, and show that Serenade can answer a thousand recommendation requests per second with a 90th percentile latency of less than seven milliseconds in scenarios with millions of items to recommend. Furthermore, we present results from a three week long online A/B test with up to 600 requests per second for 6.5 million distinct items on more than 45 million user sessions from our e-commerce platform. To the best of our knowledge, we provide the first empirical evidence that the superior predictive performance of nearest neighbor approaches to session-based recommendation in offline evaluations translates to superior performance in a real world e-commerce setting.}
}
% DEEM 2022 workshop paper on data-centric what-if analysis (ACM DL export, cleaned).
@inproceedings{10.1145/3533028.3533303,
  author    = {Grafberger, Stefan and Groth, Paul and Schelter, Sebastian},
  title     = {Towards Data-Centric What-If Analysis for Native Machine Learning Pipelines},
  booktitle = {Proceedings of the Sixth Workshop on Data Management for End-To-End Machine Learning},
  series    = {DEEM '22},
  location  = {Philadelphia, Pennsylvania},
  year      = {2022},
  month     = jun,
  articleno = {3},
  numpages  = {5},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450393751},
  doi       = {10.1145/3533028.3533303},
  url       = {https://stefan-grafberger.com/mlwhatif-deem.pdf},
  abstract  = {An important task of data scientists is to understand the sensitivity of their models to changes in the data that the models are trained and tested upon. Currently, conducting such data-centric what-if analyses requires significant and costly manual development and testing with the corresponding chance for the introduction of bugs. We discuss the problem of data-centric what-if analysis over whole ML pipelines (including data preparation and feature encoding), propose optimisations that reuse trained models and intermediate data to reduce the runtime of such analysis, and finally conduct preliminary experiments on three complex example pipelines, where our approach reduces the runtime by a factor of up to six.}
}
% CACM article on responsible data management (ACM DL export, cleaned).
@article{10.1145/3488717,
  author     = {Stoyanovich, Julia and Abiteboul, Serge and Howe, Bill and Jagadish, H. V. and Schelter, Sebastian},
  title      = {Responsible Data Management},
  journal    = {Communications of the ACM},
  year       = {2022},
  month      = may,
  issue_date = {June 2022},
  volume     = {65},
  number     = {6},
  pages      = {64--74},
  numpages   = {11},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {0001-0782},
  doi        = {10.1145/3488717},
  url        = {https://doi.org/10.1145/3488717},
  abstract   = {Perspectives on the role and responsibility of the data-management research community in designing, developing, using, and overseeing automated decision systems.}
}
% CACM article on the Common Workflow Language (ACM DL export, cleaned).
% Accented letters are written as brace-wrapped special characters so they sort
% and label as single letters.
@article{10.1145/3486897,
  author     = {Crusoe, Michael R. and Abeln, Sanne and Iosup, Alexandru and Amstutz, Peter and Chilton, John and Tijani{\'c}, Neboj{\v{s}}a and M{\'e}nager, Herv{\'e} and Soiland-Reyes, Stian and Gavrilovi{\'c}, Bogdan and Goble, Carole and {The CWL Community}},
  title      = {Methods Included},
  journal    = {Communications of the ACM},
  year       = {2022},
  month      = may,
  issue_date = {June 2022},
  volume     = {65},
  number     = {6},
  pages      = {54--63},
  numpages   = {10},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {0001-0782},
  doi        = {10.1145/3486897},
  url        = {https://doi.org/10.1145/3486897},
  abstract   = {Standardizing computational reuse and portability with the Common Workflow Language.}
}
% ICML 2022 paper on CITRIS. Only the acronym is brace-protected in the title so
% styles can still apply their own casing to the rest.
% NOTE(review): month is kept as recorded (may); confirm it against the conference dates.
@inproceedings{lippe2022citris,
  author    = {Lippe, Phillip and Magliacane, Sara and L{\"o}we, Sindy and Asano, Yuki M. and Cohen, Taco and Gavves, Efstratios},
  title     = {{CITRIS}: Causal Identifiability from Temporal Intervened Sequences},
  booktitle = {Proceedings of the 39th International Conference on Machine Learning, {ICML}},
  year      = {2022},
  month     = may,
  arxiv     = {2202.03169}
}
% SPNLP 2022 workshop paper on SlotGAN (ACL Anthology export, cleaned).
@inproceedings{daza-etal-2022-slotgan,
  author    = {Daza, Daniel and
               Cochez, Michael and
               Groth, Paul},
  title     = {{S}lot{GAN}: Detecting Mentions in Text via Adversarial Distant Learning},
  booktitle = {Proceedings of the Sixth Workshop on Structured Prediction for NLP},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.spnlp-1.4},
  doi       = {10.18653/v1/2022.spnlp-1.4},
  pages     = {32--39},
  abstract  = {We present SlotGAN, a framework for training a mention detection model that only requires unlabeled text and a gazetteer. It consists of a generator trained to extract spans from an input sentence, and a discriminator trained to determine whether a span comes from the generator, or from the gazetteer. We evaluate the method on English newswire data and compare it against supervised, weakly-supervised, and unsupervised methods. We find that the performance of the method is lower than these baselines, because it tends to generate more and longer spans, and in some cases it relies only on capitalization. In other cases, it generates spans that are valid but differ from the benchmark. When evaluated with metrics based on overlap, we find that SlotGAN performs within 95{\%} of the precision of a supervised method, and 84{\%} of its recall. Our results suggest that the model can generate spans that overlap well, but an additional filtering mechanism is required.},
}
% TOSEM article on the knowledge graph development process (ACM DL export, cleaned).
% The export replaced the dotted e in the first author's name with the literal
% text "undefined"; it is restored here as {\.e}.
@article{10.1145/3522586,
  author    = {Tama{\v{s}}auskait{\.e}, Gyt{\.e} and Groth, Paul},
  title     = {Defining a Knowledge Graph Development Process Through a Systematic Review},
  journal   = {ACM Transactions on Software Engineering and Methodology},
  year      = {2022},
  month     = feb,
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  issn      = {1049-331X},
  doi       = {10.1145/3522586},
  url       = {https://doi.org/10.1145/3522586},
  abstract  = {Knowledge graphs are widely used in industry and studied within the academic community. However, the models applied in the development of knowledge graphs vary. Analysing and providing a synthesis of the commonly used approaches to knowledge graph development would provide researchers and practitioners a better understanding of the overall process and methods involved. Hence, this paper aims to define the overall process of knowledge graph development and its key constituent steps. For this purpose, a systematic review and a conceptual analysis of the literature was conducted. The resulting process was compared to case studies to evaluate its applicability. The proposed process suggests a unified approach and provides guidance for both researchers and practitioners when constructing and managing knowledge graphs.},
  keywords  = {knowledge graph construction, information integration, development process semantic network, knowledge graphs}
}
% Data Intelligence article on Canonical Workflow Building Blocks (publisher export, cleaned).
% The extra brace group around the whole title and abstract was removed; the url is
% stored raw, matching the doi field and the other raw URLs in this file.
@article{10.1162/dint_a_00135,
  author   = {Soiland-Reyes, Stian and Bayarri, Genís and Andrio, Pau and Long, Robin and Lowe, Douglas and Niewielska, Ania and Hospital, Adam and Groth, Paul},
  title    = {Making Canonical Workflow Building Blocks Interoperable across Workflow Languages},
  journal  = {Data Intelligence},
  year     = {2022},
  month    = mar,
  pages    = {1--16},
  issn     = {2641-435X},
  doi      = {10.1162/dint_a_00135},
  url      = {https://doi.org/10.1162/dint_a_00135},
  eprint   = {https://direct.mit.edu/dint/article-pdf/doi/10.1162/dint\_a\_00135/1996729/dint\_a\_00135.pdf},
  abstract = {We introduce the concept of Canonical Workflow Building Blocks (CWBB), a methodology of describing and wrapping computational tools, in order for them to be utilised in a reproducible manner from multiple workflow languages and execution platforms. The concept is implemented and demonstrated with the BioExcel Building Blocks library (BioBB), a collection of tool wrappers in the field of computational biomolecular simulation. Interoperability across different workflow languages is showcased through a protein Molecular Dynamics setup transversal workflow, built using this library and run with 5 different Workflow Manager Systems (WfMS). We argue such practice is a necessary requirement for FAIR Computational Workflows and an element of Canonical Workflow Frameworks for Research (CWFR) in order to improve widespread adoption and reuse of computational methods across workflow language barriers.},
}
% SemTab 2021 challenge proceedings (CEUR-WS Vol. 3103), record taken from dblp.
% NOTE(review): the names are stored under "author" although this is a proceedings
% entry; the ESWC 2022 proceedings entry in this file uses "editor". Confirm which
% field the consumer of this file expects before changing it.
@proceedings{DBLP:conf/semweb/2021semtab,
  author    = {Jim{\'{e}}nez{-}Ruiz, Ernesto and
               Efthymiou, Vasilis and
               Chen, Jiaoyan and
               Cutrona, Vincenzo and
               Hassanzadeh, Oktie and
               Sequeda, Juan and
               Srinivas, Kavitha and
               Abdelmageed, Nora and
               Hulsebos, Madelon and
               Oliveira, Daniela and
               Pesquita, Catia},
  title     = {Proceedings of the Semantic Web Challenge on Tabular Data to Knowledge
               Graph Matching co-located with the 20th International Semantic Web
               Conference {(ISWC} 2021), Virtual conference, October 27, 2021},
  series    = {{CEUR} Workshop Proceedings},
  volume    = {3103},
  publisher = {CEUR-WS.org},
  year      = {2022},
  url       = {http://ceur-ws.org/Vol-3103},
  urn       = {urn:nbn:de:0074-3103-0},
  timestamp = {Sat, 12 Mar 2022 15:06:27 +0100},
  biburl    = {https://dblp.org/rec/conf/semweb/2021semtab.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2022 paper on AdaRL (OpenReview export); award/awardurl are custom fields.
@inproceedings{huang2022adarl,
  author    = {Biwei Huang and Fan Feng and Chaochao Lu and Sara Magliacane and Kun Zhang},
  title     = {Ada{RL}: What, Where, and How to Adapt in Transfer Reinforcement Learning},
  booktitle = {International Conference on Learning Representations},
  year      = {2022},
  url       = {https://openreview.net/forum?id=8H5bpVwvt5},
  award     = {{Spotlight Presentation}},
  awardurl  = {https://openreview.net/group?id=ICLR.cc/2022/Conference#spotlight-submissions},
}
% VLDB Journal article on data distribution debugging in ML pipelines.
@article{Grafberger2022,
  author    = {Stefan Grafberger and Paul Groth and Julia Stoyanovich and Sebastian Schelter},
  title     = {Data distribution debugging in machine learning pipelines},
  journal   = {The {VLDB} Journal},
  publisher = {Springer Science and Business Media {LLC}},
  year      = {2022},
  month     = jan,
  doi       = {10.1007/s00778-021-00726-w},
  url       = {https://stefan-grafberger.com/mlinspect-journal.pdf}
}
% Journal of Biomedical Semantics article on provenance from electronic lab notebooks.
% Umlauts are written as brace-wrapped special characters so they sort and label
% as single letters.
@article{Schrder2022,
  author    = {Max Schr{\"o}der and Susanne Staehlke and Paul Groth and J. Barbara Nebe and Sascha Spors and Frank Kr{\"u}ger},
  title     = {Structure-based knowledge acquisition from electronic lab notebooks for research data provenance documentation},
  journal   = {Journal of Biomedical Semantics},
  publisher = {Springer Science and Business Media {LLC}},
  year      = {2022},
  month     = jan,
  volume    = {13},
  number    = {1},
  doi       = {10.1186/s13326-021-00257-x},
  url       = {https://doi.org/10.1186/s13326-021-00257-x}
}
% Data Science article introducing RO-Crate (reference-manager export, cleaned).
% The truncated author list uses the "and others" token; the abstract is stored under
% the field name used by the other entries in this file.
@article{rocrate2022,
  author    = {Soiland-Reyes, Stian and Sefton, Peter and Crosas, Mercè and Castro, Leyla Jael and Coppens, Frederik and Fernández, José M. and Garijo, Daniel and Gr{\"u}ning, Bj{\"o}rn and La Rosa, Marco and Leo, Simone and others},
  editor    = {Peroni, Silvio},
  title     = {Packaging research artefacts with {RO-Crate}},
  journal   = {Data Science},
  publisher = {IOS Press},
  year      = {2022},
  month     = jan,
  pages     = {1--42},
  issn      = {2451-8492, 2451-8484},
  doi       = {10.3233/DS-210053},
  url       = {https://doi.org/10.3233/DS-210053},
  abstract  = {An increasing number of researchers support reproducibility by including pointers to and descriptions of datasets, software and methods in their publications. However, scientific articles may be ambiguous, incomplete and difficult to process by automated systems. In this paper we introduce RO-Crate, an open, community-driven, and lightweight approach to packaging research artefacts along with their metadata in a machine readable manner. RO-Crate is based on Schema.org annotations in JSON-LD, aiming to establish best practices to formally describe metadata in an accessible and practical way for their use in a wide variety of situations. An RO-Crate is a structured archive of all the items that contributed to a research outcome, including their identifiers, provenance, relations and annotations. As a general purpose packaging approach for data and their metadata, RO-Crate is used across multiple areas, including bioinformatics, digital humanities and regulatory sciences. By applying “just enough” Linked Data standards, RO-Crate simplifies the process of making research outputs FAIR while also enhancing research reproducibility. An RO-Crate for this article (https://w3id.org/ro/doi/10.5281/zenodo.5146227) is archived at https://doi.org/10.5281/zenodo.5146227.}
}
% 2021
% SEMANTiCS 2021 proceedings volume, recorded as an edited book (reference-manager export).
@book{alam_further_2021,
  editor     = {Alam, Mehwish and Groth, Paul and de Boer, Victor and Pellegrini, Tassilo and Pandit, Harshvardhan J. and Montiel, Elena and Rodríguez Doncel, Víctor and McGillivray, Barbara and Meroño-Peñuela, Albert},
  title      = {Further with {Knowledge} {Graphs}: {Proceedings} of the 17th {International} {Conference} on {Semantic} {Systems}, 6–9 {September} 2021, {Amsterdam}, {The} {Netherlands}},
  shorttitle = {Further with {Knowledge} {Graphs}},
  series     = {Studies on the {Semantic} {Web}},
  volume     = {53},
  publisher  = {IOS Press},
  year       = {2021},
  month      = aug,
  isbn       = {9781643682006 9781643682013},
  doi        = {10.3233/SSW53},
  url        = {https://ebooks.iospress.nl/doi/10.3233/SSW53},
  urldate    = {2022-02-05},
}
% Patterns article on data handling in network diffusion models.
@article{Nevin2021,
  author    = {James Nevin and Michael Lees and Paul Groth},
  title     = {The non-linear impact of data handling on network diffusion models},
  journal   = {Patterns},
  publisher = {Elsevier {BV}},
  year      = {2021},
  month     = nov,
  pages     = {100397},
  doi       = {10.1016/j.patter.2021.100397},
  url       = {https://doi.org/10.1016/j.patter.2021.100397}
}
% DL4KG 2021 workshop paper on knowledge graph hierarchy quality (CEUR-WS Vol. 3034).
@inproceedings{szarkowska_quality_2021,
  author    = {Szarkowska, Kinga and Moore, Veronique and Vandenbussche, Pierre-Yves and Groth, Paul},
  editor    = {Alam, Mehwish and Buscaldi, Davide and Cochez, Michael and Osborne, Francesco and Recupero, Diego Reforgiato and Sack, Harald},
  title     = {Quality {Assessment} of {Knowledge} {Graph} {Hierarchies} using {KG}-{BERT}},
  booktitle = {Proceedings of the {Workshop} on {Deep} {Learning} for {Knowledge} {Graphs} ({DL4KG} 2021)},
  series    = {{CEUR} {Workshop} {Proceedings}},
  volume    = {3034},
  publisher = {CEUR},
  address   = {Virtual Conference, online},
  year      = {2021},
  month     = oct,
  language  = {en},
  url       = {http://ceur-ws.org/Vol-3034/#paper1},
  urldate   = {2022-02-05},
}
% DL4KG 2021 workshop paper on GraphPOPE (CEUR-WS Vol. 3034).
% The first author's surname particle is given in "von Last, First" form so that
% "den" is parsed as part of the family name instead of the given name.
@inproceedings{boef_graphpope:_2021,
  author     = {den Boef, Jeroen and Cornelisse, Joran and Groth, Paul},
  editor     = {Alam, Mehwish and Buscaldi, Davide and Cochez, Michael and Osborne, Francesco and Recupero, Diego Reforgiato and Sack, Harald},
  title      = {{GraphPOPE}: {Retaining} {Structural} {Graph} {Information} {Using} {Position}-aware {Node} {Embeddings}},
  shorttitle = {{GraphPOPE}},
  booktitle  = {Proceedings of the {Workshop} on {Deep} {Learning} for {Knowledge} {Graphs} ({DL4KG} 2021)},
  series     = {{CEUR} {Workshop} {Proceedings}},
  volume     = {3034},
  publisher  = {CEUR},
  address    = {Virtual Conference, online},
  year       = {2021},
  month      = oct,
  language   = {en},
  url        = {http://ceur-ws.org/Vol-3034/#paper3},
  urldate    = {2022-02-05},
}
% K-CAP 2021 paper on cross-document coreference resolution for email (ACM DL export, cleaned).
@inproceedings{10.1145/3460210.3493573,
  author    = {Li, Xue and Magliacane, Sara and Groth, Paul},
  title     = {The Challenges of Cross-Document Coreference Resolution for Email},
  booktitle = {Proceedings of the 11th on Knowledge Capture Conference},
  series    = {K-CAP '21},
  location  = {Virtual Event, USA},
  year      = {2021},
  pages     = {273--276},
  numpages  = {4},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450384575},
  doi       = {10.1145/3460210.3493573},
  url       = {https://doi.org/10.1145/3460210.3493573},
  abstract  = {Long-form conversations such as email are an important source of information for knowledge capture. For tasks such as knowledge graph construction, conversational search, and entity linking, being able to resolve entities from across documents is important. Building on recent work on within document coreference resolution for email, we study for the first time a cross-document formulation of the problem. Our results show that the current state-of-the-art deep learning models for general cross-document coreference resolution are insufficient for email conversations. Our experiments show that the general task is challenging and, importantly for knowledge intensive tasks, coreference resolution models that only treat entity mentions perform worse. Based on these results, we outline the work needed to address this challenging task.},
  keywords  = {cross-document coreference resolution, challenges, email conversations, entity resolution, conversational data}
}
% DeepOntoNLP 2021 workshop paper on ontology maintenance (CEUR-WS Vol. 2918), record from dblp.
% The duplicated "co-located with" in the booktitle was collapsed to a single occurrence.
@inproceedings{DBLP:conf/esws/ShroffVMG21,
  author    = {Natasha Shroff and
               Pierre{-}Yves Vandenbussche and
               V{\'{e}}ronique Moore and
               Paul Groth},
  editor    = {Sarra Ben Abb{\`{e}}s and
               Rim Hantach and
               Philippe Calvez and
               Davide Buscaldi and
               Danilo Dess{\`{\i}} and
               Mauro Dragoni and
               Diego Reforgiato Recupero and
               Harald Sack},
  title     = {Supporting Ontology Maintenance with Contextual Word Embeddings and
               Maximum Mean Discrepancy},
  booktitle = {Joint Proceedings of the 2nd International Workshop on Deep Learning
               meets Ontologies and Natural Language Processing (DeepOntoNLP 2021)
               {\&} 6th International Workshop on Explainable Sentiment Mining
               and Emotion Detection {(X-SENTIMENT} 2021) co-located
               with 18th Extended Semantic Web Conference 2021, Hersonissos, Greece,
               June 6th - 7th, 2021 (moved online)},
  series    = {{CEUR} Workshop Proceedings},
  volume    = {2918},
  pages     = {11--19},
  publisher = {CEUR-WS.org},
  year      = {2021},
  url       = {http://ceur-ws.org/Vol-2918/paper2.pdf},
  timestamp = {Tue, 10 Aug 2021 16:26:49 +0200},
  biburl    = {https://dblp.org/rec/conf/esws/ShroffVMG21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% MLSMKG 2021 workshop proceedings (CEUR-WS Vol. 2997), record taken from dblp.
% NOTE(review): the names are stored under "author" although this is a proceedings
% entry; the ESWC 2022 proceedings entry in this file uses "editor". Confirm which
% field the consumer of this file expects before changing it.
@proceedings{DBLP:conf/pkdd/2021mlsmkg,
  author    = {Alam, Mehwish and
               Ali, Mehdi and
               Groth, Paul and
               Hitzler, Pascal and
               Lehmann, Jens and
               Paulheim, Heiko and
               Rettinger, Achim and
               Sack, Harald and
               Sadeghi, Afshin and
               Tresp, Volker},
  title     = {Proceedings of Machine Learning with Symbolic Methods and Knowledge Graphs co-located
               with European Conference on Machine Learning and Principles and Practice
               of Knowledge Discovery in Databases {(ECML} {PKDD} 2021), Virtual,
               September 17, 2021},
  series    = {{CEUR} Workshop Proceedings},
  volume    = {2997},
  publisher = {CEUR-WS.org},
  year      = {2021},
  url       = {http://ceur-ws.org/Vol-2997},
  urn       = {urn:nbn:de:0074-2997-0},
  timestamp = {Mon, 15 Nov 2021 14:58:09 +0100},
  biburl    = {https://dblp.org/rec/conf/pkdd/2021mlsmkg.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% SemEval-2021 Task 8 (MeasEval) overview paper (ACL Anthology export, cleaned).
% The "Jr." suffix is given in "Last, Jr., First" form so it is parsed as a suffix
% rather than as part of the family name.
@inproceedings{harper-etal-2021-semeval,
  author    = {Harper, Corey and
               Cox, Jessica and
               Kohler, Curt and
               Scerri, Antony and
               Daniel, Jr., Ron and
               Groth, Paul},
  title     = {{S}em{E}val-2021 Task 8: {M}eas{E}val {--} Extracting Counts and Measurements and their Related Contexts},
  booktitle = {Proceedings of the 15th International Workshop on Semantic Evaluation (SemEval-2021)},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.semeval-1.38},
  doi       = {10.18653/v1/2021.semeval-1.38},
  pages     = {306--316},
  award     = {{SemEval 2021 Best Task Paper}},
  awardurl  = {https://semeval.github.io/SemEval2021/awards},
  abstract  = {We describe MeasEval, a SemEval task of extracting counts, measurements, and related context from scientific documents, which is of significant importance to the creation of Knowledge Graphs that distill information from the scientific literature. This is a new task in 2021, for which over 75 submissions from 25 participants were received. We expect the data developed for this task and the findings reported to be valuable to the scientific knowledge extraction, metrology, and automated knowledge base construction communities.},
}
% HSCC 2021 paper on verifiably safe exploration (ACM DL export); award/awardurl are custom fields.
@inproceedings{10.1145/3447928.3456653,
  author    = {Hunt, Nathan and Fulton, Nathan and Magliacane, Sara and Hoang, Trong Nghia and Das, Subhro and Solar-Lezama, Armando},
  title     = {Verifiably Safe Exploration for End-to-End Reinforcement Learning},
  booktitle = {Proceedings of the 24th International Conference on Hybrid Systems: Computation and Control},
  series    = {HSCC '21},
  location  = {Nashville, Tennessee},
  year      = {2021},
  articleno = {14},
  numpages  = {11},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450383394},
  doi       = {10.1145/3447928.3456653},
  url       = {https://doi.org/10.1145/3447928.3456653},
  abstract  = {Deploying deep reinforcement learning in safety-critical settings requires developing algorithms that obey hard constraints during exploration. This paper contributes a first approach toward enforcing formal safety constraints on end-to-end policies with visual inputs. Our approach draws on recent advances in object detection and automated reasoning for hybrid dynamical systems. The approach is evaluated on a novel benchmark that emphasizes the challenge of safely exploring in the presence of hard constraints. Our benchmark draws from several proposed problem sets for safe learning and includes problems that emphasize challenges such as reward signals that are not aligned with safety constraints. On each of these benchmark problems, our algorithm completely avoids unsafe behavior while remaining competitive at optimizing for as much reward as is safe. We characterize safety constraints in terms of a refinement relation on Markov decision processes - rather than directly constraining the reinforcement learning algorithm so that it only takes safe actions, we instead refine the environment so that only safe actions are defined in the environment's transition structure. This has pragmatic system design benefits and, more importantly, provides a clean conceptual setting in which we are able to prove important safety and efficiency properties. These allow us to transform the constrained optimization problem of acting safely in the original environment into an unconstrained optimization in a refined environment.},
  keywords  = {safe artificial intelligence, reinforcement learning, neural networks, differential dynamic logic, hybrid systems, formal verification},
  award     = {{Best Paper Award ACM HSCC 2021}},
  awardurl  = {https://hscc.acm.org/2021/awards/}
}
% Summary of the tutorials at The Web Conference 2021 (ACM DL export, cleaned).
% The export replaced the dotted capital I in one given name with the literal text
% "undefined"; it is restored as {\.I}. Accents use brace-wrapped special characters.
@inproceedings{10.1145/3442442.3453701,
  author    = {West, Robert and Bhagat, Smriti and Groth, Paul and Zitnik, Marinka and Couto, Francisco M. and Lisena, Pasquale and Mero{\~n}o-Pe{\~n}uela, Albert and Zhao, Xiangyu and Fan, Wenqi and Yin, Dawei and Tang, Jiliang and Shou, Linjun and Gong, Ming and Pei, Jian and Geng, Xiubo and Zhou, Xingjie and Jiang, Daxin and Ricaud, Benjamin and Aspert, Nicolas and Miz, Volodymyr and Dy, Jennifer and Ioannidis, Stratis and Y{\i}ld{\i}z, {\.I}lkay and Rezapour, Rezvaneh and Aref, Samin and Dinh, Ly and Diesner, Jana and Drutsa, Alexey and Ustalov, Dmitry and Popov, Nikita and Baidakova, Daria and Mishra, Shubhanshu and Gopalan, Arjun and Juan, Da-Cheng and Ilharco Magalhaes, Cesar and Ferng, Chun-Sung and Heydon, Allan and Lu, Chun-Ta and Pham, Philip and Yu, George and Fan, Yicheng and Wang, Yueqi and Laurent, Florian and Schraner, Yanick and Scheller, Christian and Mohanty, Sharada and Chen, Jiawei and Wang, Xiang and Feng, Fuli and He, Xiangnan and Teinemaa, Irene and Albert, Javier and Goldenberg, Dmitri and Vasile, Flavian and Rohde, David and Jeunen, Olivier and Benhalloum, Amine and Sakhi, Otmane and Rong, Yu and Huang, Wenbing and Xu, Tingyang and Bian, Yatao and Cheng, Hong and Sun, Fuchun and Huang, Junzhou and Fakhraei, Shobeir and Faloutsos, Christos and {\c{C}}elebi, Onur and M{\"u}ller, Martin and Schneider, Manuel and Altunina, Olesia and Wingerath, Wolfram and Wollmer, Benjamin and Gessert, Felix and Succo, Stephan and Ritter, Norbert and Courdier, Evann and Avram, Tudor Mihai and Cvetinovic, Dragan and Tsinadze, Levan and Jose, Johny and Howell, Rose and Koenig, Mario and Defferrard, Micha{\"e}l and Kenthapadi, Krishnaram and Packer, Ben and Sameki, Mehrnoosh and Sephus, Nashlie},
  title     = {Summary of Tutorials at The Web Conference 2021},
  booktitle = {Companion Proceedings of the Web Conference 2021},
  series    = {WWW '21},
  location  = {Ljubljana, Slovenia},
  year      = {2021},
  pages     = {727--733},
  numpages  = {7},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450383134},
  doi       = {10.1145/3442442.3453701},
  url       = {https://doi.org/10.1145/3442442.3453701},
  abstract  = {This report summarizes the 23 tutorials hosted at The Web Conference 2021: nine lecture-style tutorials and 14 hands-on tutorials.}
}
% F1000Research article on automated workflow composition in the life sciences.
% All accents now use the brace-wrapped special-character form already used for the
% other accented names in this author list.
@article{Lamprecht2021,
  author    = {Anna-Lena Lamprecht and Magnus Palmblad and Jon Ison and Veit Schw{\"a}mmle and Mohammad Sadnan Al Manir and Ilkay Altintas and Christopher J. O. Baker and Ammar Ben Hadj Amor and Salvador Capella-Gutierrez and Paulos Charonyktakis and Michael R. Crusoe and Yolanda Gil and Carole Goble and Timothy J. Griffin and Paul Groth and Hans Ienasescu and Pratik Jagtap and Mat{\'{u}}{\v{s}} Kala{\v{s}} and Vedran Kasalica and Alireza Khanteymoori and Tobias Kuhn and Hailiang Mei and Herv{\'{e}} M{\'{e}}nager and Steffen M{\"o}ller and Robin A. Richardson and Vincent Robert and Stian Soiland-Reyes and Robert Stevens and Szoke Szaniszlo and Suzan Verberne and Aswin Verhoeven and Katherine Wolstencroft},
  title     = {Perspectives on automated composition of workflows in the life sciences},
  journal   = {F1000Research},
  publisher = {F1000 Research Ltd},
  year      = {2021},
  month     = sep,
  volume    = {10},
  pages     = {897},
  doi       = {10.12688/f1000research.54159.1},
  url       = {https://doi.org/10.12688/f1000research.54159.1}
}
% SIGMOD 2021 paper on HedgeCut and low-latency machine unlearning (ACM DL export, cleaned).
@inproceedings{10.1145/3448016.3457239,
  author    = {Schelter, Sebastian and Grafberger, Stefan and Dunning, Ted},
  title     = {HedgeCut: Maintaining Randomised Trees for Low-Latency Machine Unlearning},
  booktitle = {Proceedings of the 2021 International Conference on Management of Data},
  series    = {SIGMOD/PODS '21},
  location  = {Virtual Event, China},
  year      = {2021},
  pages     = {1545--1557},
  numpages  = {13},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450383431},
  doi       = {10.1145/3448016.3457239},
  url       = {https://doi.org/10.1145/3448016.3457239},
  abstract  = {Software systems that learn from user data with machine learning (ML) have become ubiquitous over the last years. Recent law such as the "General Data Protection Regulation" (GDPR) requires organisations that process personal data to delete user data upon request (enacting the "right to be forgotten"). However, this regulation does not only require the deletion of user data from databases, but also applies to ML models that have been learned from the stored data. We therefore argue that ML applications should offer users to unlearn their data from trained models in a timely manner. We explore how fast this unlearning can be done under the constraints imposed by real world deployments, and introduce the problem of low-latency machine unlearning: maintaining a deployed ML model in-place under the removal of a small fraction of training samples without retraining. We propose HedgeCut, a classification model based on an ensemble of randomised decision trees, which is designed to answer unlearning requests with low latency. We detail how to efficiently implement HedgeCut with vectorised operators for decision tree learning. We conduct an experimental evaluation on five privacy-sensitive datasets, where we find that HedgeCut can unlearn training samples with a latency of around 100 microseconds and answers up to 36,000 prediction requests per second, while providing a training time and predictive accuracy similar to widely used implementations of tree-based ML models such as Random Forests.},
  keywords  = {machine unlearning, decision trees, serving systems}
}
@inproceedings{10.1145/3448016.3452759,
  author    = {Grafberger, Stefan and Guha, Shubha and Stoyanovich, Julia and Schelter, Sebastian},
  title     = {{MLINSPECT}: A Data Distribution Debugger for Machine Learning Pipelines},
  booktitle = {Proceedings of the 2021 International Conference on Management of Data},
  series    = {SIGMOD/PODS '21},
  location  = {Virtual Event, China},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2021},
  pages     = {2736--2739},
  numpages  = {4},
  isbn      = {9781450383431},
  doi       = {10.1145/3448016.3452759},
  url       = {https://doi.org/10.1145/3448016.3452759},
  keywords  = {technical bias, machine learning pipelines, responsible data science, data distribution debugging},
  abstract  = {Machine Learning (ML) is increasingly used to automate impactful decisions, and the risks arising from this wide-spread use are garnering attention from policymakers, scientists, and the media. ML applications are often very brittle with respect to their input data, which leads to concerns about their reliability, accountability, and fairness. While bias detection cannot be fully automated, computational tools can help pinpoint particular types of data issues. We recently proposed mlinspect, a library that enables lightweight lineage-based inspection of ML preprocessing pipelines. In this demonstration, we show how mlinspect can be used to detect data distribution bugs in a representative pipeline. In contrast to existing work, mlinspect operates on declarative abstractions of popular data science libraries like estimator/transformer pipelines, can handle both relational and matrix data, and does not require manual code instrumentation. The library is publicly available at https://github.com/stefan-grafberger/mlinspect.}
}
@article{schelterspecialissue2021,
  author  = {Sebastian Schelter},
  title   = {Letter from the Special Issue Editor},
  journal = {IEEE Data Engineering Bulletin (Special issue on Data validation for machine learning models and applications)},
  volume  = {44},
  issue   = {1},
  month   = mar,
  year    = {2021},
  url     = {http://sites.computer.org/debull/A21mar/p2.pdf}
}
@inproceedings{daza2021inductive,
  author    = {Daniel Daza and Michael Cochez and Paul Groth},
  title     = {Inductive Entity Representations from Text via Link Prediction},
  booktitle = {Proceedings of The Web Conference},
  month     = apr,
  year      = {2021},
  doi       = {10.1145/3442381.3450141},
  arxiv     = {2010.03496},
  code      = {https://github.com/dfdazac/blp}
}
@inproceedings{Kersbergen2021,
  author    = {B. Kersbergen and S. Schelter},
  title     = {Learnings from a Retail Recommendation System on Billions of Interactions at bol.com},
  booktitle = {2021 IEEE 37th International Conference on Data Engineering (ICDE)},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  month     = apr,
  year      = {2021},
  pages     = {2447--2452},
  doi       = {10.1109/ICDE51399.2021.00277},
  url       = {https://ssc.io/pdf/bol-reco.pdf},
  keywords  = {measurement;scalability;conferences;europe;prediction algorithms;data engineering;recommender systems}
}
@inproceedings{DBLP:conf/edbt/RedyukKMS21,
  author    = {Sergey Redyuk and Zoi Kaoudi and Volker Markl and Sebastian Schelter},
  title     = {Automating Data Quality Validation for Dynamic Data Ingestion},
  editor    = {Yannis Velegrakis and Demetris Zeinalipour{-}Yazti and
               Panos K. Chrysanthis and Francesco Guerra},
  booktitle = {Proceedings of the 24th International Conference on Extending Database
               Technology, {EDBT} 2021, Nicosia, Cyprus, March 23 - 26, 2021},
  publisher = {OpenProceedings.org},
  year      = {2021},
  pages     = {61--72},
  doi       = {10.5441/002/edbt.2021.07},
  url       = {https://doi.org/10.5441/002/edbt.2021.07},
  timestamp = {Sat, 20 Mar 2021 10:29:38 +0100},
  biburl    = {https://dblp.org/rec/conf/edbt/RedyukKMS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{arakelyan2021complex,
  author    = {Erik Arakelyan and Daniel Daza and Pasquale Minervini and Michael Cochez},
  title     = {Complex Query Answering with Neural Link Predictors},
  booktitle = {International Conference on Learning Representations (ICLR)},
  month     = jan,
  year      = {2021},
  url       = {https://openreview.net/forum?id=Mos9F9kDwkz},
  arxiv     = {2011.03459},
  award     = {{Outstanding Paper Award ICLR 2021}},
  awardurl  = {https://iclr-conf.medium.com/announcing-iclr-2021-outstanding-paper-awards-9ae0514734ab}
}
@inproceedings{DBLP:conf/edbt/SchelterRB21,
  author    = {Sebastian Schelter and Tammo Rukat and Felix Biessmann},
  title     = {{JENGA} - {A} Framework to Study the Impact of Data Errors on the
               Predictions of Machine Learning Models},
  editor    = {Yannis Velegrakis and Demetris Zeinalipour{-}Yazti and
               Panos K. Chrysanthis and Francesco Guerra},
  booktitle = {Proceedings of the 24th International Conference on Extending Database
               Technology, {EDBT} 2021, Nicosia, Cyprus, March 23 - 26, 2021},
  publisher = {OpenProceedings.org},
  year      = {2021},
  pages     = {529--534},
  doi       = {10.5441/002/edbt.2021.63},
  url       = {https://doi.org/10.5441/002/edbt.2021.63},
  timestamp = {Sat, 20 Mar 2021 10:29:38 +0100},
  biburl    = {https://dblp.org/rec/conf/edbt/SchelterRB21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cidr/GrafbergerSS21,
  author    = {Stefan Grafberger and Julia Stoyanovich and Sebastian Schelter},
  title     = {Lightweight Inspection of Data Preprocessing in Native Machine Learning
               Pipelines},
  booktitle = {11th Conference on Innovative Data Systems Research, {CIDR} 2021,
               Virtual Event, January 11-15, 2021, Online Proceedings},
  publisher = {www.cidrdb.org},
  year      = {2021},
  url       = {http://cidrdb.org/cidr2021/papers/cidr2021\_paper27.pdf},
  timestamp = {Tue, 23 Mar 2021 15:35:48 +0100},
  biburl    = {https://dblp.org/rec/conf/cidr/GrafbergerSS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{scheltertaming,
  author  = {Schelter, Sebastian and Stoyanovich, Julia},
  title   = {Taming Technical Bias in Machine Learning Pipelines},
  journal = {IEEE Data Engineering Bulletin (Special Issue on Interdisciplinary Perspectives on Fairness and Artificial Intelligence Systems)},
  year    = {2021},
  url     = {http://sites.computer.org/debull/A20dec/p39.pdf}
}
% 2020
@proceedings{DBLP:conf/i-semantics/2020,
  author    = {Eva Blomqvist and Paul Groth and Victor de Boer and
               Tassilo Pellegrini and Mehwish Alam and Tobias K{\"{a}}fer and
               Peter Kieseberg and Sabrina Kirrane and
               Albert Mero{\~{n}}o{-}Pe{\~{n}}uela and Harshvardhan J. Pandit},
  title     = {Semantic Systems. In the Era of Knowledge Graphs - 16th International
               Conference on Semantic Systems, SEMANTiCS 2020, Amsterdam, The Netherlands,
               September 7-10, 2020, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {12378},
  publisher = {Springer},
  year      = {2020},
  isbn      = {978-3-030-59832-7},
  doi       = {10.1007/978-3-030-59833-4},
  url       = {https://doi.org/10.1007/978-3-030-59833-4},
  timestamp = {Tue, 27 Oct 2020 16:52:33 +0100},
  biburl    = {https://dblp.org/rec/conf/i-semantics/2020.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{10.1145/3446428,
  author     = {Zeng, Weixin and Zhao, Xiang and Tang, Jiuyang and Lin, Xuemin and Groth, Paul},
  title      = {Reinforcement Learning–Based Collective Entity Alignment with Adaptive Features},
  journal    = {ACM Trans. Inf. Syst.},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  volume     = {39},
  number     = {3},
  month      = may,
  year       = {2021},
  issue_date = {July 2021},
  articleno  = {26},
  numpages   = {31},
  issn       = {1046-8188},
  doi        = {10.1145/3446428},
  url        = {https://doi.org/10.1145/3446428},
  keywords   = {adaptive feature fusion, Entity alignment, reinforcement learning},
  abstract   = {Entity alignment (EA) is the task of identifying the entities that refer to the same real-world object but are located in different knowledge graphs (KGs). For entities to be aligned, existing EA solutions treat them separately and generate alignment results as ranked lists of entities on the other side. Nevertheless, this decision-making paradigm fails to take into account the interdependence among entities. Although some recent efforts mitigate this issue by imposing the 1-to-1 constraint on the alignment process, they still cannot adequately model the underlying interdependence and the results tend to be sub-optimal. To fill in this gap, in this work, we delve into the dynamics of the decision-making process, and offer a reinforcement learning (RL)–based model to align entities collectively. Under the RL framework, we devise the coherence and exclusiveness constraints to characterize the interdependence and restrict collective alignment. Additionally, to generate more precise inputs to the RL framework, we employ representative features to capture different aspects of the similarity between entities in heterogeneous KGs, which are integrated by an adaptive feature fusion strategy. Our proposal is evaluated on both cross-lingual and mono-lingual EA benchmarks and compared against state-of-the-art solutions. The empirical results verify its effectiveness and superiority.}
}
@inproceedings{brate-etal-2020-towards,
  author    = {Brate, Ryan and Groth, Paul and van Erp, Marieke},
  title     = {Towards Olfactory Information Extraction from Text: A Case Study on Detecting Smell Experiences in Novels},
  booktitle = {Proceedings of the 4th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature},
  publisher = {International Committee on Computational Linguistics},
  address   = {Online},
  month     = dec,
  year      = {2020},
  pages     = {147--155},
  url       = {https://www.aclweb.org/anthology/2020.latechclfl-1.18},
  abstract  = {Environmental factors determine the smells we perceive, but societal factors shape the importance, sentiment and biases we give to them. Descriptions of smells in text, or as we call them {`}smell experiences{'}, offer a window into these factors, but they must first be identified. To the best of our knowledge, no tool exists to extract references to smell experiences from text. In this paper, we present two variations on a semi-supervised approach to identify smell experiences in English literature. The combined set of patterns from both implementations offer significantly better performance than a keyword-based baseline.},
}
@article{Koesten2020,
  author    = {Laura Koesten and Pavlos Vougiouklis and Elena Simperl and Paul Groth},
  title     = {Dataset Reuse: Toward Translating Principles to Practice},
  journal   = {Patterns},
  publisher = {Cell Press},
  month     = nov,
  year      = {2020},
  pages     = {100136},
  doi       = {10.1016/j.patter.2020.100136},
  url       = {https://doi.org/10.1016/j.patter.2020.100136}
}
@misc{https://doi.org/10.6084/m9.figshare.13010000.v2,
  author    = {{Altmetric Engineering} and Konkiel, Stacy and Priem, Jason and Adie, Euan and Derrick, Gemma and Didegah, Fereshteh and Groth, Paul and Neylon, Cameron and Xu, Shenmeng and Zahedi, Zohreh and Bowman, Timothy and Patel, Vanash M and Haunschild, Robin and Bornmann, Lutz and Taylor, Mike and Ross, Liesa and Theng, Yin-Leng and Hassan, Saeed-Ul and Aljohani, Naif R.},
  title     = {The state of altmetrics: a tenth anniversary celebration},
  publisher = {Altmetric},
  year      = {2020},
  doi       = {10.6084/M9.FIGSHARE.13010000.V2},
  url       = {https://altmetric.figshare.com/articles/online_resource/The_State_of_Altmetrics_A_tenth_anniversary_celebration/13010000/2},
  keywords  = {Applied Computer Science},
  copyright = {Creative Commons Attribution 4.0 International}
}
@inproceedings{berger-etal-2020-effective,
  author    = {Berger, Mark and Zavrel, Jakub and Groth, Paul},
  title     = {Effective distributed representations for academic expert search},
  booktitle = {Proceedings of the First Workshop on Scholarly Document Processing at EMNLP},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = nov,
  year      = {2020},
  pages     = {56--71},
  url       = {https://www.aclweb.org/anthology/2020.sdp-1.7},
  abstract  = {Expert search aims to find and rank experts based on a user{'}s query. In academia, retrieving experts is an efficient way to navigate through a large amount of academic knowledge. Here, we study how different distributed representations of academic papers (i.e. embeddings) impact academic expert retrieval. We use the Microsoft Academic Graph dataset and experiment with different configurations of a document-centric voting model for retrieval. In particular, we explore the impact of the use of contextualized embeddings on search performance. We also present results for paper embeddings that incorporate citation information through retrofitting. Additionally, experiments are conducted using different techniques for assigning author weights based on author order. We observe that using contextual embeddings produced by a transformer model trained for sentence similarity tasks produces the most effective paper representations for document-centric expert retrieval. However, retrofitting the paper embeddings and using elaborate author contribution weighting strategies did not improve retrieval performance.},
}
@article{KOESTEN2021102562,
  author   = {Laura Koesten and Kathleen Gregory and Paul Groth and Elena Simperl},
  title    = {Talking datasets – Understanding data sensemaking behaviours},
  journal  = {International Journal of Human-Computer Studies},
  volume   = {146},
  pages    = {102562},
  year     = {2021},
  issn     = {1071-5819},
  doi      = {10.1016/j.ijhcs.2020.102562},
  url      = {http://www.sciencedirect.com/science/article/pii/S1071581920301646},
  arxiv    = {1911.09041},
  keywords = {Sensemaking, Human computer interaction, Human data interaction, Data reuse, Data sharing},
  abstract = {The sharing and reuse of data are seen as critical to solving the most complex problems of today. Despite this potential, relatively little attention has been paid to a key step in data reuse: the behaviours involved in data-centric sensemaking. We aim to address this gap by presenting a mixed-methods study combining in-depth interviews, a think-aloud task and a screen recording analysis with 31 researchers from different disciplines as they summarised and interacted with both familiar and unfamiliar data. We use our findings to identify and detail common patterns of data-centric sensemaking across three clusters of activities that we present as a framework: inspecting data, engaging with content, and placing data within broader contexts. Additionally, we propose design recommendations for tools and documentation practices, which can be used to facilitate sensemaking and subsequent data reuse.}
}
@inproceedings{10.1145/3340531.3414072,
  author    = {Alam, Mehwish and Groth, Paul and Hitzler, Pascal and Paulheim, Heiko and Sack, Harald and Tresp, Volker},
  title     = {CSSA'20: Workshop on Combining Symbolic and Sub-Symbolic Methods and Their Applications},
  booktitle = {Proceedings of the 29th ACM International Conference on Information \& Knowledge Management},
  series    = {CIKM '20},
  location  = {Virtual Event, Ireland},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2020},
  pages     = {3523--3524},
  numpages  = {2},
  isbn      = {9781450368599},
  doi       = {10.1145/3340531.3414072},
  url       = {https://doi.org/10.1145/3340531.3414072},
  keywords  = {sub-symbolic artificial intelligence, symbolic artificial intelligence, knowledge graphs, neural networks},
  abstract  = {There has been a rapid growth in the use of symbolic representations along with their applications in many important tasks. Symbolic representations, in the form of Knowledge Graphs (KGs), constitute large networks of real-world entities and their relationships. On the other hand, sub-symbolic artificial intelligence has also become a mainstream area of research. This workshop brought together researchers to discuss and foster collaborations on the intersection of these two areas.}
}
@inproceedings{10.1007/978-3-030-62516-0_1,
  author    = {Bernstein, Mark and Palosaari Eladhari, Mirjam and Koenitz, Hartmut and
               Louchart, Sandy and Nack, Frank and Martens, Chris and
               Rossi, Giulia Carla and Bosser, Anne-Gwenn and Millard, David E.},
  title     = {ICIDS2020 Panel: Building the Discipline of Interactive Digital Narratives},
  editor    = {Bosser, Anne-Gwenn and Millard, David E. and Hargood, Charlie},
  booktitle = {Interactive Storytelling},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2020},
  pages     = {3--11},
  isbn      = {978-3-030-62516-0},
  doi       = {10.1007/978-3-030-62516-0_1},
  abstract  = {Building our discipline has been an ongoing discussion since the early days of ICIDS. From earlier international joint efforts to integrate research from multiple fields of study to today's endeavours by researchers to provide scholarly works of reference, the discussion on how to continue building Interactive Digital Narratives as a discipline with its own vocabulary, scope, evaluation and methods is far from over. This year, we have chosen to continue this discussion through a panel in order to explore what are the epistemological implications of the multiple disciplinary roots of our field, and what are the next steps we should take as a community.}
}
@article{schelterSigmodRecord2020,
  title   = {Technical Perspective: Query Optimization for Faster Deep CNN Explanations},
  author  = {Sebastian Schelter},
  journal = {ACM SIGMOD Record},
  volume  = {49},
  number  = {1},
  year    = {2020},
  url     = {https://sigmodrecord.org/?smd_process_download=1&download_id=2686}
}
@article{JMLR:v21:18-800,
  author  = {Robin Anil and Gokhan Capan and Isabel Drost-Fromm and Ted Dunning and Ellen Friedman and Trevor Grant and Shannon Quinn and Paritosh Ranjan and Sebastian Schelter and Özgür Yılmazel},
  title   = {Apache Mahout: Machine Learning on Distributed Dataflow Systems},
  journal = {Journal of Machine Learning Research},
  volume  = {21},
  number  = {127},
  pages   = {1--6},
  year    = {2020},
  url     = {http://jmlr.org/papers/v21/18-800.html}
}
@inproceedings{daza2020message,
  author    = {Daza, Daniel and Cochez, Michael},
  title     = {Message Passing Query Embedding},
  booktitle = {{ICML Workshop - Graph Representation Learning and Beyond}},
  year      = {2020},
  arxiv     = {2002.02406},
  url       = {https://arxiv.org/abs/2002.02406}
}
@article{GrothDumontier2020,
  author    = {Groth, Paul and Dumontier, Michel},
  title     = {Introduction – FAIR data, systems and analysis},
  journal   = {Data Science},
  publisher = {IOS Press},
  volume    = {3},
  number    = {1},
  pages     = {1--2},
  month     = jun,
  year      = {2020},
  issn      = {2451-8492},
  doi       = {10.3233/DS-200029},
  url       = {http://doi.org/10.3233/DS-200029}
}
@article{selten2020,
  author  = {Selten, Friso and Neylon, Cameron and Huang, Chun-Kai and Groth, Paul},
  title   = {A longitudinal analysis of university rankings},
  journal = {Quantitative Science Studies},
  volume  = {1},
  number  = {3},
  pages   = {1109--1135},
  year    = {2020},
  doi     = {10.1162/qss_a_00052},
  url     = {https://doi.org/10.1162/qss_a_00052},
  eprint  = {https://doi.org/10.1162/qss_a_00052}
}
@inproceedings{yang_2020,
  author    = {Yang, Ke and Huang, Biao and Stoyanovich, Julia and Schelter, Sebastian},
  title     = {Fairness-Aware Instrumentation of Preprocessing Pipelines for Machine
               Learning},
  booktitle = {{Workshop on Human-In-the-Loop Data Analytics (HILDA’20)}},
  month     = jun,
  year      = {2020},
  doi       = {10.1145/3398730.3399194},
  url       = {https://ssc.io/pdf/hilda-fairdags.pdf},
}
@inproceedings{vanerp-groth:2020:LREC,
  author    = {van Erp, Marieke and Groth, Paul},
  title     = {Towards Entity Spaces},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  publisher = {European Language Resources Association},
  address   = {Marseille, France},
  month     = may,
  year      = {2020},
  pages     = {2129--2137},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.261},
  abstract  = {Entities are a central element of knowledge bases and are important input to many knowledge-centric tasks including text analysis. For example, they allow us to find documents relevant to a specific entity irrespective of the underlying syntactic expression within a document. However, the entities that are commonly represented in knowledge bases are often a simplification of what is truly being referred to in text. For example, in a knowledge base, we may have an entity for Germany as a country but not for the more fuzzy concept of Germany that covers notions of German Population, German Drivers, and the German Government. Inspired by recent advances in contextual word embeddings, we introduce the concept of entity spaces - specific representations of a set of associated entities with near-identity. Thus, these entity spaces provide a handle to an amorphous grouping of entities. We developed a proof-of-concept for English showing how, through the introduction of entity spaces in the form of disambiguation pages, the recall of entity linking can be improved.}
}
@inproceedings{stamatogiannakis_pandacap_2020,
  author    = {Stamatogiannakis, Manolis and Bos, Herbert and Groth, Paul},
  title     = {{PANDAcap}: {A} {Framework} for {Streamlining} {Collection} of {Full}-{System} {Traces}},
  booktitle = {{EuroSec}},
  month     = apr,
  year      = {2020},
  doi       = {10.1145/3380786.3391396},
  url       = {https://download.vusec.net/papers/pandacap-eurosec20.pdf},
  code      = {https://github.com/vusec/pandacap},
  keywords  = {PANDA, class\_provenance, dataset, docker, framework, honeypot, record and replay, type\_workshop}
}
@article{Gregory2020Lost,
  author  = {Gregory, Kathleen and Groth, Paul and Scharnhorst, Andrea and Wyatt, Sally},
  title   = {Lost or Found? Discovering Data Needed for Research},
  journal = {Harvard Data Science Review},
  date    = {2020-04-30},
  year    = {2020},
  month   = apr,
  day     = {30},
  doi     = {10.1162/99608f92.e38165eb},
  url     = {https://hdsr.mitpress.mit.edu/pub/gw3r97ht},
  note    = {https://hdsr.mitpress.mit.edu/pub/gw3r97ht},
}
@article{Kastner2020,
  author    = {Marc A. Kastner and Ichiro Ide and Frank Nack and Yasutomo Kawanishi and Takatsugu Hirayama and Daisuke Deguchi and Hiroshi Murase},
  title     = {Estimating the imageability of words by mining visual characteristics from crawled image data},
  journal   = {Multimedia Tools and Applications},
  publisher = {Springer Science and Business Media {LLC}},
  month     = feb,
  year      = {2020},
  doi       = {10.1007/s11042-019-08571-4},
  url       = {https://doi.org/10.1007/s11042-019-08571-4}
}
@inproceedings{10.1007/978-3-030-33894-7_9,
  author    = {Kolhoff, Lobke and Nack, Frank},
  title     = {How Relevant Is Your Choice?},
  editor    = {Cardona-Rivera, Rogelio E. and Sullivan, Anne and Young, R. Michael},
  booktitle = {ICIDS 2019. Lecture Notes in Computer Science, vol 11869},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2019},
  pages     = {73--85},
  isbn      = {978-3-030-33894-7},
  abstract  = {With the release of the film Black Mirror: Bandersnatch Netflix entered the area of interactive streamed narratives. We performed a qualitative analysis with 169 Netflix subscribers that had watched the episode. The key findings show (1) participants are initially engaged because of curiosity and the novelty value, and desire to explore the narrative regardless of satisfaction, (2) perceived agency is limited due to arbitrary choices and the lack of meaningful consequences, (3) the overall experience is satisfactory but adaptions are desirable in future design to make full use of the potential of the format.}
}
@article{Groth2020,
  author    = {Paul Groth and Helena Cousijn and Tim Clark and Carole Goble},
  title     = {{FAIR} Data Reuse {\textendash} the Path through Data Citation},
  journal   = {Data Intelligence},
  publisher = {{MIT} Press - Journals},
  volume    = {2},
  number    = {1-2},
  pages     = {78--86},
  month     = jan,
  year      = {2020},
  doi       = {10.1162/dint_a_00030},
  url       = {https://doi.org/10.1162/dint_a_00030}
}
@inproceedings{DBLP:conf/i-semantics/SymeonidouSG19,
  author    = {Anthi Symeonidou and Viachaslau Sazonau and Paul Groth},
  title     = {Transfer Learning for Biomedical Named Entity Recognition with BioBERT},
  booktitle = {Proceedings of the Posters and Demo Track of the 15th International
               Conference on Semantic Systems co-located with 15th International
               Conference on Semantic Systems (SEMANTiCS 2019), Karlsruhe, Germany,
               September 9th - to - 12th, 2019.},
  crossref  = {DBLP:conf/i-semantics/2019pd},
  year      = {2019},
  url       = {http://ceur-ws.org/Vol-2451/paper-26.pdf},
  timestamp = {Wed, 25 Sep 2019 17:11:16 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/i-semantics/SymeonidouSG19},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{doi:10.1177/0165551519837182,
  author   = {Kathleen M Gregory and Helena Cousijn and Paul Groth and Andrea Scharnhorst and Sally Wyatt},
  title    = {Understanding data search as a socio-technical practice},
  journal  = {Journal of Information Science},
  pages    = {0165551519837182},
  year     = {2019},
  doi      = {10.1177/0165551519837182},
  url      = {https://doi.org/10.1177/0165551519837182},
  eprint   = {https://doi.org/10.1177/0165551519837182},
  abstract = {Open research data are heralded as having the potential to increase effectiveness, productivity and reproducibility in science, but little is known about the actual practices involved in data search. The socio-technical problem of locating data for reuse is often reduced to the technological dimension of designing data search systems. We combine a bibliometric study of the current academic discourse around data search with interviews with data seekers. In this article, we explore how adopting a contextual, socio-technical perspective can help to understand user practices and behaviour and ultimately help to improve the design of data discovery systems.},
}
@article{doi:10.1002/asi.24165,
  author   = {Gregory, Kathleen and Groth, Paul and Cousijn, Helena and Scharnhorst, Andrea and Wyatt, Sally},
  title    = {Searching Data: A Review of Observational Data Retrieval Practices in Selected Disciplines},
  journal  = {Journal of the Association for Information Science and Technology},
  year     = {2019},
  doi      = {10.1002/asi.24165},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.24165},
  eprint   = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/asi.24165},
  abstract = {A cross-disciplinary examination of the user behaviors involved in seeking and evaluating data is surprisingly absent from the research data discussion. This review explores the data retrieval literature to identify commonalities in how users search for and evaluate observational research data in selected disciplines. Two analytical frameworks, rooted in information retrieval and science and technology studies, are used to identify key similarities in practices as a first step toward developing a model describing data retrieval.},
}
@article{Chapman2019,
  author    = {Adriane Chapman and Elena Simperl and Laura Koesten and George Konstantinidis and Luis-Daniel Ib{\'{a}}{\~{n}}ez and Emilia Kacprzak and Paul Groth},
  title     = {Dataset search: a survey},
  journal   = {The {VLDB} Journal},
  publisher = {Springer Science and Business Media {LLC}},
  volume    = {29},
  number    = {1},
  pages     = {251--272},
  month     = aug,
  year      = {2020},
  doi       = {10.1007/s00778-019-00564-x},
  url       = {https://doi.org/10.1007/s00778-019-00564-x},
  arxiv     = {1901.00735}
}
@inproceedings{DBLP:conf/esws/GrothSDA19,
  author        = {Paul T. Groth and Antony Scerri and Ron Daniel and Bradley P. Allen},
  title         = {End-to-End Learning for Answering Structured Queries Directly over
                   Text},
  booktitle     = {Proceedings of the Workshop on Deep Learning for Knowledge Graphs
                   {(DL4KG2019)} Co-located with the 16th Extended Semantic Web Conference
                   2019 {(ESWC} 2019), Portoroz, Slovenia, June 2, 2019.},
  pages         = {57--70},
  year          = {2019},
  crossref      = {DBLP:conf/esws/2019dl4kg},
  url           = {http://ceur-ws.org/Vol-2377/paper\_7.pdf},
  timestamp     = {Wed, 12 Jun 2019 13:22:16 +0200},
  biburl        = {https://dblp.org/rec/bib/conf/esws/GrothSDA19},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  eprint        = {1811.06303},
  archiveprefix = {arXiv},
  arxiv         = {1811.06303},
}
@inproceedings{DBLP:conf/coling/GrothLSD18,
  author    = {Paul T. Groth and Michael Lauruhn and Antony Scerri and Ron Daniel},
  title     = {Open Information Extraction on Scientific Text: An Evaluation},
  booktitle = {Proceedings of the 27th International Conference on Computational
               Linguistics, {COLING} 2018, Santa Fe, New Mexico, USA, August 20-26,
               2018},
  crossref  = {DBLP:conf/coling/2018},
  year      = {2018},
  pages     = {3414--3423},
  url       = {https://aclanthology.info/papers/C18-1289/c18-1289},
  timestamp = {Wed, 15 Aug 2018 11:10:27 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/coling/GrothLSD18},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/semweb/DeJongBDHMOSSST18,
  author    = {Alex DeJong and Radmila Bord and Will Dowling and
               Rinke Hoekstra and Ryan Moquin and Charlie O and
               Mevan Samarasinghe and Paul Snyder and Craig Stanley and
               Anna Tordai and Michael Trefry and Paul T. Groth},
  title     = {Elsevier's Healthcare Knowledge Graph and the Case for Enterprise
               Level Linked Data Standards},
  booktitle = {Proceedings of the {ISWC} 2018 Posters {\&} Demonstrations, Industry
               and Blue Sky Ideas Tracks co-located with 17th International Semantic
               Web Conference {(ISWC} 2018), Monterey, USA, October 8th - to - 12th,
               2018.},
  crossref  = {DBLP:conf/semweb/2018p},
  year      = {2018},
  url       = {http://ceur-ws.org/Vol-2180/paper-85.pdf},
  timestamp = {Thu, 11 Oct 2018 12:45:11 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/semweb/DeJongBDHMOSSST18},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/semweb/LauruhnGHD18,
  author    = {Michael Lauruhn and Paul T. Groth and Corey A. Harper and Helena F. Deus},
  title     = {Use of Internal Testing Data to Help Determine Compensation for Crowdsourcing
               Tasks},
  booktitle = {Proceedings of the 2nd International Workshop on Augmenting Intelligence
               with Humans{\-}-in-{\-}the-{\-}Loop co-located with 17th International
               Semantic Web Conference {(ISWC} 2018), Monterey, California, October
               9th, 2018.},
  crossref  = {DBLP:conf/semweb/2018huml},
  year      = {2018},
  pages     = {27--37},
  url       = {http://ceur-ws.org/Vol-2169/paper-04.pdf},
  timestamp = {Mon, 29 Oct 2018 16:50:54 +0100},
  biburl    = {https://dblp.org/rec/bib/conf/semweb/LauruhnGHD18},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tkde/WylotCHG17,
  author    = {Marcin Wylot and Philippe Cudr{\'{e}}{-}Mauroux and
               Manfred Hauswirth and Paul T. Groth},
  title     = {Storing, Tracking, and Querying Provenance in Linked Data},
  journal   = {{IEEE} Trans. Knowl. Data Eng.},
  volume    = {29},
  number    = {8},
  pages     = {1751--1764},
  year      = {2017},
  doi       = {10.1109/TKDE.2017.2690299},
  url       = {https://doi.org/10.1109/TKDE.2017.2690299},
  timestamp = {Tue, 18 Jul 2017 10:49:06 +0200},
  biburl    = {https://dblp.org/rec/bib/journals/tkde/WylotCHG17},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/toit/Stamatogiannakis17,
  author    = {Manolis Stamatogiannakis and Elias Athanasopoulos and
               Herbert Bos and Paul T. Groth},
  title     = {{PROV2R}: Practical Provenance Analysis of Unstructured
               Processes},
  journal   = {{ACM} Trans. Internet Techn.},
  volume    = {17},
  number    = {4},
  pages     = {37:1--37:24},
  year      = {2017},
  doi       = {10.1145/3062176},
  url       = {https://doi.org/10.1145/3062176},
  timestamp = {Fri, 02 Nov 2018 09:33:33 +0100},
  biburl    = {https://dblp.org/rec/bib/journals/toit/Stamatogiannakis17},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Groth and Cox (2017): PeerJ article, volume 5, article id e3997.
% In the abstract, percent signs are escaped and the numeric range uses the
% LaTeX en-dash ligature, so the text stays safe if a style typesets it.
@article{10.7717/peerj.3997,
  author   = {Groth, Paul and Cox, Jessica},
  title    = {Indicators for the use of robotic labs in basic biomedical research: a literature analysis},
  journal  = {PeerJ},
  year     = 2017,
  month    = nov,
  volume   = 5,
  pages    = {e3997},
  issn     = {2167-8359},
  doi      = {10.7717/peerj.3997},
  url      = {https://doi.org/10.7717/peerj.3997},
  keywords = {Robotic labs, Indicators, Text mining, Methods, Literature analysis},
  abstract = {Robotic labs, in which experiments are carried out entirely by robots, have the potential to provide a reproducible and transparent foundation for performing basic biomedical laboratory experiments. In this article, we investigate whether these labs could be applicable in current experimental practice. We do this by text mining 1,628 papers for occurrences of methods that are supported by commercial robotic labs. Using two different concept recognition tools, we find that 86\%--89\% of the papers have at least one of these methods. This and our other results provide indications that robotic labs can serve as the foundation for performing many lab-based experiments.}
}
% Hauswirth, Wylot, Grund, Groth, Cudre-Mauroux (2017): chapter in the
% Handbook of Big Data Technologies; record exported from DBLP.
% doi and url hold the raw identifier (underscore not backslash-escaped) so
% that generated links resolve; url-aware styles handle the character.
% Book-level fields are inherited through crossref.
@incollection{DBLP:books/sp/17/HauswirthWGGC17,
  author    = {Manfred Hauswirth and
               Marcin Wylot and
               Martin Grund and
               Paul T. Groth and
               Philippe Cudr{\'{e}}{-}Mauroux},
  title     = {Linked Data Management},
  booktitle = {Handbook of Big Data Technologies},
  pages     = {307--338},
  year      = {2017},
  crossref  = {DBLP:books/sp/ZS2017},
  url       = {https://doi.org/10.1007/978-3-319-49340-4_9},
  doi       = {10.1007/978-3-319-49340-4_9},
  timestamp = {Fri, 02 Nov 2018 09:27:05 +0100},
  biburl    = {https://dblp.org/rec/bib/books/sp/17/HauswirthWGGC17},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Groth, Pal, McBeath, Allen, Daniel (2016): paper in the proceedings of the
% 5th Workshop on Automated Knowledge Base Construction; record exported from DBLP.
% Proceedings-level fields are inherited through crossref.
@inproceedings{DBLP:conf/akbc/GrothPMAD16,
  author    = {Paul T. Groth and Sujit Pal and Darin McBeath and Brad Allen and Ron Daniel},
  title     = {Applying Universal Schemas for Domain Specific Ontology Expansion},
  booktitle = {Proceedings of the 5th Workshop on Automated Knowledge Base Construction,
               AKBC@NAACL-HLT 2016, San Diego, CA, USA, June 17, 2016},
  year      = {2016},
  pages     = {81--85},
  crossref  = {DBLP:conf/akbc/2016},
  url       = {http://aclweb.org/anthology/W/W16/W16-1315.pdf},
  timestamp = {Wed, 05 Oct 2016 13:56:02 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/akbc/GrothPMAD16},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Wilkinson et al. (2016): the FAIR Guiding Principles article in
% Scientific Data, volume 3.
% Every author is written in "Last, First" form, one per line, so that
% hyphenated and particle surnames parse unambiguously and given names can be
% abbreviated by the style. The acronym FAIR is brace-protected in the title.
@article{fairprinciples2016,
  author    = {Wilkinson, Mark D. and
               Dumontier, Michel and
               Aalbersberg, IJsbrand Jan and
               Appleton, Gabrielle and
               Axton, Myles and
               Baak, Arie and
               Blomberg, Niklas and
               Boiten, Jan Willem and
               da Silva Santos, Luiz Bonino and
               Bourne, Philip E. and
               Bouwman, Jildau and
               Brookes, Anthony J. and
               Clark, Tim and
               Crosas, Merc{\`e} and
               Dillo, Ingrid and
               Dumon, Olivier and
               Edmunds, Scott and
               Evelo, Chris T. and
               Finkers, Richard and
               Gonzalez-Beltran, Alejandra and
               Gray, Alasdair J. G. and
               Groth, Paul and
               Goble, Carole and
               Grethe, Jeffrey S. and
               Heringa, Jaap and
               't Hoen, Peter A. C. and
               Hooft, Rob and
               Kuhn, Tobias and
               Kok, Ruben and
               Kok, Joost and
               Lusher, Scott J. and
               Martone, Maryann E. and
               Mons, Albert and
               Packer, Abel L. and
               Persson, Bengt and
               Rocca-Serra, Philippe and
               Roos, Marco and
               van Schaik, Rene and
               Sansone, Susanna Assunta and
               Schultes, Erik and
               Sengstag, Thierry and
               Slater, Ted and
               Strawn, George and
               Swertz, Morris A. and
               Thompson, Mark and
               Van Der Lei, Johan and
               Van Mulligen, Erik and
               Velterop, Jan and
               Waagmeester, Andra and
               Wittenburg, Peter and
               Wolstencroft, Katherine and
               Zhao, Jun and
               Mons, Barend},
  title     = {The {FAIR} Guiding Principles for scientific data management and stewardship},
  journal   = {Scientific Data},
  year      = {2016},
  volume    = {3},
  publisher = {Nature Publishing Group},
  issn      = {2052-4463},
  doi       = {10.1038/sdata.2016.18},
  language  = {English},
  abstract  = {There is an urgent need to improve the infrastructure supporting the reuse of scholarly data. A diverse set of stakeholders---representing academia, industry, funding agencies, and scholarly publishers---have come together to design and jointly endorse a concise and measureable set of principles that we refer to as the FAIR Data Principles. The intent is that these may act as a guideline for those wishing to enhance the reusability of their data holdings. Distinct from peer initiatives that focus on the human scholar, the FAIR Principles put specific emphasis on enhancing the ability of machines to automatically find and use the data, in addition to supporting its reuse by individuals. This Comment is the first formal publication of the FAIR Principles, and includes the rationale behind them, and some exemplar implementations in the community.}
}
% Lauruhn and Groth (2016): article in Knowledge Organization, volume 43.
% The citation key "article" is generic but is kept unchanged because
% existing documents may cite it.
% The custom arxiv field is retained; eprint/archiveprefix carry the same
% identifier in the standard form.
% NOTE(review): the DOI suffix (2016-8-622) suggests issue 8 and first page
% 622 - confirm against the publisher record, then add number and pages.
@article{article,
  author        = {Lauruhn, Michael and Groth, Paul},
  title         = {Sources of Change for Modern Knowledge Organization Systems},
  journal       = {Knowledge Organization},
  year          = {2016},
  month         = nov,
  volume        = {43},
  doi           = {10.5771/0943-7444-2016-8-622},
  arxiv         = {1611.00217},
  eprint        = {1611.00217},
  archiveprefix = {arXiv}
}