Skip to content

Commit

Permalink
feat(schema): make MetadataBlock data-binding possible IQSS#8085
Browse files Browse the repository at this point in the history
- Use the Univocity annotations on the model
- Add proper validation restrictions
- Make the column headers (or future mappings) part of the model
  by adding a proper enum, representing the (TSV column) order and
  the key values
  • Loading branch information
poikilotherm committed Sep 3, 2021
1 parent 24345c2 commit b516482
Show file tree
Hide file tree
Showing 2 changed files with 269 additions and 8 deletions.
95 changes: 87 additions & 8 deletions src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java
@@ -1,11 +1,16 @@
package edu.harvard.iq.dataverse;

import com.univocity.parsers.annotations.Parsed;
import com.univocity.parsers.annotations.Validate;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.metadata.Placeholder;

import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.MissingResourceException;
import java.util.Objects;
import java.util.stream.Collectors;
import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.Entity;
Expand All @@ -26,14 +31,45 @@
*
* @author skraffmiller
*/
@Table(indexes = {@Index(columnList="name")
, @Index(columnList="owner_id")})
@Table(indexes = {@Index(columnList="name"),
@Index(columnList="owner_id")})
@NamedQueries({
@NamedQuery( name="MetadataBlock.listAll", query = "SELECT mdb FROM MetadataBlock mdb"),
@NamedQuery( name="MetadataBlock.findByName", query = "SELECT mdb FROM MetadataBlock mdb WHERE mdb.name=:name")
})
@Entity
public class MetadataBlock implements Serializable {

/**
 * Reusable definition of the headers used for parsing this model class from data (TSV, JSON, manual, ...).
 *
 * The nested {@link Constants} class works around annotations not being able to use enum values
 * (a Java limitation): annotation attributes must be compile-time constants, so the raw strings live
 * there and the enum constants merely wrap them.
 */
public enum Headers {
    // Order matters: this must be the same order as we define rules for the TSV format!
    NAME(Constants.NAME),
    OWNER(Constants.OWNER),
    DISPLAY_NAME(Constants.DISPLAY_NAME),
    NAMESPACE_URI(Constants.NAMESPACE_URI);

    /**
     * Compile-time string constants backing the enum keys, usable as annotation attribute values.
     */
    public static final class Constants {
        public final static String NAME = "name";
        public final static String OWNER = "dataverseAlias";
        public final static String DISPLAY_NAME = "displayName";
        public final static String NAMESPACE_URI = "blockURI";
    }

    private final String key;

    Headers(String key) {
        this.key = key;
    }

    /**
     * @return the header key (column name) represented by this enum constant
     */
    public String key() {
        return this.key;
    }

    /**
     * Lists all header keys in declaration order, which is the TSV column order.
     *
     * @return a newly allocated array with one key per enum constant
     */
    public static String[] keys() {
        // Stream straight into the array; no intermediate List is needed.
        return Arrays.stream(values()).map(Headers::key).toArray(String[]::new);
    }
}

private static final long serialVersionUID = 1L;

Expand All @@ -59,13 +95,20 @@ public void setId(Long id) {
/**
 * @return the name of this metadata block
 */
public String getName() {
    return this.name;
}

/**
 * Sets the name of this metadata block.
 *
 * Bound to the column named by {@link Headers.Constants#NAME} when parsed. The validation pattern
 * requires a lowercase ASCII letter followed by at least one word character (letter, digit or
 * underscore), so a name needs at least two characters.
 *
 * @param name the block name
 */
@Parsed(field = Headers.Constants.NAME)
// Docs: No spaces or punctuation, except underscore. By convention, should start with a letter, and use lower camel case
@Validate(matches = "^[a-z][\\w]+$")
public void setName(String name) {
this.name = name;
}

/**
 * @return the namespace URI of this metadata block
 */
public String getNamespaceUri() {
    return this.namespaceUri;
}

/**
 * Sets the namespace URI of this metadata block.
 *
 * Bound to the column named by {@link Headers.Constants#NAMESPACE_URI} when parsed. The value is
 * declared nullable for validation; a present value has to start with {@code http://} or
 * {@code https://} and consist of the characters allowed by the pattern below.
 *
 * @param namespaceUri the namespace URI
 */
@Parsed(field = Headers.Constants.NAMESPACE_URI)
@Validate(nullable = true, matches = "^https?://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]")
public void setNamespaceUri(String namespaceUri) {
this.namespaceUri = namespaceUri;
}
Expand Down Expand Up @@ -93,6 +136,9 @@ public boolean isDisplayOnCreate() {
/**
 * @return the display name of this metadata block
 */
public String getDisplayName() {
    return this.displayName;
}

/**
 * Sets the display name of this metadata block.
 *
 * Bound to the column named by {@link Headers.Constants#DISPLAY_NAME} when parsed. The validation
 * pattern demands a non-whitespace first character followed by up to 255 further characters.
 *
 * @param displayName the display name
 */
@Parsed(field = Headers.Constants.DISPLAY_NAME)
@Validate(matches = "^\\S.{0,255}$") // docs: match all but not blank strings, at least 1 character needed, not nullable, max 256 chars
public void setDisplayName(String displayName) {
this.displayName = displayName;
}
Expand All @@ -109,10 +155,33 @@ public boolean isRequired() {
/**
 * @return the (optional) owning Dataverse collection of this metadata block
 */
public Dataverse getOwner() {
    return this.owner;
}

/**
 * Sets the owning Dataverse collection of this metadata block.
 *
 * @param owner the owning Dataverse collection
 */
public void setOwner(Dataverse owner) {
this.owner = owner;
}

/**
 * Set the (optional) owning Dataverse collection of this metadata block by its alias. This collection
 * and its children will be able to use the metadata block.
 *
 * When this block is parsed by {@link edu.harvard.iq.dataverse.util.metadata.TsvMetadataBlockParser},
 * the alias given in the TSV will be validated. For valid values see the docs
 * ("Special characters (~,`, !, @, #, $, %, ^, &, and *) and spaces are not allowed")
 * and {@link edu.harvard.iq.dataverse.Dataverse#alias} validation patterns.
 * (The possessive quantifier "*+" below achieves in 1 regex where the other validator needs 2:
 * leading digits are consumed without backtracking, so a digits-only alias cannot match.)
 *
 * During parsing, a placeholder will be injected here, needing replacement and more validation.
 * A {@code null} alias leaves the current owner untouched.
 *
 * @param dataverseAlias The alias/identifier of the owning Dataverse collection
 */
@Parsed(field = Headers.Constants.OWNER)
@Validate(nullable = true, matches = "^[\\d]*+[\\w\\-]+$")
protected void setOwner(String dataverseAlias) {
    if (dataverseAlias != null) {
        this.owner = new Placeholder.Dataverse();
        this.owner.setAlias(dataverseAlias);
    }
}

@Transient
private boolean empty;
Expand Down Expand Up @@ -176,15 +245,25 @@ public boolean equals(Object object) {
}
MetadataBlock other = (MetadataBlock) object;
return !(!Objects.equals(this.id, other.id) && (this.id == null || !this.id.equals(other.id)));
}
}

/**
 * Builds a human-readable representation of this block from the fields referenced below.
 *
 * Note that concatenating {@code datasetFieldTypes} and {@code owner} invokes their own
 * {@code toString()} implementations.
 *
 * @return a string of the form {@code MetadataBlock{id=..., name='...', ...}}
 */
@Override
public String toString() {
    // The former "edu.harvard.iq.dataverse.MetadataBlock[ id=... ]" return was left in front of this one,
    // which made this statement unreachable; only the detailed form is kept.
    return "MetadataBlock{" +
        "id=" + id +
        ", name='" + name + '\'' +
        ", displayName='" + displayName + '\'' +
        ", namespaceUri='" + namespaceUri + '\'' +
        ", datasetFieldTypes=" + datasetFieldTypes +
        ", owner=" + owner +
        ", empty=" + empty +
        ", selected=" + selected +
        ", hasRequired=" + hasRequired +
        ", showDatasetFieldTypes=" + showDatasetFieldTypes +
        '}';
}

public String getLocaleDisplayName()
{

public String getLocaleDisplayName() {
try {
return BundleUtil.getStringFromPropertyFile("metadatablock.displayName", getName());
} catch (MissingResourceException e) {
Expand Down
@@ -0,0 +1,182 @@
package edu.harvard.iq.dataverse.util.metadata;

import com.univocity.parsers.common.DataValidationException;
import com.univocity.parsers.common.processor.BeanListProcessor;
import com.univocity.parsers.tsv.TsvParser;
import com.univocity.parsers.tsv.TsvParserSettings;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.MetadataBlock;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EmptySource;
import org.junit.jupiter.params.provider.ValueSource;

import java.io.StringReader;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Data-binding tests for {@link MetadataBlock}: each test renders a single TSV row, feeds it through a
 * Univocity {@link TsvParser} backed by a {@link BeanListProcessor} and checks that the validation
 * annotations on the model accept or reject the value under test.
 */
class MetadataBlockParsingTest {

    static BeanListProcessor<MetadataBlock> metadataBlockProcessor = new BeanListProcessor<>(MetadataBlock.class);
    static TsvParser parser;
    static TsvParserSettings settings = new TsvParserSettings();
    static final String LONGER_THAN_256_CHARS = "Jx7Agh8hSs4EwkCHzxwXQHOVYiL0i79n4hxeP1PbVRgkmRyUqB9dlFSoFbqCmoZ0OUCPHLOz" +
        "JMAZeTDxI3dj7QAQG6UuNBUaFDgyG40TRK6X3FiA0f8p4LZBHQC1HIbpIw7wiNmDoEfbrGHehAgbXWDDEXelGL4TXhSxHXIqfgNaLD9fNnk" +
        "XXcqNsuWMvkDQNrKhUWFQQybhHWS8jh62AjRWEvqFXvqVAnrgZ8xFnRiSpDkubsGuZWZqRFVN6wSPd9sp0GrpEWa5eCv0oFtQLHx0";

    @BeforeAll
    static void setUp() {
        settings.setProcessor(metadataBlockProcessor);
        settings.setHeaders(MetadataBlock.Headers.keys());
        // Hand values to the validators exactly as written: with leading whitespace being skipped by the
        // parser, a display name such as " Hello Hello" would be trimmed before validation and the
        // "must not start with whitespace" rule could never be exercised.
        // NOTE(review): confirm that the production TSV parser is configured the same way.
        settings.setIgnoreLeadingWhitespaces(false);
        parser = new TsvParser(settings);
    }

    @ParameterizedTest
    @ValueSource(strings = {"hello", "helloMyName", "hello_my_name", "h1234"})
    void setName_AllValid(String name) {
        // given
        StringReader reader = new StringReader(generateMetadataBlockTSV(
            Map.of(MetadataBlock.Headers.NAME, name,
                MetadataBlock.Headers.DISPLAY_NAME, "display")));

        // when
        parser.parse(reader);
        List<MetadataBlock> blocks = metadataBlockProcessor.getBeans();

        // then
        assertEquals(1, blocks.size());
        assertEquals(name, blocks.get(0).getName());
    }

    @ParameterizedTest
    @EmptySource
    @ValueSource(strings = {"1234", "!", "hello+", "Hello", "hello-my_name_is", "what-s-up-5", "1234-foobar"})
    void setName_AllInvalid(String name) {
        // given
        StringReader reader = new StringReader(generateMetadataBlockTSV(
            Map.of(MetadataBlock.Headers.NAME, name,
                MetadataBlock.Headers.DISPLAY_NAME, "display")));

        // when & then
        assertThrows(DataValidationException.class, () -> parser.parse(reader));
    }

    @ParameterizedTest
    @EmptySource
    @ValueSource(strings = {"https://demo.dataverse.org/foobar", "http://demo.dataverse.org/foobar"})
    void setNamespaceUri_Valid(String uri) {
        // given
        StringReader reader = new StringReader(generateMetadataBlockTSV(
            Map.of(MetadataBlock.Headers.NAME, "test",
                MetadataBlock.Headers.DISPLAY_NAME, "display",
                MetadataBlock.Headers.NAMESPACE_URI, uri)));

        // when
        parser.parse(reader);
        List<MetadataBlock> blocks = metadataBlockProcessor.getBeans();

        // then
        assertEquals(1, blocks.size());
        // An empty column is compared as "" even when the bean holds null for it.
        assertEquals(uri, Optional.ofNullable(blocks.get(0).getNamespaceUri()).orElse(""));
    }

    @ParameterizedTest
    @ValueSource(strings = {"//demo.dataverse.org/foobar", "doi://demo.dataverse.org/foobar"})
    void setNamespaceUri_Invalid(String uri) {
        // given
        StringReader reader = new StringReader(generateMetadataBlockTSV(
            Map.of(MetadataBlock.Headers.NAME, "test",
                MetadataBlock.Headers.DISPLAY_NAME, "display",
                MetadataBlock.Headers.NAMESPACE_URI, uri)));

        // when & then
        assertThrows(DataValidationException.class, () -> parser.parse(reader));
    }

    @ParameterizedTest
    @ValueSource(strings = {"hello", "H 1234", "Hello this is my Name", "1234 Foo Bar Town", "DO NOT USE!!!"})
    void setDisplayName_AllValid(String displayName) {
        // given
        StringReader reader = new StringReader(generateMetadataBlockTSV(
            Map.of(MetadataBlock.Headers.DISPLAY_NAME, displayName,
                MetadataBlock.Headers.NAME, "test")));

        // when
        parser.parse(reader);
        List<MetadataBlock> blocks = metadataBlockProcessor.getBeans();

        // then
        assertEquals(1, blocks.size());
        assertEquals(displayName, blocks.get(0).getDisplayName());
    }

    @ParameterizedTest
    @EmptySource
    @ValueSource(strings = {" \t", "\t hello", " Hello Hello", LONGER_THAN_256_CHARS})
    void setDisplayName_AllInvalid(String displayName) {
        // given
        // The value under test goes into the DISPLAY_NAME column while NAME stays valid, so a thrown
        // exception can only originate from the display name validation.
        StringReader reader = new StringReader(generateMetadataBlockTSV(
            Map.of(MetadataBlock.Headers.NAME, "test",
                MetadataBlock.Headers.DISPLAY_NAME, displayName)));

        // when & then
        assertThrows(DataValidationException.class, () -> parser.parse(reader));
    }

    @ParameterizedTest
    @EmptySource
    @ValueSource(strings = {"hello", "HelloMyName", "hello_my_name_is", "hello-im-marc", "_foo-bar", "1234-test", "test-1234", "hello123"})
    void setOwner_AllValid(String owner) {
        // given
        StringReader reader = new StringReader(generateMetadataBlockTSV(
            Map.of(MetadataBlock.Headers.DISPLAY_NAME, "test",
                MetadataBlock.Headers.NAME, "test",
                MetadataBlock.Headers.OWNER, owner)));

        // when
        parser.parse(reader);
        List<MetadataBlock> blocks = metadataBlockProcessor.getBeans();

        // then
        assertEquals(1, blocks.size());
        if (!owner.isEmpty()) {
            assertNotNull(blocks.get(0).getOwner());
            assertTrue(blocks.get(0).getOwner() instanceof Dataverse);
            assertTrue(blocks.get(0).getOwner() instanceof Placeholder.Dataverse);
            assertEquals(owner, blocks.get(0).getOwner().getAlias());
        }
    }

    @ParameterizedTest
    @ValueSource(strings = {"1234", "hello+", "Hello!"})
    void setOwner_AllInvalid(String owner) {
        // given
        StringReader reader = new StringReader(generateMetadataBlockTSV(
            Map.of(MetadataBlock.Headers.OWNER, owner,
                MetadataBlock.Headers.DISPLAY_NAME, "display",
                MetadataBlock.Headers.NAME, "test")));

        // when & then
        assertThrows(DataValidationException.class, () -> parser.parse(reader));
    }

    /**
     * This method simply inserts all the values from the map into a line, combined by \t, and adds a "header" line
     * before it. It does this based on the {@link MetadataBlock.Headers} enum value order, which is the same as in
     * the TSV definition. Values that are not present in the map will be inserted as empty strings.
     *
     * @param values the column values to render, keyed by header; absent headers yield an empty column
     * @return a two-line TSV document: a dummy header line followed by the single data row
     */
    public static String generateMetadataBlockTSV(Map<MetadataBlock.Headers, String> values) {
        List<String> fieldValues = Arrays.stream(MetadataBlock.Headers.values())
            .map(k -> values.getOrDefault(k, ""))
            .collect(Collectors.toList());
        return "unused header line" + settings.getFormat().getLineSeparatorString() + String.join("\t", fieldValues);
    }
}

0 comments on commit b516482

Please sign in to comment.