Skip to content

Commit

Permalink
Rename normalisation form parameter in StreamUnicodeNormalizer
Browse files Browse the repository at this point in the history
Additionally, the documentation is improved.
  • Loading branch information
cboehme committed Jul 8, 2016
1 parent 314a641 commit 36cba90
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2015 Christoph Böhme
* Copyright 2015, 2016 Christoph Böhme
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
package org.culturegraph.mf.stream.pipe;

import java.text.Normalizer;
import java.text.Normalizer.Form;

import org.culturegraph.mf.framework.DefaultStreamPipe;
import org.culturegraph.mf.framework.StreamReceiver;
Expand All @@ -26,15 +27,15 @@
import org.culturegraph.mf.framework.annotations.Out;

/**
* Normalises Unicode characters in record identifiers, entity and literal names
* and literal values. Unicode normalisation converts between precomposed
* Normalises Unicode characters in record identifiers, entity and literal
* names and literal values. Unicode normalisation converts between precomposed
* characters and decomposed characters. There are four different forms of
* conversion which can be selected using {@code setNormalizationForm}. By
* default {@code StreamUnicodeNormalizer} converts from composed characters to
* precomposed characters using the {@link java.text.Normalizer.Form#NFC}
* conversion form. In the default configuration only literal values are
* converted. The various {@code setNormalize*} methods can be used to change
* this behaviour.
* conversion which can be selected using {@link #setNormalizationForm(Form)}.
* <p>
* In the default configuration only literal values are
* converted. The {@link #setNormalizeIds(boolean)},
* {@link #setNormalizeKeys(boolean)} and {@link #setNormalizeValues(boolean)}
* parameters can be used to change this behaviour.
*
* @author Christoph Böhme
*/
Expand All @@ -45,15 +46,27 @@
public final class StreamUnicodeNormalizer
extends DefaultStreamPipe<StreamReceiver> {

/**
* The default value for {@link #setNormalizationForm(Form)}.
*/
public static final Normalizer.Form DEFAULT_NORMALIZATION_FORM =
Normalizer.Form.NFC;

private boolean normalizeIds;
private boolean normalizeKeys;
private boolean normalizeValues = true;

private Normalizer.Form normalizationForm = Normalizer.Form.NFC;
private Normalizer.Form normalizationForm = DEFAULT_NORMALIZATION_FORM;

/**
* Set to {@code true} to normalise record identifiers. The default value is
* {@code false}.
* Controls whether to normalise record identifiers. By default record
* identifiers are not normalised.
* <p>
* This parameter may be changed at any time. It becomes immediately
* effective and affects all subsequently received <i>start-record</i>
* events.
*
* @param normalizeIds if true identifiers are normalised, otherwise not.
*/
public void setNormalizeIds(final boolean normalizeIds) {
this.normalizeIds = normalizeIds;
Expand All @@ -64,8 +77,15 @@ public boolean getNormalizeIds() {
}

/**
* Set to {@code true} to normalise names of entities and literals. The
* default value is {@code false}.
* Controls whether to normalise literal and entity names. By default these
* are not normalised.
* <p>
* This parameter may be changed at any time. It becomes immediately
* effective and affects all subsequently received <i>start-entity</i> and
* <i>literal</i> events.
*
* @param normalizeKeys if true literal and entity names are normalised,
* otherwise not.
*/
public void setNormalizeKeys(final boolean normalizeKeys) {
this.normalizeKeys = normalizeKeys;
Expand All @@ -76,8 +96,14 @@ public boolean getNormalizeKeys() {
}

/**
* Set to {@code true} to normalise literal values. The default value is
* {@code true}.
* Controls whether to normalise literal values. By default these are
* normalised.
* <p>
* This parameter may be changed at any time. It becomes immediately
* effective and affects all subsequently received <i>literal</i> events.
*
* @param normalizeValues if true literal values are normalised, otherwise
* not.
*/
public void setNormalizeValues(final boolean normalizeValues) {
this.normalizeValues = normalizeValues;
Expand All @@ -88,15 +114,23 @@ public boolean getNormalizeValues() {
}

/**
* Sets the normalisation form used for normalising record identifiers,
* entity and literal names and values. The default value is {@code NFC}.
* Sets the normalisation form used for normalising identifiers, names and
* values.
* <p>
* The default value is {@link #DEFAULT_NORMALIZATION_FORM}.
* <p>
* This parameter may be set at any time during processing. It becomes
* immediately effective and affects all subsequently received events.
*
* @param normalizationForm the normalisation form to use.
*
*/
public void setNormalizationType(
public void setNormalizationForm(
final Normalizer.Form normalizationForm) {
this.normalizationForm = normalizationForm;
}

public Normalizer.Form getNormalizationType() {
public Normalizer.Form getNormalizationForm() {
return normalizationForm;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public void shouldNormalizeKeysIfConfigured() {

@Test
public void shouldNormalizeToNFDIfConfigured() {
streamUnicodeNormalizer.setNormalizationType(Normalizer.Form.NFD);
streamUnicodeNormalizer.setNormalizationForm(Normalizer.Form.NFD);
streamUnicodeNormalizer.startRecord(RECORD_ID);
streamUnicodeNormalizer.literal(LITERAL_NAME,
KEY_WITH_PRECOMPOSED_CHARS);
Expand Down

0 comments on commit 36cba90

Please sign in to comment.