Skip to content

Commit

Permalink
Merge pull request #300 from metafacture/oersi-238
Browse files Browse the repository at this point in the history
Flux commands for analyzing input data
  • Loading branch information
fsteeg committed May 16, 2023
2 parents cd384b9 + 172a701 commit be0dbc8
Show file tree
Hide file tree
Showing 8 changed files with 511 additions and 1 deletion.
2 changes: 2 additions & 0 deletions metafix/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ dependencies {

implementation "org.metafacture:metafacture-commons:${versions.metafacture}"
implementation "org.metafacture:metafacture-flowcontrol:${versions.metafacture}"
implementation "org.metafacture:metafacture-formatting:${versions.metafacture}"
implementation "org.metafacture:metafacture-framework:${versions.metafacture}"
implementation "org.metafacture:metafacture-io:${versions.metafacture}"
implementation "org.metafacture:metafacture-javaintegration:${versions.metafacture}"
implementation "org.metafacture:metafacture-mangling:${versions.metafacture}"
implementation "org.metafacture:metafacture-triples:${versions.metafacture}"
implementation "org.metafacture:metamorph:${versions.metafacture}"

testImplementation "nl.jqno.equalsverifier:equalsverifier:${versions.equalsverifier}"
Expand Down
52 changes: 52 additions & 0 deletions metafix/src/main/java/org/metafacture/metafix/ListFixPaths.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright 2023 Fabian Steeg, hbz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.metafacture.metafix;

import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.triples.AbstractTripleSort.Compare;

/**
 * Provide a user-friendly way to list all paths available for processing in fix (see also {@link ListFixValues}).
 *
 * <p>Registered as the Flux command {@code list-fix-paths}. The analyzer runs the
 * Fix {@code nothing()} and counts the resulting triples by predicate.
 *
 * @author Fabian Steeg
 */
@Description("Lists all paths found in the input records. These paths can be used in a Fix to address fields. Options: " +
        "`count` (output occurrence frequency of each path, sorted by highest frequency first; default: `true`), " +
        "`template` (for formatting the internal triple structure; default: `${o}\t|\t${s}` if count is true, else `${s}`), " +
        "`index` (output individual repeated subfields and array elements with index numbers instead of '*'; default: `false`)")
@In(StreamReceiver.class)
@Out(String.class)
@FluxCommand("list-fix-paths")
public class ListFixPaths extends MetafixStreamAnalyzer {

    /**
     * Creates the path analyzer with indexing switched off, i.e. the entity
     * member name of the underlying {@link Metafix} is set to {@code "*"}.
     */
    public ListFixPaths() {
        super("nothing()", Compare.PREDICATE);
        setIndex(false);
    }

    /**
     * Selects the entity member name used by the underlying {@link Metafix}.
     *
     * @param index {@code true} to use {@link Metafix#DEFAULT_ENTITY_MEMBER_NAME},
     *              {@code false} to use the wildcard {@code "*"}
     */
    public void setIndex(final boolean index) {
        getFix().setEntityMemberName(index ? Metafix.DEFAULT_ENTITY_MEMBER_NAME : "*");
    }

    /**
     * Reports whether the underlying {@link Metafix} currently uses the default
     * entity member name.
     *
     * @return {@code true} if the entity member name equals
     *         {@link Metafix#DEFAULT_ENTITY_MEMBER_NAME}
     */
    public boolean getIndex() {
        return getFix().getEntityMemberName().equals(Metafix.DEFAULT_ENTITY_MEMBER_NAME);
    }
}
49 changes: 49 additions & 0 deletions metafix/src/main/java/org/metafacture/metafix/ListFixValues.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright 2023 Fabian Steeg, hbz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.metafacture.metafix;

import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.triples.AbstractTripleSort.Compare;

/**
 * Provide a user-friendly way to list all values for a given path (see {@link ListFixPaths}).
 *
 * <p>Registered as the Flux command {@code list-fix-values}. The analyzer copies
 * the values at the given path into a {@code value} field, retains only that
 * field and counts the resulting triples by object.
 *
 * @author Fabian Steeg
 */
@Description("Lists all values found for the given path. The paths can be found using list-fix-paths. Options: " +
        "`count` (output occurrence frequency of each value, sorted by highest frequency first; default: `true`), " +
        "`template` (for formatting the internal triple structure; default: `${o}\t|\t${s}` if count is true, else `${s}`)")
@In(StreamReceiver.class)
@Out(String.class)
@FluxCommand("list-fix-values")
public class ListFixValues extends MetafixStreamAnalyzer {

    /**
     * Creates the value analyzer for the given path.
     *
     * @param path the Fix path whose values are to be listed
     */
    public ListFixValues(final String path) {
        super(fix(path), Compare.OBJECT);
    }

    /**
     * Builds the Fix script that collects all values at {@code path} into the
     * field {@code value} and drops every other field.
     *
     * @param path the Fix path to copy the values from
     * @return the two-line Fix script
     */
    private static String fix(final String path) {
        // NOTE(review): path is inserted verbatim between double quotes; a path
        // containing a double quote would presumably break the script -- confirm
        // whether callers can pass such paths and how Fix expects them escaped.
        return
            "copy_field(\"" + path + "\",\"value.$append\")\n" +
            "retain(\"value\")";
    }

}
3 changes: 2 additions & 1 deletion metafix/src/main/java/org/metafacture/metafix/Metafix.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ public class Metafix implements StreamPipe<StreamReceiver>, Maps {
public static final String VAR_START = "$[";

public static final Strictness DEFAULT_STRICTNESS = Strictness.PROCESS;
public static final String DEFAULT_ENTITY_MEMBER_NAME = "%d";

public static final Map<String, String> NO_VARS = Collections.emptyMap();

Expand All @@ -87,7 +88,7 @@ public class Metafix implements StreamPipe<StreamReceiver>, Maps {
private Strictness strictness = DEFAULT_STRICTNESS;
private String fixFile;
private String recordIdentifier;
private String entityMemberName = "%d";
private String entityMemberName = DEFAULT_ENTITY_MEMBER_NAME;
private boolean repeatedFieldsToEntities;
private boolean strictnessHandlesProcessExceptions;
private int entityCount;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Copyright 2023 Fabian Steeg, hbz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.metafacture.metafix;

import org.metafacture.formatting.ObjectTemplate;
import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.helpers.DefaultStreamPipe;
import org.metafacture.mangling.StreamFlattener;
import org.metafacture.triples.AbstractTripleSort.Compare;
import org.metafacture.triples.AbstractTripleSort.Order;
import org.metafacture.triples.StreamToTriples;
import org.metafacture.triples.TripleCount;
import org.metafacture.triples.TripleSort;

import java.io.FileNotFoundException;

/**
 * Base class for analyzer modules that run a {@link Metafix} and evaluate its
 * output as triples (see {@link org.metafacture.framework.objects.Triple}).
 *
 * <p>Incoming stream events are forwarded to the wrapped {@link Metafix}; its
 * output is flattened, converted to triples, counted, sorted and finally
 * formatted with a template before it reaches the downstream receiver.
 *
 * @author Fabian Steeg
 */
/* package-private */ class MetafixStreamAnalyzer extends DefaultStreamPipe<ObjectReceiver<String>> {

    private static final String DEFAULT_COUNTED_TEMPLATE = "${o}\t|\t${s}";
    private static final String DEFAULT_UNCOUNTED_TEMPLATE = "${s}";

    private final Metafix fix;
    private final Compare countBy;
    private boolean count = true;
    private String template;

    /**
     * Creates an analyzer around a new {@link Metafix} for the given Fix.
     *
     * @param fix     the Fix passed to the {@link Metafix} constructor
     * @param countBy the triple component the triple count is computed by
     * @throws MetafactureException if the {@link Metafix} constructor reports a
     *                              {@link FileNotFoundException}
     */
    /* package-private */ MetafixStreamAnalyzer(final String fix, final Compare countBy) {
        this.fix = createFix(fix);
        this.countBy = countBy;
    }

    /**
     * Instantiates the wrapped {@link Metafix} with repeated fields turned into
     * entities, translating the checked exception into an unchecked one.
     */
    private static Metafix createFix(final String fixDef) {
        try {
            final Metafix metafix = new Metafix(fixDef);
            metafix.setRepeatedFieldsToEntities(true);
            return metafix;
        }
        catch (final FileNotFoundException e) {
            throw new MetafactureException(e);
        }
    }

    /**
     * Enables or disables counting; this selects the default template and the
     * sort settings used when the receiver is set.
     *
     * @param count {@code true} to count and sort by frequency
     */
    public void setCount(final boolean count) {
        this.count = count;
    }

    /**
     * @return whether counting is enabled
     */
    public boolean getCount() {
        return this.count;
    }

    /**
     * Sets the template used for formatting the triples.
     *
     * @param template the template, or {@code null} to fall back to the default
     */
    public void setTemplate(final String template) {
        this.template = template;
    }

    /**
     * @return the template currently stored in this analyzer
     */
    public String getTemplate() {
        return this.template;
    }

    /**
     * @return the wrapped {@link Metafix}
     */
    /* package-private */ Metafix getFix() {
        return this.fix;
    }

    /**
     * Resolves the template (keeping an explicitly set one, otherwise choosing
     * the default matching the count setting) and wires the processing chain
     * from the wrapped {@link Metafix} to the downstream receiver.
     */
    @Override
    protected void onSetReceiver() {
        if (template == null) {
            if (count) {
                template = DEFAULT_COUNTED_TEMPLATE;
            }
            else {
                template = DEFAULT_UNCOUNTED_TEMPLATE;
            }
        }
        fix
            .setReceiver(new StreamFlattener())
            .setReceiver(new StreamToTriples())
            .setReceiver(createTripleCount())
            .setReceiver(createTripleSort())
            .setReceiver(new ObjectTemplate<>(template))
            .setReceiver(getReceiver());
    }

    /**
     * Builds the counting stage, configured with the component to count by.
     */
    private TripleCount createTripleCount() {
        final TripleCount counter = new TripleCount();
        counter.setCountBy(countBy);
        return counter;
    }

    /**
     * Builds the sorting stage: numeric, by object, decreasing when counting is
     * enabled; non-numeric, by subject, increasing otherwise.
     */
    private TripleSort createTripleSort() {
        final TripleSort sorter = new TripleSort();
        if (count) {
            sorter.setNumeric(true);
            sorter.setBy(Compare.OBJECT);
            sorter.setOrder(Order.DECREASING);
        }
        else {
            sorter.setNumeric(false);
            sorter.setBy(Compare.SUBJECT);
            sorter.setOrder(Order.INCREASING);
        }
        return sorter;
    }

    /** Forwards the record start to the wrapped {@link Metafix}. */
    @Override
    public void startRecord(final String identifier) {
        fix.startRecord(identifier);
    }

    /** Forwards the record end to the wrapped {@link Metafix}. */
    @Override
    public void endRecord() {
        fix.endRecord();
    }

    /** Forwards the entity start to the wrapped {@link Metafix}. */
    @Override
    public void startEntity(final String name) {
        fix.startEntity(name);
    }

    /** Forwards the entity end to the wrapped {@link Metafix}. */
    @Override
    public void endEntity() {
        fix.endEntity();
    }

    /** Forwards the literal to the wrapped {@link Metafix}. */
    @Override
    public void literal(final String name, final String value) {
        fix.literal(name, value);
    }

    /** Closes the stream of the wrapped {@link Metafix}. */
    @Override
    protected void onCloseStream() {
        fix.closeStream();
    }

    /** Resets the stream of the wrapped {@link Metafix}. */
    @Override
    protected void onResetStream() {
        fix.resetStream();
    }

}
2 changes: 2 additions & 0 deletions metafix/src/main/resources/flux-commands.properties
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@
# limitations under the License.
#
fix org.metafacture.metafix.Metafix
list-fix-paths org.metafacture.metafix.ListFixPaths
list-fix-values org.metafacture.metafix.ListFixValues
Loading

0 comments on commit be0dbc8

Please sign in to comment.