-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#70 add projection operates for predicate and subject, and internal o…
…bject URIs (filter out if not rdf.basekb.com) and refactor UniqTool out to simplify development of various uniq tools
- Loading branch information
Showing
9 changed files
with
362 additions
and
204 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
119 changes: 119 additions & 0 deletions
119
bakemono/src/main/java/com/ontology2/bakemono/mapmap/UniqTool.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
package com.ontology2.bakemono.mapmap; | ||
|
||
import com.google.common.collect.Iterators; | ||
import com.google.common.collect.PeekingIterator; | ||
import com.ontology2.bakemono.Main; | ||
import com.ontology2.bakemono.uniq.Uniq; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.Path; | ||
import org.apache.hadoop.io.LongWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.io.compress.GzipCodec; | ||
import org.apache.hadoop.mapreduce.Job; | ||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | ||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | ||
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; | ||
import org.apache.hadoop.util.Tool; | ||
|
||
|
||
/** | ||
* Created with IntelliJ IDEA. | ||
* User: paul_000 | ||
* Date: 11/12/13 | ||
* Time: 1:22 PM | ||
* To change this template use File | Settings | File Templates. | ||
*/ | ||
|
||
public abstract class UniqTool implements Tool { | ||
|
||
abstract protected Class getMapperClass(); | ||
abstract protected String getJobName(); | ||
|
||
private Configuration conf; | ||
|
||
@Override | ||
public Configuration getConf() { | ||
return this.conf; | ||
} | ||
|
||
@Override | ||
public void setConf(Configuration arg0) { | ||
this.conf=arg0; | ||
} | ||
|
||
@Override | ||
public int run(String[] arg0) throws Exception { | ||
try { | ||
PeekingIterator<String> a= Iterators.peekingIterator(Iterators.forArray(arg0)); | ||
Integer reduceTasks = parseRArgument(a); | ||
|
||
if (!a.hasNext()) | ||
usage(); | ||
|
||
String input=a.next(); | ||
|
||
if (!a.hasNext()) | ||
usage(); | ||
|
||
String output=a.next(); | ||
|
||
conf.set("mapred.compress.map.output", "true"); | ||
conf.set("mapred.output.compression.type", "BLOCK"); | ||
conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"); | ||
|
||
Job job=new Job(conf,getJobName()); | ||
job.setSpeculativeExecution(false); | ||
job.setJarByClass(this.getClass()); | ||
job.setMapperClass(getMapperClass()); | ||
job.setReducerClass(Uniq.class); | ||
|
||
if(reduceTasks==null) { | ||
reduceTasks=29; // about right for AWS runs | ||
} | ||
|
||
job.setNumReduceTasks(reduceTasks); | ||
|
||
job.setMapOutputKeyClass(Text.class); | ||
job.setMapOutputValueClass(LongWritable.class); | ||
job.setOutputKeyClass(Text.class); | ||
job.setOutputValueClass(LongWritable.class); | ||
|
||
FileInputFormat.addInputPath(job, new Path(input)); | ||
FileOutputFormat.setOutputPath(job, new Path(output)); | ||
FileOutputFormat.setCompressOutput(job, true); | ||
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); | ||
|
||
// Gotcha -- this has to run before the definitions above associated with the output format because | ||
// this is going to be configured against the job as it stands a moment from now | ||
|
||
job.setOutputFormatClass(TextOutputFormat.class); | ||
|
||
return job.waitForCompletion(true) ? 0 : 1; | ||
} catch(Main.IncorrectUsageException iue) { | ||
return 2; | ||
} | ||
} | ||
|
||
public static Integer parseRArgument(PeekingIterator<String> a) | ||
throws Main.IncorrectUsageException { | ||
Integer reduceTasks=null; | ||
while(a.hasNext() && a.peek().startsWith("-")) { | ||
String flagName=a.next().substring(1).intern(); | ||
if (!a.hasNext()) | ||
usage(); | ||
|
||
String flagValue=a.next(); | ||
if (flagName=="r") { | ||
reduceTasks=Integer.parseInt(flagValue); | ||
} else { | ||
usage(); | ||
}; | ||
} | ||
return reduceTasks; | ||
} | ||
|
||
private static void usage() throws Main.IncorrectUsageException { | ||
throw new Main.IncorrectUsageException("incorrect arguments"); | ||
}; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 2 additions & 2 deletions
4
bakemono/src/main/java/com/ontology2/bakemono/mapmap/UniqueURIObjectMapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
23 changes: 23 additions & 0 deletions
23
bakemono/src/main/java/com/ontology2/bakemono/primitiveTriples/ExtractInternalURIObject.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package com.ontology2.bakemono.primitiveTriples; | ||
|
||
import com.google.common.base.Function; | ||
import org.apache.hadoop.io.Text; | ||
|
||
/** | ||
* Created with IntelliJ IDEA. | ||
* User: paul_000 | ||
* Date: 11/12/13 | ||
* Time: 1:15 PM | ||
* To change this template use File | Settings | File Templates. | ||
*/ | ||
public class ExtractInternalURIObject implements Function<PrimitiveTriple,Text> { | ||
@Override | ||
public Text apply(PrimitiveTriple t) { | ||
String o=t.getObject(); | ||
if(o.startsWith("<http://rdf.basekb.com/") && o.endsWith(">")) { | ||
return new Text(t.getObject()); | ||
} | ||
|
||
return null; | ||
} | ||
} |
18 changes: 18 additions & 0 deletions
18
bakemono/src/main/java/com/ontology2/bakemono/primitiveTriples/ProjectPredicate.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package com.ontology2.bakemono.primitiveTriples; | ||
|
||
import com.google.common.base.Function; | ||
import org.apache.hadoop.io.Text; | ||
|
||
/** | ||
* Created with IntelliJ IDEA. | ||
* User: paul_000 | ||
* Date: 11/12/13 | ||
* Time: 1:18 PM | ||
* To change this template use File | Settings | File Templates. | ||
*/ | ||
public class ProjectPredicate implements Function<PrimitiveTriple,Text> { | ||
@Override | ||
public Text apply(PrimitiveTriple t) { | ||
return new Text(t.getSubject()); | ||
} | ||
} |
18 changes: 18 additions & 0 deletions
18
bakemono/src/main/java/com/ontology2/bakemono/primitiveTriples/ProjectSubject.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package com.ontology2.bakemono.primitiveTriples; | ||
|
||
import com.google.common.base.Function; | ||
import org.apache.hadoop.io.Text; | ||
|
||
/** | ||
* Created with IntelliJ IDEA. | ||
* User: paul_000 | ||
* Date: 11/12/13 | ||
* Time: 1:16 PM | ||
* To change this template use File | Settings | File Templates. | ||
*/ | ||
public class ProjectSubject implements Function<PrimitiveTriple,Text> { | ||
@Override | ||
public Text apply(PrimitiveTriple t) { | ||
return new Text(t.getSubject()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters