Added: Run WordCount MapReduce Job in Yarn

commit 769d0748958fa240361391171e43fc30be0e0cf8 (1 parent: d91dc1a)
Authored by Naga Vijayapuram
README.md (6 changed lines)
@@ -33,10 +33,14 @@ $ ./run-simple-yarn-app.sh<br>
<b>SAMPLE RUN</b>:<br>
$ ./run-simple-yarn-app.sh -c /usr/bin/who -n 9
-<b>EXTRAS (Use Case: Send Email)</b>:<br>
+<b>EXTRAS_1 (Use Case: Send Email Demo)</b>:<br>
Want to send email from all the Yarn Containers?<br>
Place the sendemail Ruby script in an accessible location<br>
(/bin, /usr/bin, or some other standard location on PATH,<br>
or give the full path in the run script).<br>
$ ./run-simple-yarn-app.sh -c 'sendemail &lt;your_email_address&gt; &lt;your_email_password&gt;' -n 9
+<b>EXTRAS_2 (Use Case: Let a Yarn Container Run a MapReduce Job)</b>:<br>
+Want to run the WordCount MapReduce job under Yarn by dedicating a container to it?<br>
+$ ./run-simple-yarn-app.sh -c "/bin/cp \$LOCAL_DIRS/*/appMaster.jar /tmp\; hadoop --config /tmp/hadoop_mr_cfg jar /tmp/appMaster.jar com.sansthal.hadoop.yarn.WordCount in out" -n 1
+
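For context, a sketch of what that dedicated container ends up running once the escapes are resolved (the \$ keeps $LOCAL_DIRS, an environment variable Yarn sets inside each container, from expanding in the submitting shell; /tmp/hadoop_mr_cfg is assumed to be a directory on the node holding client-side Hadoop configuration such as core-site.xml and mapred-site.xml):

$ /bin/cp $LOCAL_DIRS/*/appMaster.jar /tmp
$ hadoop --config /tmp/hadoop_mr_cfg jar /tmp/appMaster.jar com.sansthal.hadoop.yarn.WordCount in out

The first command copies the localized application jar out of the container's working directories; the second submits WordCount from that jar as an ordinary MapReduce job.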
pom.xml (5 changed lines)
@@ -18,6 +18,11 @@
<artifactId>hadoop-yarn-client</artifactId>
<version>2.2.0</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>2.2.0</version>
+ </dependency>
</dependencies>
<build>
<sourceDirectory>src</sourceDirectory>
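The new hadoop-client dependency supplies the MapReduce client classes (org.apache.hadoop.mapreduce.Job, Mapper, Reducer, and the lib.input/lib.output formats) that WordCount.java below compiles against; hadoop-yarn-client alone does not include them. Assuming the project's existing Maven setup, rebuilding is just:

$ mvn clean package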
src/com/sansthal/hadoop/yarn/WordCount.java (71 changed lines)
@@ -0,0 +1,71 @@
+package com.sansthal.hadoop.yarn;
+
+import java.io.IOException;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+
+public class WordCount {
+
+ public static class TokenizerMapper
+ extends Mapper<Object, Text, Text, IntWritable>{
+
+ private final static IntWritable one = new IntWritable(1);
+ private Text word = new Text();
+
+ public void map(Object key, Text value, Context context
+ ) throws IOException, InterruptedException {
+ StringTokenizer itr = new StringTokenizer(value.toString());
+ while (itr.hasMoreTokens()) {
+ word.set(itr.nextToken());
+ context.write(word, one);
+ }
+ }
+ }
+
+ public static class IntSumReducer
+ extends Reducer<Text,IntWritable,Text,IntWritable> {
+ private IntWritable result = new IntWritable();
+
+ public void reduce(Text key, Iterable<IntWritable> values,
+ Context context
+ ) throws IOException, InterruptedException {
+ int sum = 0;
+ for (IntWritable val : values) {
+ sum += val.get();
+ }
+ result.set(sum);
+ context.write(key, result);
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = new Configuration();
+ String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
+ if (otherArgs.length != 2) {
+ System.err.println("Usage: wordcount <in> <out>");
+ System.exit(2);
+ }
+ @SuppressWarnings("deprecation")
+ Job job = new Job(conf, "word count");
+ job.setJarByClass(WordCount.class);
+ job.setMapperClass(TokenizerMapper.class);
+ job.setCombinerClass(IntSumReducer.class);
+ job.setReducerClass(IntSumReducer.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(IntWritable.class);
+ FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
+ FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
+ System.exit(job.waitForCompletion(true) ? 0 : 1);
+ }
+
+}
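The in and out arguments resolve against the default filesystem of the submitting user. A minimal smoke test, assuming HDFS is the default filesystem (paths here are only illustrative):

$ hadoop fs -mkdir -p in
$ hadoop fs -put /etc/hosts in/
$ ./run-simple-yarn-app.sh -c "..." -n 1   (the full command from the README above)
$ hadoop fs -cat 'out/part-r-*'

Note that the job fails if out already exists, since FileOutputFormat refuses to overwrite an existing output directory; remove it with hadoop fs -rm -r out between runs.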