In [120]:
# Load IPython's autoreload extension so that the job modules imported below
# can be re-imported after they are modified, without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [122]:
# Utilize last week's HW to count the number of nodes in the graph
from MRJob_Explore import explore

def countNodes(filename):
    """Run the `explore` MRJob in 'nodes' mode and return the reported count.

    Args:
        filename: path of the graph file handed to the job as its input.

    Returns:
        The second element of the last output record produced by the job.
        Each record's value is also printed as "Number of nodes = ...".

    Raises:
        ValueError: if the job produced no output records at all.
    """
    mr_job = explore(args=[filename, '--no-strict-protocols', '--exploreType', 'nodes'])

    # Stays None until at least one output record has been parsed
    numNodes = None

    with mr_job.make_runner() as runner:
        runner.run()

        for line in runner.stream_output():
            numNodes = mr_job.parse_output_line(line)[1]
            print('Number of nodes = ' + '{:,d}'.format(numNodes))

    # Fail with a clear message instead of an UnboundLocalError on empty output
    if numNodes is None:
        raise ValueError('explore job produced no output for ' + repr(filename))

    return numNodes

In [123]:
from PageRank_Initialize import initialize
from PageRank_Iterate import iterate
from PageRank_TopN import topN

def initializePR(filename, runnerType, outputDir, printOutput):
    
    if runnerType == 'local':
        mr_job = initialize(args=[filename, '--no-strict-protocols'])
        
    elif runnerType == 'hadoop':
        !hdfs dfs -rm -r {outputDir}
        mr_job = initialize(args=[filename, '--no-strict-protocols', '-r', 'hadoop', '--hadoop-home', '/usr/',
                                 '--output-dir', outputDir])
        
    elif runnerType == 'emr':
        !aws s3 rm --quiet {outputDir}
        mr_job = initialize(args=[filename, '--no-strict-protocols', '--no-output', 
                                  '-r', 'emr', '--emr-job-flow-id', clusterId, '--output-dir', outputDir])
        

    with mr_job.make_runner() as runner:
        runner.run()

        if printOutput:
            for line in runner.stream_output():
                print mr_job.parse_output_line(line)

def iteratePR(filename, n, a, runnerType, outputDir, iterations, printOutput):

    output = []

    if runnerType == 'local':
        mr_job = iterate(args=[filename, '--no-strict-protocols', '--numNodes=' + str(n), 
                               '--alpha=' + str(a), '--iterations=' + str(iterations)])

    elif runnerType == 'hadoop':
        !hdfs dfs -rm -r {outputDir}
        mr_job = iterate(args=[filename, '--no-strict-protocols', '-r', 'hadoop', '--hadoop-home', '/usr/',
                               '--output-dir', outputDir, '--numNodes=' + str(n),
                               '--alpha=' + str(a), '--iterations=' + str(iterations)])

    elif runnerType == 'emr':
        !aws s3 rm --quiet {outputDir}
        mr_job = iterate(args=[filename, '--no-strict-protocols', '--no-output', '--numNodes=' + str(n),
                               '-r', 'emr', '--emr-job-flow-id', clusterId, '--output-dir', outputDir,
                               '--alpha=' + str(a), '--iterations=' + str(iterations)])


    with mr_job.make_runner() as runner:
        runner.run()

        if runnerType != 'emr':
            for line in runner.stream_output():
                out = mr_job.parse_output_line(line)
                output.append(out)
                if printOutput:
                    print out
                    
def topNPR(filename, n, runnerType, outputDir, printOutput):

    output = []
    
    if runnerType == 'local':
        mr_job = topN(args=[filename, '--no-strict-protocols', '--top=' + str(n)])

    elif runnerType == 'hadoop':
        !hdfs dfs -rm -r {outputDir}
        mr_job = topN(args=[filename, '--no-strict-protocols', '-r', 'hadoop', '--hadoop-home', '/usr/',
                            '--output-dir', outputDir, '--top=' + str(n)])

    elif runnerType == 'emr':
        !aws s3 rm --quiet {outputDir}
        mr_job = topN(args=[filename, '--no-strict-protocols', '--no-output', '--top=' + str(n),
                            '-r', 'emr', '--emr-job-flow-id', clusterId, '--output-dir', outputDir])


    with mr_job.make_runner() as runner:
        runner.run()

        if runnerType != 'emr':
            for line in runner.stream_output():
                out = mr_job.parse_output_line(line)
                output.append(out)
                if printOutput:
                    print out

In [5]:
inputFile = 'randNet.txt'
outputDir = '/user/miki/week09/randNet/initialize'
n = countNodes(inputFile)

initializePR(inputFile, 'hadoop', outputDir, False)

# Note: we have to do this to preserve JSON protocol for reading in on next job
localFilename = 'randNet_initialized.txt'
!rm {localFilename}
!hdfs dfs -copyToLocal {outputDir + '/part-00000'} {localFilename}

Number of nodes = 100
Deleted /user/miki/week09/randNet/initialize


The have been translated as follows
 mapred.reduce.tasks: mapreduce.job.reduces


In [18]:
localFilename = 'randNet_initialized.txt'
outputDir = '/user/miki/week09/randNet/result'

k = 1

iteratePR(localFilename, n, 0.85, 'hadoop', outputDir, k, False)

# Note: we have to do this to preserve JSON protocol for reading in on next job
localFilename = 'randNet_result.txt'
!rm {localFilename}
!hdfs dfs -copyToLocal {outputDir + '/part-00000'} {localFilename}

rm: `/user/miki/week09/randNet/result': No such file or directory


The have been translated as follows
 mapred.reduce.tasks: mapreduce.job.reduces


In [20]:
# Run the topN job on the iteration result and print its records.
# NOTE: `localFilename` is whatever the previous cell left behind
# (the local copy of the iteration output).
outputDir = '/user/miki/week09/randNet/top_all'

top = 10

topNPR(localFilename, top, 'hadoop', outputDir, True)

rm: `/user/miki/week09/randNet/top': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


(0.016290768953268954, '100')
(0.016000901875901877, '15')
(0.015994462481962485, '63')
(0.015369935619935621, '9')
(0.014743380230880232, '74')
(0.014546825396825397, '58')
(0.01449887196137196, '85')
(0.014303755966255967, '61')
(0.014077763902763904, '71')
(0.013942843267843267, '88')


# HW 9.4: Topic-specific PageRank implementation using MRJob

Modify your PageRank implementation to produce a topic specific PageRank implementation

## Initialization for a topic

Since this corpus is small, we will read it in locally and create a topicCount file, so we know how many nodes belong to each topic. This file, along with the topics file, will be fed into the initialization MRJob.

In the final reducer of the MRJob (where we normalized the initial PageRank values to be 1/n), we also initialize the weights as follows:

$$
\text{weight}_{ij} = 
\begin{cases}
\frac{\beta}{|T_j|}
&\mbox{if } i \mbox{ in topic } T_j \\
\frac{1-\beta}{N-|T_j|}
&\mbox{if } i \mbox{ not in topic } T_j \\
\end{cases}
$$

In this case, we set $\beta = 0.99$.

In [124]:
from collections import Counter

# Count how many nodes belong to each topic.
# Every line of randNet_topics.txt is "<node><TAB><topic>".
topics = Counter()

with open('randNet_topics.txt', 'r') as myfile:
    for line in myfile:
        fields = line.strip().split('\t')
        # Skip blank or malformed lines instead of failing with IndexError
        if len(fields) < 2:
            continue
        topics[fields[1]] += 1

# Write one "<topic><TAB><count>" line per topic; sorted so that the file is
# identical from run to run
with open('randNet_topicCount.txt', 'w') as outfile:
    outfile.writelines(name + '\t' + str(topics[name]) + '\n' for name in sorted(topics))

In [125]:
%%writefile TSPageRank_Initialize.py
from __future__ import division
from mrjob.job import MRJob
from mrjob.step import MRStep

class initialize(MRJob):
    """Two-step job that prepares a graph for topic-specific PageRank.

    Step 1 emits exactly one record per node, including nodes that only ever
    appear as somebody's neighbor. Step 2 counts the nodes and attaches to
    each one its initial PageRank (1 / number of nodes) and its weight for
    the topic selected with --topic.

    The files randNet_topics.txt and randNet_topicCount.txt must be available
    in the working directory of the step-2 reducer.
    """

    #------------------
    # Configurations: 

    def configure_options(self):
        """Register --beta and --topic so they reach the tasks."""
        super(initialize, self).configure_options()
        self.add_passthrough_option('--beta', default=0.99, type='float')
        self.add_passthrough_option('--topic', default='1', type='string')

    #==================
    # Step 1: get all nodes
    #==================

    #------------------
    # Mapper:
    # - We need to make sure we emit a line for each node in the graph
    # - Right now there are no lines for nodes with no neighbors

    def mapper(self, _, line):
        """Emit the node's stripe plus a placeholder 0 for every neighbor."""

        # Split fields (tab-separated: node, then its stripe)

        fields = line.split('\t')
        key = fields[0]

        # NOTE(review): eval runs arbitrary expressions taken from the input
        # file. If the stripes are plain literals, ast.literal_eval would be
        # the safer choice -- confirm the input format before switching.
        stripe = eval(fields[1])

        # Emit the key and stripe

        yield key, stripe

        # For each neighbor, emit a 0
        # We just do this so we catch all nodes

        for neighbor in stripe:
            yield neighbor, 0

    #------------------
    # Reducer:
    # - We need to deduplicate each of our nodes
    # - If we encounter a value that is a dictionary, these are the neighbors
    # - If we do not encounter any dictionaries, then the node is dangling, we emit an empty neighbor list

    def reducer(self, key, values):
        """Collapse all values of a node into a single neighbor stripe."""
        stripe = {}

        # Loop through values for a key to see if it has neighbors
        # If it does, we need to keep the neighbors
        # (the placeholder 0s emitted by the mapper are ignored)

        for val in values:
            if isinstance(val, dict):
                stripe = val

        # For each key, emit only one thing, which is the neighbor list
        # We should now have a line for each node, even if the neighbor list is empty

        yield key, stripe

    #==================
    # Step 2: initialize topic weights (v_ji) and normalize length
    #==================

    #------------------
    # Mapper:
    # - Find total number of nodes

    def mapper_norm_init(self):
        """Start this mapper's node counter at zero."""
        self.total = 0.0

    def mapper_norm(self, key, value):
        """Pass the record through and count it.

        Step 1 emits each node once, so every record is one node.
        """
        yield key, value
        self.total += 1

    def mapper_norm_final(self):
        """Emit the number of nodes this mapper saw under the key '*'."""
        yield '*', self.total

    #------------------
    # Combiner:
    # - Partial sum of total nodes

    def combiner_norm(self, key, values):
        """Pre-sum the '*' counts; forward every other record untouched."""
        if key == '*':
            yield key, sum(values)
        else:
            for val in values:
                yield key, val

    #------------------
    # Reducer:
    # - Partial sum of total nodes
    # - Calculate weight vector (v_ji) for each node

    def reducer_norm_init(self):
        """Reset the node total and load the topic side files into memory.

        self.topics maps node -> topic; self.topicCount is the number of nodes
        in the topic of interest (left at 0.0 if that topic is not listed).
        """
        self.totalNodes = 0
        self.topics = {}
        self.topicCount = 0.0

        with open('randNet_topics.txt', 'r') as f1:
            for line in f1:
                fields = line.strip().split('\t')
                # Ignore blank or malformed lines
                if len(fields) < 2:
                    continue
                self.topics[fields[0]] = fields[1]

        with open('randNet_topicCount.txt', 'r') as f2:
            for line in f2:
                fields = line.strip().split('\t')
                # Ignore blank or malformed lines
                if len(fields) < 2:
                    continue
                if fields[0] == self.options.topic:
                    # The count is a plain integer; no need to eval it
                    self.topicCount = int(fields[1])

    def reducer_norm(self, key, values):
        """Store the node total, or emit (stripe, 1/n, weight) for a node.

        This only works if the '*' key is reduced before every node key, so
        that self.totalNodes is known when the nodes arrive. The step runs
        with a single reducer for that reason; it also assumes '*' sorts
        ahead of all node ids.

        Raises KeyError for a node that is missing from randNet_topics.txt.
        """
        if key == '*':
            self.totalNodes = sum(values)
        else:

            # Is this key in the topic of interest?
            keyInTopic = self.topics[key] == self.options.topic

            # If the key is part of our topic, weight = beta / size of topic
            if keyInTopic:
                weight = self.options.beta / self.topicCount

            # Otherwise, weight = (1 - beta) / size of not-topic
            else:
                weight = (1 - self.options.beta) / (self.totalNodes - self.topicCount)

            for val in values:
                yield key, (val, 1 / self.totalNodes, weight)

    #==================
    # Multi-step pipeline
    #==================

    def steps(self):
        """Return the two steps; the second one is pinned to one reducer."""
        return [
            MRStep(mapper=self.mapper,
                   reducer=self.reducer),
            MRStep(mapper_init=self.mapper_norm_init,
                   mapper=self.mapper_norm,
                   mapper_final=self.mapper_norm_final,
                   combiner=self.combiner_norm,
                   reducer_init=self.reducer_norm_init,
                   reducer=self.reducer_norm,
                   jobconf={'mapreduce.job.reduces': 1})
        ]

if __name__ == '__main__':
    initialize.run()

Overwriting TSPageRank_Initialize.py


In [126]:
from TSPageRank_Initialize import initialize

def initializeTSPR(filename, topic, beta, runnerType, outputDir, printOutput):
    
    if runnerType == 'local':
        mr_job = initialize(args=[filename, '--no-strict-protocols', '--topic', topic, '--beta=' + str(beta),
                                 '--file', 'randNet_topics.txt', '--file', 'randNet_topicCount.txt'])
        
    elif runnerType == 'hadoop':
        !hdfs dfs -rm -r {outputDir}
        mr_job = initialize(args=[filename, '--no-strict-protocols', '-r', 'hadoop', '--hadoop-home', '/usr/',
                                 '--output-dir', outputDir, '--topic', topic, '--beta=' + str(beta),
                                 '--file', 'randNet_topics.txt', '--file', 'randNet_topicCount.txt'])
        
    elif runnerType == 'emr':
        !aws s3 rm --quiet {outputDir}
        mr_job = initialize(args=[filename, '--no-strict-protocols', '--no-output', 
                                  '-r', 'emr', '--emr-job-flow-id', clusterId, '--output-dir', outputDir,
                                  '--topic', topic, '--beta=' + str(beta),
                                  '--file', 'randNet_topics.txt', '--file', 'randNet_topicCount.txt'])
        

    with mr_job.make_runner() as runner:
        runner.run()

        if printOutput:
            for line in runner.stream_output():
                print mr_job.parse_output_line(line)

## Check initialization output for topic 1

In [68]:
inputFile = 'randNet.txt'
outputDir = '/user/miki/week09/randNet/initialize'
n = countNodes(inputFile)
topic = '1'

initializeTSPR(inputFile, topic, 0.99, 'hadoop', outputDir + topic, False)

# Note: we have to do this to preserve JSON protocol for reading in on next job
# For some reason, reading input from HDFS does not work :(
localFilename = 'randNet/initialize' + topic + '.txt'
!rm {localFilename}
!hdfs dfs -copyToLocal {outputDir + topic + '/part-00000'} {localFilename}

Number of nodes = 100
Deleted /user/miki/week09/randNet/initialize1


The have been translated as follows
 mapred.reduce.tasks: mapreduce.job.reduces


## Iteration for a topic

This should be the same logic as the non-topic-specific version, except when we calculate the PageRank (accounting for teleporting and distributing dangling mass), we use a different weighting. Formerly, we used the following formula:

$$
\text{PR}_{\text{new}} = (1 - \alpha) \bigg(\frac{1}{n}\bigg) + \alpha \bigg(\frac{m}{n} + \text{PR}_{\text{old}}\bigg)
$$

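The first $1/n$ term indicated that we could jump to any other node with uniform probability. However, we now have a different jump probability for each node, namely the weight we calculated in the initialization step, so that term is replaced by the node's weight:

$$
\text{PR}_{\text{new}} = (1 - \alpha) \, \text{weight}_{ij} + \alpha \bigg(\frac{m}{n} + \text{PR}_{\text{old}}\bigg)
$$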

In [127]:
%%writefile TSPageRank_Iterate.py
from __future__ import division
from mrjob.job import MRJob
from mrjob.step import MRStep
from mrjob.protocol import JSONProtocol

class iterate(MRJob):
    """Topic-specific PageRank iteration.

    Every record is node -> (neighbors, PageRank, weight). One iteration is
    two steps: distribute each node's PageRank to its neighbors, then apply
    teleportation (using the node's weight) and the dangling mass. The pair
    of steps is repeated --iterations times.
    """

    #------------------
    # Configurations: 

    def configure_options(self):
        """Register --numNodes, --alpha and --iterations."""
        super(iterate, self).configure_options()
        self.add_passthrough_option('--numNodes', default=1, type='int')
        self.add_passthrough_option('--alpha', default=0.85, type='float')
        self.add_passthrough_option('--iterations', default=1, type='int')

    INPUT_PROTOCOL = JSONProtocol

    #------------------
    # Mapper:
    # - Find the number of neighbors for the node
    # - Distribute current PageRank among all neighbors
    # - If there are no neighbors, keep track of dangling mass

    def mapper_dist(self, key, value):
        """Split the node's PageRank among its neighbors.

        `value` is (neighbors, PageRank, weight). A node without neighbors
        sends its whole PageRank to the '*dangling' key instead.
        """

        # Divide the current PageRank by the number of neighbors

        numNeighbors = len(value[0])
        PageRank = value[1]

        # If there are neighbors, distribute the PageRank to each neighbors

        if numNeighbors > 0:
            for neighbor in value[0]:
                yield neighbor, PageRank / numNeighbors

        # If there are no neighbors, we need to account for this dangling node

        else:
            yield '*dangling', PageRank

        # Maintain the graph structure and weights

        yield key, (value[0], value[2])

    #------------------
    # Reducer:
    # - For each node, accumulate PageRank distributed from other nodes
    # - Maintain graph structure

    def reducer_dist(self, key, values):
        """Sum the PageRank shares of a node and re-attach its structure."""

        new_PageRank = 0.0
        weight = 0.0
        neighbors = {}

        for val in values:
            # A bare number is a PageRank share. Integers are accepted too, so
            # a whole-number share is not mistaken for the structure record.
            if isinstance(val, (int, float)):
                new_PageRank += val
            # Anything else is the (neighbors, weight) structure record
            else:
                neighbors = val[0]
                weight = val[1]

        yield key, (neighbors, new_PageRank, weight)

    #------------------
    # Mapper: 
    # - Account for teleportation
    # - Distribute dangling mass to all nodes

    # Below is doing it with only one reducer
    # This isn't a good way to do it, but couldn't figure out a better way

    def mapper_dangle(self, key, value):
        """Identity mapper."""
        yield key, value

    def reducer_init(self):
        """Reset the dangling mass before the reducer sees any key."""
        self.m = 0.0

    def reducer_dangle(self, key, values):
        """Record the dangling mass, or emit the node's updated PageRank.

        This only works if '*dangling' is reduced before every node key, so
        that self.m is set when the nodes arrive. The step runs with a single
        reducer for that reason; it also assumes '*dangling' sorts ahead of
        all node ids.
        """

        PageRank = 0.0
        neighbors = {}
        weight = 0.0

        # The previous step emits one record per key; the last one wins
        for val in values:
            PageRank = val[1]
            neighbors = val[0]
            weight = val[2]

        if key == '*dangling':
            self.m = PageRank
        else:
            a = self.options.alpha
            n = self.options.numNodes
            # Teleport according to the node's topic weight; the dangling mass
            # is spread evenly over all n nodes
            new_PageRank = (1 - a) * weight + a * (self.m / n + PageRank)
            yield key, (neighbors, new_PageRank, weight)

    #------------------
    # Pipeline:

    def steps(self):
        """Return the two-step iteration repeated --iterations times."""
        return ([
            MRStep(mapper=self.mapper_dist,
                   reducer=self.reducer_dist),
            MRStep(mapper=self.mapper_dangle,
                   reducer_init=self.reducer_init,
                   reducer=self.reducer_dangle,
                   jobconf={'mapreduce.job.reduces': 1})
            ] * self.options.iterations)

if __name__ == '__main__':
    iterate.run()

Overwriting TSPageRank_Iterate.py


In [128]:
from TSPageRank_Iterate import iterate

def iterateTSPR(filename, n, a, runnerType, outputDir, iterations, printOutput):

    output = []

    if runnerType == 'local':
        mr_job = iterate(args=[filename, '--no-strict-protocols', '--numNodes=' + str(n),
                               '--alpha=' + str(a), '--iterations=' + str(iterations)])

    elif runnerType == 'hadoop':
        !hdfs dfs -rm -r {outputDir}
        mr_job = iterate(args=[filename, '--no-strict-protocols', '-r', 'hadoop', '--hadoop-home', '/usr/',
                               '--output-dir', outputDir, '--numNodes=' + str(n),
                               '--alpha=' + str(a), '--iterations=' + str(iterations)])

    elif runnerType == 'emr':
        !aws s3 rm --quiet {outputDir}
        mr_job = iterate(args=[filename, '--no-strict-protocols', '--no-output', '--numNodes=' + str(n),
                               '-r', 'emr', '--emr-job-flow-id', clusterId, '--output-dir', outputDir,
                               '--alpha=' + str(a), '--iterations=' + str(iterations)])


    with mr_job.make_runner() as runner:
        runner.run()

        if runnerType != 'emr':
            for line in runner.stream_output():
                out = mr_job.parse_output_line(line)
                output.append(out)
                if printOutput:
                    print out

In [106]:
inputFile = 'randNet/initialize1.txt'
outputDir = '/user/miki/week09/randNet/result1'

topic = '1'
k = 10

iterateTSPR(inputFile, n, 0.85, 'hadoop', outputDir, k, False)

# Note: we have to do this to preserve JSON protocol for reading in on next job
# For some reason, reading input from HDFS does not work :(
localFilename = 'randNet/result' + topic + '.txt'
!rm {localFilename}
!hdfs dfs -copyToLocal {outputDir + '/part-00000'} {localFilename}

rm: cannot remove `randNet/result1.txt': No such file or directory


In [107]:
# Run the topN job on the topic-specific result and print its records.
# NOTE: `localFilename` and `topic` are whatever the previous cell left behind.
outputDir = '/user/miki/week09/randNet/top'

top = 10

topNPR(localFilename, top, 'hadoop', outputDir + topic, True)

rm: `/user/miki/week09/randNet/top1': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


(0.02064589832522501, '32')
(0.020547569626787363, '77')
(0.01975431310073244, '52')
(0.019529238246263958, '92')
(0.018565525448276277, '10')
(0.01852253982706892, '27')
(0.01784051057183225, '85')
(0.01769238950839094, '98')
(0.01751412867497823, '46')
(0.01602812131786077, '74')


## Run Topic-specific PageRank for topics 1-10

In [129]:
def moveFromHadoop(source, destination):
    # Note: we have to do this to preserve JSON protocol for reading in on next job
    # For some reason, reading input from HDFS does not work :(   
    !rm {destination}
    !hdfs dfs -copyToLocal {source} {destination}

    
for i in range(10):
    topic = str(i + 1)
    
    # Initialize with uniform PageRank and weight vector for topic i + 1
    inputFile = 'randNet.txt'
    outputDir = '/user/miki/week09/randNet/initialize'
    n = countNodes(inputFile)

    initializeTSPR(inputFile, topic, 0.99, 'hadoop', outputDir + topic, False)

    localFilename = 'randNet/initialize' + topic + '.txt'
    moveFromHadoop(outputDir + topic + '/part-00000', localFilename)
    
    # Iterate PageRank algorithm for 10 iterations
    outputDir = '/user/miki/week09/randNet/result'

    k = 10

    iterateTSPR(localFilename, n, 0.85, 'hadoop', outputDir + topic, k, False)

    localFilename = 'randNet/result' + topic + '.txt'
    moveFromHadoop(outputDir + topic + '/part-00000', localFilename)
    
    # Find top 10 pages
    outputDir = '/user/miki/week09/randNet/top'

    top = 10

    topNPR(localFilename, top, 'hadoop', outputDir + topic, False)
    
    localFilename = 'randNet/top' + topic + '.txt'
    moveFromHadoop(outputDir + topic + '/part-00000', localFilename)
    

Number of nodes = 100
rm: `/user/miki/week09/randNet/initialize1': No such file or directory
Deleted /user/miki/week09/randNet/result1
Deleted /user/miki/week09/randNet/top1


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


Number of nodes = 100
rm: `/user/miki/week09/randNet/initialize2': No such file or directory
rm: cannot remove `randNet/initialize2.txt': No such file or directory
rm: `/user/miki/week09/randNet/result2': No such file or directory
rm: cannot remove `randNet/result2.txt': No such file or directory
rm: `/user/miki/week09/randNet/top2': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


rm: cannot remove `randNet/top2.txt': No such file or directory
Number of nodes = 100
rm: `/user/miki/week09/randNet/initialize3': No such file or directory
rm: cannot remove `randNet/initialize3.txt': No such file or directory
rm: `/user/miki/week09/randNet/result3': No such file or directory
rm: cannot remove `randNet/result3.txt': No such file or directory
rm: `/user/miki/week09/randNet/top3': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


rm: cannot remove `randNet/top3.txt': No such file or directory
Number of nodes = 100
rm: `/user/miki/week09/randNet/initialize4': No such file or directory
rm: cannot remove `randNet/initialize4.txt': No such file or directory
rm: `/user/miki/week09/randNet/result4': No such file or directory
rm: cannot remove `randNet/result4.txt': No such file or directory
rm: `/user/miki/week09/randNet/top4': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


rm: cannot remove `randNet/top4.txt': No such file or directory
Number of nodes = 100
rm: `/user/miki/week09/randNet/initialize5': No such file or directory
rm: cannot remove `randNet/initialize5.txt': No such file or directory
rm: `/user/miki/week09/randNet/result5': No such file or directory
rm: cannot remove `randNet/result5.txt': No such file or directory
rm: `/user/miki/week09/randNet/top5': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


rm: cannot remove `randNet/top5.txt': No such file or directory
Number of nodes = 100
rm: `/user/miki/week09/randNet/initialize6': No such file or directory
rm: cannot remove `randNet/initialize6.txt': No such file or directory
rm: `/user/miki/week09/randNet/result6': No such file or directory
rm: cannot remove `randNet/result6.txt': No such file or directory
rm: `/user/miki/week09/randNet/top6': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


rm: cannot remove `randNet/top6.txt': No such file or directory
Number of nodes = 100
rm: `/user/miki/week09/randNet/initialize7': No such file or directory
rm: cannot remove `randNet/initialize7.txt': No such file or directory
rm: `/user/miki/week09/randNet/result7': No such file or directory
rm: cannot remove `randNet/result7.txt': No such file or directory
rm: `/user/miki/week09/randNet/top7': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


rm: cannot remove `randNet/top7.txt': No such file or directory
Number of nodes = 100
rm: `/user/miki/week09/randNet/initialize8': No such file or directory
rm: cannot remove `randNet/initialize8.txt': No such file or directory
rm: `/user/miki/week09/randNet/result8': No such file or directory
rm: cannot remove `randNet/result8.txt': No such file or directory
rm: `/user/miki/week09/randNet/top8': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


rm: cannot remove `randNet/top8.txt': No such file or directory
Number of nodes = 100
rm: `/user/miki/week09/randNet/initialize9': No such file or directory
rm: cannot remove `randNet/initialize9.txt': No such file or directory
rm: `/user/miki/week09/randNet/result9': No such file or directory
rm: cannot remove `randNet/result9.txt': No such file or directory
rm: `/user/miki/week09/randNet/top9': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


rm: cannot remove `randNet/top9.txt': No such file or directory
Number of nodes = 100
rm: `/user/miki/week09/randNet/initialize10': No such file or directory
rm: cannot remove `randNet/initialize10.txt': No such file or directory
rm: `/user/miki/week09/randNet/result10': No such file or directory
rm: cannot remove `randNet/result10.txt': No such file or directory
rm: `/user/miki/week09/randNet/top10': No such file or directory


The have been translated as follows
 mapred.text.key.comparator.options: mapreduce.partition.keycomparator.options
mapred.text.key.partitioner.options: mapreduce.partition.keypartitioner.options
mapred.reduce.tasks: mapreduce.job.reduces
mapred.output.key.comparator.class: mapreduce.job.output.key.comparator.class


rm: cannot remove `randNet/top10.txt': No such file or directory
