In [15]:
%%file ~/mrjobs/single_mapper.py
from mrjob.job import MRJob

class SingleReducer(MRJob):
    """Map-only job: emit ``(field, 1)`` for the third CSV field of each line.

    The class defines no reducer, so every pair produced by the mapper is
    written to the job output unchanged.
    """

    # Zero-based index of the comma-separated field used as the output key.
    KEY_COLUMN = 2

    def mapper(self, _, line):
        """Emit ``(identifier, 1)`` for one raw input line.

        Args:
            _: Input key; unused.
            line: One line of comma-separated text.

        Yields:
            ``(identifier, 1)`` where ``identifier`` is field ``KEY_COLUMN``.
        """
        row = line.split(',')
        if len(row) <= self.KEY_COLUMN:
            # A blank or truncated line would otherwise raise IndexError and
            # abort the whole job; skip it and make the skip visible.
            self.increment_counter('skipped', 'short_row', 1)
            return
        yield row[self.KEY_COLUMN], 1

# Start the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    SingleReducer.run()

Writing /home/jovyan/mrjobs/single_mapper.py


In [17]:
! python ~/mrjobs/single_mapper.py sample.txt

No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory /tmp/single_mapper.jovyan.20221201.172119.039653
Running step 1 of 1...
job output is in /tmp/single_mapper.jovyan.20221201.172119.039653/output
Streaming final output from /tmp/single_mapper.jovyan.20221201.172119.039653/output...
"1306421"	1
"15900065"	1
"12708937"	1
"3601530"	1
"1004775"	1
"1306894"	1
"1003461"	1
"5000088"	1
"17302664"	1
Removing temp directory /tmp/single_mapper.jovyan.20221201.172119.039653...


In [18]:
%%file ~/mrjobs/mapper_reducer.py
from mrjob.job import MRJob

class MapperReducer(MRJob):
    """Count how many input lines carry each value of the sixth CSV field."""

    # Zero-based index of the comma-separated field used as the grouping key.
    KEY_COLUMN = 5

    def mapper(self, _, line):
        """Emit ``(identifier, 1)`` for one raw input line.

        Args:
            _: Input key; unused.
            line: One line of comma-separated text.

        Yields:
            ``(identifier, 1)`` where ``identifier`` is field ``KEY_COLUMN``.
        """
        row = line.split(',')
        if len(row) <= self.KEY_COLUMN:
            # A blank or truncated line would otherwise raise IndexError and
            # abort the whole job; skip it and make the skip visible.
            self.increment_counter('skipped', 'short_row', 1)
            return
        yield row[self.KEY_COLUMN], 1

    def reducer(self, identifier, count):
        """Sum the per-line ones for a key.

        Args:
            identifier: The grouping key emitted by the mapper.
            count: Iterable of the values emitted for that key.

        Yields:
            ``(identifier, total)``.
        """
        yield identifier, sum(count)

# Start the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    MapperReducer.run()

Writing /home/jovyan/mrjobs/mapper_reducer.py


In [19]:
! python ~/mrjobs/mapper_reducer.py sample.txt

No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory /tmp/mapper_reducer.jovyan.20221201.172155.978001
Running step 1 of 1...
job output is in /tmp/mapper_reducer.jovyan.20221201.172155.978001/output
Streaming final output from /tmp/mapper_reducer.jovyan.20221201.172155.978001/output...
"rondell"	1
"xiaomi"	2
"janome"	1
"lg"	1
"michelin"	1
"creed"	1
"hp"	2
Removing temp directory /tmp/mapper_reducer.jovyan.20221201.172155.978001...


In [20]:
%%file ~/mrjobs/mapper_two_reducers.py
from mrjob.job import MRJob
from mrjob.step import MRStep

class MapperTwoReducers(MRJob):
    """Two-step job: count lines per sixth CSV field, then keep the largest count.

    Step 1 counts occurrences of each key. Step 2 receives every
    ``(count, identifier)`` pair under a single ``None`` key and emits the
    maximum pair.
    """

    # Zero-based index of the comma-separated field used as the grouping key.
    KEY_COLUMN = 5

    def mapper(self, _, line):
        """Emit ``(identifier, 1)`` for one raw input line.

        Args:
            _: Input key; unused.
            line: One line of comma-separated text.

        Yields:
            ``(identifier, 1)`` where ``identifier`` is field ``KEY_COLUMN``.
        """
        row = line.split(',')
        if len(row) <= self.KEY_COLUMN:
            # A blank or truncated line would otherwise raise IndexError and
            # abort the whole job; skip it and make the skip visible.
            self.increment_counter('skipped', 'short_row', 1)
            return
        yield row[self.KEY_COLUMN], 1

    def reducer_count(self, identifier, count):
        """Total the ones for a key and funnel the result to a single group.

        Yields:
            ``(None, (total, identifier))``. The constant ``None`` key sends
            every pair to the same call of the next step's reducer; the total
            comes first so pairs order by count.
        """
        yield None, (sum(count), identifier)

    def reducer_max_count(self, _, total):
        """Emit the pair with the highest count.

        Args:
            _: The shared ``None`` key; unused.
            total: Iterable of ``(count, identifier)`` pairs.

        Yields:
            ``(count, identifier)`` for the maximum pair. Pairs compare
            element-wise, so equal counts are resolved by the greater
            identifier.
        """
        # Unpack explicitly instead of yielding the pair object itself and
        # relying on the framework to split it into key and value.
        top_count, top_identifier = max(total)
        yield top_count, top_identifier

    def steps(self):
        """Return the two steps: map + count, then select the maximum."""
        return [
            MRStep(mapper=self.mapper, reducer=self.reducer_count),
            MRStep(reducer=self.reducer_max_count),
        ]

# Start the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    MapperTwoReducers.run()

Writing /home/jovyan/mrjobs/mapper_two_reducers.py


In [21]:
! python ~/mrjobs/mapper_two_reducers.py sample.txt

No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory /tmp/mapper_two_reducers.jovyan.20221201.172215.277435
Running step 1 of 2...
Running step 2 of 2...
job output is in /tmp/mapper_two_reducers.jovyan.20221201.172215.277435/output
Streaming final output from /tmp/mapper_two_reducers.jovyan.20221201.172215.277435/output...
2	"xiaomi"
Removing temp directory /tmp/mapper_two_reducers.jovyan.20221201.172215.277435...


In [22]:
%%file ~/mrjobs/with_combiner.py
from mrjob.job import MRJob

class MapperReducer(MRJob):
    """Count lines per sixth CSV field, with a combiner for partial sums."""

    # Zero-based index of the comma-separated field used as the grouping key.
    KEY_COLUMN = 5

    def mapper(self, _, line):
        """Emit ``(identifier, 1)`` for one raw input line.

        Args:
            _: Input key; unused.
            line: One line of comma-separated text.

        Yields:
            ``(identifier, 1)`` where ``identifier`` is field ``KEY_COLUMN``.
        """
        row = line.split(',')
        if len(row) <= self.KEY_COLUMN:
            # A blank or truncated line would otherwise raise IndexError and
            # abort the whole job; skip it and make the skip visible.
            self.increment_counter('skipped', 'short_row', 1)
            return
        yield row[self.KEY_COLUMN], 1

    def combiner(self, identifier, count):
        """Pre-aggregate mapper output into a partial sum per key.

        Yields:
            ``(identifier, partial_total)``. Summation is associative, so the
            reducer can add partial totals exactly as it adds single ones.
        """
        yield identifier, sum(count)

    def reducer(self, identifier, count):
        """Add up the (partial) counts for a key.

        Yields:
            ``(identifier, total)``.
        """
        yield identifier, sum(count)

# Start the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    MapperReducer.run()

Writing /home/jovyan/mrjobs/with_combiner.py


In [23]:
! python ~/mrjobs/with_combiner.py sample.txt

No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory /tmp/with_combiner.jovyan.20221201.172229.682141
Running step 1 of 1...
job output is in /tmp/with_combiner.jovyan.20221201.172229.682141/output
Streaming final output from /tmp/with_combiner.jovyan.20221201.172229.682141/output...
"rondell"	1
"xiaomi"	2
"janome"	1
"lg"	1
"michelin"	1
"creed"	1
"hp"	2
Removing temp directory /tmp/with_combiner.jovyan.20221201.172229.682141...


In [24]:
%%file ~/mrjobs/reduce_join.py
from mrjob.job import MRJob
from mrjob.step import MRStep

class InnerJoin(MRJob):
    """Reduce-side inner join of two comma-separated inputs, then a sum.

    Both inputs are read by the same mapper and told apart by their number
    of fields. Step 1 groups the tagged records by join key and pairs each
    transaction amount with the master value for that key. Step 2 sums the
    amounts per master value.
    """

    # Number of comma-separated fields that identifies each relation.
    TRANSACTION_FIELD_COUNT = 9
    MASTER_FIELD_COUNT = 2

    def mapper(self, _, line):
        """Tag one input line with its relation and emit it under its join key.

        Args:
            _: Input key; unused.
            line: One line of comma-separated text from either input.

        Yields:
            ``(fields[2], ('T', amount))`` for a transaction row, where
            ``amount`` is ``fields[6]`` as a float, or
            ``(fields[0], ('M', fields[1]))`` for a master row. Rows that
            match neither layout, or whose amount is not numeric, are skipped
            and counted under the ``skipped`` counter group.
        """
        fields = line.split(',')
        field_count = len(fields)

        if field_count == self.TRANSACTION_FIELD_COUNT:
            try:
                amount = float(fields[6])
            except ValueError:
                # e.g. a header row or an empty amount: drop the row rather
                # than let one bad value abort the whole job.
                self.increment_counter('skipped', 'non_numeric_amount', 1)
                return
            yield fields[2], ('T', amount)
        elif field_count == self.MASTER_FIELD_COUNT:
            yield fields[0], ('M', fields[1])
        else:
            # Still ignored, as before, but no longer silently.
            self.increment_counter('skipped', 'unrecognised_row', 1)

    def reducer_join(self, key, values):
        """Pair every transaction amount for a key with its master value.

        Args:
            key: The join key; not part of the output.
            values: Iterable of ``(tag, payload)`` pairs from the mapper.

        Yields:
            ``(master_value, amount)`` for each transaction amount, only when
            the key appears in both relations (inner join). If several master
            rows share the key, the first one received is used.
        """
        master_values = []
        amounts = []

        # Amounts must be buffered because master and transaction records
        # for a key arrive in no guaranteed order.
        for tag, payload in values:
            if tag == 'M':
                master_values.append(payload)
            elif tag == 'T':
                amounts.append(payload)

        if master_values and amounts:
            for amount in amounts:
                yield master_values[0], amount

    def reducer_sum(self, location, value):
        """Sum the joined amounts for one master value.

        Yields:
            ``(location, total)``.
        """
        yield location, sum(value)

    def steps(self):
        """Return the two steps: tag + join, then sum per master value."""
        return [
            MRStep(mapper=self.mapper, reducer=self.reducer_join),
            MRStep(reducer=self.reducer_sum),
        ]
# Start the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    InnerJoin.run()

Writing /home/jovyan/mrjobs/reduce_join.py


In [10]:
# Run the script from ~/mrjobs, where the %%file cell above writes it.
! python ~/mrjobs/reduce_join.py sample.txt join.txt

No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory /tmp/reduce_join.jovyan.20221201.165540.622126
Running step 1 of 2...
Running step 2 of 2...
job output is in /tmp/reduce_join.jovyan.20221201.165540.622126/output
Streaming final output from /tmp/reduce_join.jovyan.20221201.165540.622126/output...
"Matara"	101.03
"Jaffna"	183.27
"Kandy"	519.93
"Colombo"	1881.1699999999998
Removing temp directory /tmp/reduce_join.jovyan.20221201.165540.622126...


In [14]:
%%file ~/sample/test.txt
"sample text"

Writing /home/jovyan/sample/test.txt
