Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Migrated docs.

  • Loading branch information...
commit cb72025cc2e181939e120f0997172604c1855bf2 1 parent a64f1d3
Stephen Diehl authored September 22, 2012
30  README.md
Source Rendered
@@ -14,6 +14,36 @@ not generally a good idea for performance. But this lets us implement
14 14
 a simple shuffler using a Python defaultdict in just a few lines of code
15 15
 which is easy to understand.
16 16
 
  17
+Theory
  18
+======
  19
+
  20
+MapReduce can be thought of on a high level as being a list
  21
+homomorphism that can be written as a composition of two functions (
  22
+Reduce . Map ) . It is parallelizable because of the associativity of
  23
+the map and reduce operations.
  24
+
  25
+```haskell
  26
+MapReduce :: [(k1, v1)] -> [(k3, v3)]
  27
+MapReduce = Reduce .  Map
  28
+
  29
+MapReduce :: a -> [(k3, v3)]
  30
+MapReduce = reducefn . shuffle . mapfn . datafn
  31
+```
  32
+
  33
+The implementation provides two functions
  34
+split ( datafn ) and shuffle.
  35
+
  36
+```haskell
  37
+shuffle :: [ (k2, v2) ] -> [(k2, [v2])]
  38
+```
  39
+
  40
+The user provides map and reduce.
  41
+
  42
+```haskell
  43
+map :: (k1,v1) -> [ (k2, v2) ]
  44
+reduce :: (k2, [v2]) -> [ (k3, v3) ]
  45
+```
  46
+
17 47
 Directions:
18 48
 ===========
19 49
 
25  example.py
@@ -12,9 +12,6 @@
12 12
 # This just enumerates all lines in the file, but is able to
13 13
 # get data from disk into ZeroMQ much faster than read/writes.
14 14
 
15  
-# Do an HDF5 data source example!
16  
-
17  
-# map :: a -> [ (k1, v1) ]
18 15
 def datafn():
19 16
     i = count(0)
20 17
     total = mm.size()
@@ -22,30 +19,11 @@ def datafn():
22 19
         yield next(i), memoryview(mm.readline())
23 20
     mm.close()
24 21
 
25  
-# MapReduce can be thought of on a high level as being a list
26  
-# homomorphism that can be written as a composition of two functions (
27  
-# Reduce . Map ) . It is parallelizable because of the associativity of
28  
-# the of map and reduce operations.
29  
-#
30  
-#   MapReduce :: [(k1, v1)] -> [(k3, v3)]
31  
-#   MapReduce = Reduce .  Map
32  
-
33  
-#   MapReduce :: a -> [(k3, v3)]
34  
-#   MapReduce = reducefn . shuffle . mapfn . datafn
35  
-
36  
-# The implementation provides two functions
37  
-# split ( datafn ) and shuffle.
38  
-
39 22
 # map :: (k1,v1) -> [ (k2, v2) ]
40 23
 def mapfn(k1, v):
41 24
     for w in v.bytes.split():
42 25
         yield w, 1
43 26
 
44  
-# shuffle :: [ (k2, v2) ] -> [(k2, [v2])]
45  
-
46  
-# In the Haskell notation
47  
-# pmap reducefn ( shuffle ( pmap mapfn ( split a ) ) )
48  
-
49 27
 # reduce :: (k2, [v2]) -> [ (k3, v3) ]
50 28
 def reducefn(k2, v):
51 29
     return sum(v)
@@ -54,9 +32,6 @@ def reducefn(k2, v):
54 32
 s = Server()
55 33
 s.connect()
56 34
 
57  
-# yaml config
58  
-# Datastore backend, Redis kaylee://
59  
-
60 35
 s.mapfn    = mapfn
61 36
 s.reducefn = reducefn
62 37
 s.datafn   = datafn
8  kaylee/server.py
@@ -94,7 +94,7 @@ def main_loop(self):
94 94
                 break
95 95
 
96 96
             # TODO: Specify number of nodes
97  
-            if len(self.workers) > 5:
  97
+            if len(self.workers) > 0:
98 98
                 if events.get(self.push_socket) == zmq.POLLOUT:
99 99
                     self.start_new_task()
100 100
                 if events.get(self.ctrl_socket) == zmq.POLLIN:
@@ -291,12 +291,6 @@ def collect_task(self):
291 291
         else:
292 292
             raise RuntimeError("Unknown wire chatter")
293 293
 
294  
-    def on_map_done(self, command, data):
295  
-        self.map_done(data)
296  
-
297  
-    def on_reduce_done(self, command, data):
298  
-        self.reduce_done(data)
299  
-
300 294
     def gen_bytecode(self):
301 295
         self.bytecode = (
302 296
             marshal.dumps(self.mapfn.func_code),

0 notes on commit cb72025

Please sign in to comment.
Something went wrong with that request. Please try again.