Skip to content

Commit

Permalink
Merge 42f2b9a into 5ba0fb1
Browse files Browse the repository at this point in the history
  • Loading branch information
derange-alembic committed Dec 29, 2019
2 parents 5ba0fb1 + 42f2b9a commit b68d642
Show file tree
Hide file tree
Showing 50 changed files with 498 additions and 388 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ eggs/
.eggs/
*.egg-info/
*.egg

# Editor related.
settings.json
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
language: python

python:
- 2.7
- 3.6

install:
- pip install .
Expand Down
2 changes: 1 addition & 1 deletion nn_dataflow/core/data_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def nhops_to(self, fmap_range, *dest_list, **kwargs):
# Each forward step, get the min-distance pair of source and
# destination.
src, dst = min(itertools.product(src_set, dst_set),
key=lambda (s, d): d.hop_dist(s))
key=lambda sd: sd[1].hop_dist(sd[0]))
dst_set.remove(dst)
src_set.add(dst)
nhops += total_size * dst.hop_dist(src)
Expand Down
37 changes: 18 additions & 19 deletions nn_dataflow/core/fmap_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,17 +133,26 @@ def __contains__(self, fpos):
in zip(fpos, self.fp_beg, self.fp_end))

def __lt__(self, other):
    '''
    Less-than comparison against another FmapRange.

    Returns NotImplemented for non-FmapRange operands so Python can fall
    back to the reflected comparison on `other` instead of raising here.
    '''
    if isinstance(other, self.__class__):
        return self._compare(other) < 0
    return NotImplemented

def __eq__(self, other):
    '''
    Equality comparison against another FmapRange.

    Returns NotImplemented for non-FmapRange operands. When _compare()
    raises ValueError -- presumably for ranges it cannot totally order,
    e.g. partially overlapping ones; confirm in _compare -- the two ranges
    are reported unequal rather than propagating the error.
    '''
    if isinstance(other, self.__class__):
        try:
            return self._compare(other) == 0
        except ValueError:
            return False
    return NotImplemented

def __hash__(self):
    '''
    Hash on the begin/end corner points of the range.

    A class that defines __eq__ but not __hash__ has its instances made
    unhashable, so they could no longer be stored in sets or used as dict
    keys. Defining both keeps the type usable in hashable collections.
    See https://docs.python.org/3/reference/datamodel.html#object.__hash__
    '''
    corners = (self.fp_beg, self.fp_end)
    return hash(corners)

def _compare(self, other):
# Identical ranges.
Expand Down Expand Up @@ -181,27 +190,17 @@ def __repr__(self):

def __ne__(self, other):
    '''Inequality, derived from __eq__, propagating NotImplemented.'''
    eq = self.__eq__(other)
    # `not NotImplemented` would evaluate to True, so an incomparable
    # operand must be passed through unchanged, not negated.
    return eq if eq is NotImplemented else not eq

def __gt__(self, other):
    '''Greater-than: neither less-than nor equal.'''
    lt = self.__lt__(other)
    if lt is NotImplemented:
        # `NotImplemented and X` would evaluate to X; return it directly
        # so Python can try the reflected comparison.
        return lt
    return (not lt) and self.__ne__(other)

def __le__(self, other):
    '''Less-than-or-equal in terms of __lt__ and __eq__.'''
    lt = self.__lt__(other)
    # NotImplemented is truthy, so an incomparable operand short-circuits
    # the "or" and is returned as-is -- the desired propagation.
    return lt or self.__eq__(other)

def __ge__(self, other):
    '''Greater-than-or-equal: the negation of less-than.'''
    lt = self.__lt__(other)
    # `not NotImplemented` would be True; propagate it unchanged instead.
    return lt if lt is NotImplemented else not lt


Expand Down
8 changes: 5 additions & 3 deletions nn_dataflow/core/loop_blocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import heapq
import itertools
from multiprocessing import Pool
from multiprocessing.pool import Pool

from . import loop_blocking_solver
from . import loop_enum as le
Expand Down Expand Up @@ -65,7 +65,7 @@ def skip_conv(bl_ts, bl_ords):

outer_level_innermost_nt_loop = None

for t_, ord_ in itertools.izip_longest(bl_ts, bl_ords, fillvalue=None):
for t_, ord_ in itertools.zip_longest(bl_ts, bl_ords, fillvalue=None):

# Non-trivial loops.
nt_loops = [lpe for lpe in range(le.NUM) if t_[lpe] > 1]
Expand Down Expand Up @@ -130,6 +130,7 @@ def _gen_bl_ts():
def _sweep():
''' Sweep all. '''
is_conv_loops = (nested_loop_desc.data_loops == ConvLayer.data_loops())
counter = 0
for bl_ts, bl_ords in itertools.product(_gen_bl_ts(), gen_ords):
if is_conv_loops and skip_conv(bl_ts, bl_ords):
continue
Expand All @@ -138,6 +139,7 @@ def _sweep():
lbs = LoopBlockingScheme(
nested_loop_desc, bl_ts, bl_ords, resource, bufshr,
options)
counter += 1
yield lbs

return heapq.nsmallest(options.ntops, _sweep(),
Expand Down Expand Up @@ -188,7 +190,7 @@ def retrieve_result_st():
retrieve_func = retrieve_result()
else:
pool = None
apply_func = apply
apply_func = util.apply
retrieve_func = retrieve_result_st()

# Exhaustive generators.
Expand Down
18 changes: 10 additions & 8 deletions nn_dataflow/core/loop_blocking_scheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ def __init__(self, nested_loop_desc, bl_ts, bl_ords, resource, bufshr,
'LoopBlockingScheme: bl_ts elements have invalid length.'
assert len(bl_ords) == BL.NUM, \
'LoopBlockingScheme: bl_ords has invalid length.'
assert all(sorted(bl_ord) == range(le.NUM) for bl_ord in bl_ords), \
assert all(tuple(sorted(bl_ord)) == tuple(range(le.NUM)) \
for bl_ord in bl_ords), \
'LoopBlockingScheme: bl_ords elements are invalid.'

self.bl_ts = [tuple(bl_t) for bl_t in bl_ts]
Expand Down Expand Up @@ -143,13 +144,7 @@ def __init__(self, nested_loop_desc, bl_ts, bl_ords, resource, bufshr,
self.src_is_dram = (resource.src_data_region.type == NodeRegion.DRAM)
self.dst_is_dram = (resource.dst_data_region.type == NodeRegion.DRAM)

# Check resource for filter pinning.
self.filter_pinned = False
if resource.no_time_mux:
if all(self.bl_ts[0][lpe] == 1 for lpe
in self.nld.data_loops[de.FIL].loops()):
self.filter_pinned = True
self.fetch[0][de.FIL] = 0

# If data regions are not DRAM, can only access once, no spilling.
if not self.src_is_dram:
Expand Down Expand Up @@ -215,6 +210,13 @@ def __init__(self, nested_loop_desc, bl_ts, bl_ords, resource, bufshr,
# Remote gbuf access.
self.remote_gbuf_access = [0.] * de.NUM

# Check resource for filter pinning.
if resource.no_time_mux:
if all(self.bl_ts[0][lpe] == 1 for lpe
in self.nld.data_loops[de.FIL].loops()):
self.filter_pinned = True
self.fetch[0][de.FIL] = 0

def is_valid(self):
'''
Whether is a valid scheme.
Expand Down Expand Up @@ -572,7 +574,7 @@ def _gen_index_single_level(t_x, order_x):
gens = [None] * le.NUM
rev_order = [le.NUM - 1 - o for o in order_x]
for lpe in range(le.NUM):
gens[rev_order[lpe]] = xrange(t_x[lpe])
gens[rev_order[lpe]] = range(t_x[lpe])

for idx in itertools.product(*gens):
# Index now is in the loop order from outer to inner. Reorder to be
Expand Down
2 changes: 1 addition & 1 deletion nn_dataflow/core/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def add(self, layer_name, layer, prevs=None):
'has not been added to the network'.
format(p))
else:
prevs = (self.layer_dict.keys()[-1],)
prevs = (list(self.layer_dict.keys())[-1],)

self.layer_dict[layer_name] = layer
self.prevs_dict[layer_name] = prevs
Expand Down
29 changes: 9 additions & 20 deletions nn_dataflow/core/nn_dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,23 +325,12 @@ def _gen_input_layout(self, options):
regions=(input_region,),
parts=(part.projection(input_region, appl2frng=True),))

if ext_layers:
for ext_parts in itertools.product(
*[partition.gen_partition(ext_layer, self.batch_size,
ext_region.dim, options,
guaranteed=True)
for ext_layer in ext_layers]):
ext_layout_dict = dict(zip(
ext_layer_names,
[DataLayout(
frngs=(ext_frng,),
regions=(ext_region,),
parts=(ext_part.projection(ext_region,
appl2frng=True),))
for ext_part, ext_frng in zip(ext_parts, ext_frngs)]))

yield input_layout, ext_layout_dict

else:
yield input_layout, None

ext_layout_dict = dict(zip(
ext_layer_names,
[DataLayout(
frngs=(ext_frng,),
regions=(ext_region,),
parts=(part.projection(ext_region, appl2frng=True),))
for ext_frng in ext_frngs])) if ext_layers else None

yield input_layout, ext_layout_dict
2 changes: 1 addition & 1 deletion nn_dataflow/core/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def _unit_nhops_to_fil(layer, filter_nodes, fil_dict, fwd=False):
# Each forward step, get the min-distance pair of source and
# destination.
src, dst = min(itertools.product(src_set, dst_set),
key=lambda (s, d): d.hop_dist(s))
key=lambda sd: sd[1].hop_dist(sd[0]))
dst_set.remove(dst)
src_set.add(dst)
nhops += fil_size * dst.hop_dist(src)
Expand Down
1 change: 1 addition & 0 deletions nn_dataflow/core/phy_dim2.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""

from collections import namedtuple
from functools import reduce
from operator import add, sub, neg, mul

class PhyDim2(namedtuple('PhyDim2', ['h', 'w'])):
Expand Down
4 changes: 3 additions & 1 deletion nn_dataflow/core/pipeline_segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def gen_constraint(self, max_time_overhead=float('inf')):
# hint A are larger than the corresponding values in hint B, A will be
# generated after B.
vals = [sorted(v) for v in vals]
syms = list(syms)

if self.cstr_topbat_idx is not None:
# Tovhd = (1 + 1/to + 1 + 1/to + ...) / tb
Expand All @@ -124,7 +125,8 @@ def gen_constraint(self, max_time_overhead=float('inf')):

constraint = tuple()

for atpl in self._subs_symargs(self.cstr_symargs, zip(syms, valp)):
for atpl in self._subs_symargs(self.cstr_symargs,
tuple(zip(syms, valp))):
ctpl = tuple()
for a in atpl:
# Construct kwargs, adjust the types of the values.
Expand Down
2 changes: 1 addition & 1 deletion nn_dataflow/core/scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def _get_result(self, lbs, part, ofmap_layout, sched_seq, unit_nhops):
scheme['fetch'] = lbs.fetch

# Loop blocking.
lp_ts = zip(*lbs.bl_ts)
lp_ts = list(zip(*lbs.bl_ts))
scheme['ti'] = tuple(lp_ts[le.IFM])
scheme['to'] = tuple(lp_ts[le.OFM])
scheme['tb'] = tuple(lp_ts[le.BAT])
Expand Down
95 changes: 95 additions & 0 deletions nn_dataflow/nns/resnet50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
""" $lic$
Copyright (C) 2016-2019 by The Board of Trustees of Stanford University
This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.
You should have received a copy of the Modified BSD-3 License along with this
program. If not, see <https://opensource.org/licenses/BSD-3-Clause>.
"""

from nn_dataflow.core import Network
from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, \
PoolingLayer, EltwiseLayer

'''
ResNet-50
He, Zhang, Ren, and Sun, 2015
'''

NN = Network('ResNet')

# Input: 3-channel (RGB) 224x224 image.
NN.set_input_layer(InputLayer(3, 224))

# Stem: 7x7 stride-2 conv to 112x112x64, then 3x3 stride-2 pooling to
# 56x56x64.
# NOTE(review): assumes ConvLayer args are (nifm, nofm, sofm, sfil, strd)
# and PoolingLayer args are (nfm, sofm, sfil, strd) -- confirm against
# nn_dataflow.core.
NN.add('conv1', ConvLayer(3, 64, 112, 7, 2))
NN.add('pool1', PoolingLayer(64, 56, 3, 2))

# Name of the layer whose output feeds the residual (shortcut) path of the
# next bottleneck block.
RES_PREV = 'pool1'

# Stage conv2: three bottleneck blocks at 56x56, 64 -> 64 -> 256 channels.
for i in range(3):
    # Bottleneck: 1x1 reduce, 3x3, 1x1 expand.
    NN.add('conv2_{}_a'.format(i), ConvLayer(64 if i == 0 else 256, 64, 56, 1))
    NN.add('conv2_{}_b'.format(i), ConvLayer(64, 64, 56, 3))
    NN.add('conv2_{}_c'.format(i), ConvLayer(64, 256, 56, 1))

    # With residual shortcut.
    if i == 0:
        # First block: project the shortcut with a 1x1 conv to match the
        # 256-channel output.
        NN.add('conv2_br', ConvLayer(64, 256, 56, 1), prevs=(RES_PREV,))
        RES_PREV = 'conv2_br'
    # Element-wise sum of the shortcut and the bottleneck output.
    # NOTE(review): presumably EltwiseLayer(nfm, sofm, nsrc) -- confirm.
    NN.add('conv2_{}_res'.format(i), EltwiseLayer(256, 56, 2),
           prevs=(RES_PREV, 'conv2_{}_c'.format(i)))
    RES_PREV = 'conv2_{}_res'.format(i)

# Stage conv3: four bottleneck blocks at 28x28, downsampling by stride 2 in
# the first block; 128 -> 128 -> 512 channels.
for i in range(4):
    NN.add('conv3_{}_a'.format(i),
           ConvLayer(256, 128, 28, 1, 2) if i == 0
           else ConvLayer(512, 128, 28, 1))
    NN.add('conv3_{}_b'.format(i), ConvLayer(128, 128, 28, 3))
    NN.add('conv3_{}_c'.format(i), ConvLayer(128, 512, 28, 1))

    # With residual shortcut.
    if i == 0:
        # Strided 1x1 projection matching both the spatial downsampling and
        # the 512-channel output.
        NN.add('conv3_br', ConvLayer(256, 512, 28, 1, 2), prevs=(RES_PREV,))
        RES_PREV = 'conv3_br'
    # Element-wise sum of the shortcut and the bottleneck output.
    NN.add('conv3_{}_res'.format(i), EltwiseLayer(512, 28, 2),
           prevs=(RES_PREV, 'conv3_{}_c'.format(i)))
    RES_PREV = 'conv3_{}_res'.format(i)

# Stage conv4: six bottleneck blocks at 14x14, downsampling by stride 2 in
# the first block; 256 -> 256 -> 1024 channels.
for i in range(6):
    NN.add('conv4_{}_a'.format(i),
           ConvLayer(512, 256, 14, 1, 2) if i == 0
           else ConvLayer(1024, 256, 14, 1))
    NN.add('conv4_{}_b'.format(i), ConvLayer(256, 256, 14, 3))
    NN.add('conv4_{}_c'.format(i), ConvLayer(256, 1024, 14, 1))

    # With residual shortcut.
    if i == 0:
        # Strided 1x1 projection matching the downsampling and channel count.
        NN.add('conv4_br', ConvLayer(512, 1024, 14, 1, 2), prevs=(RES_PREV,))
        RES_PREV = 'conv4_br'
    # Element-wise sum of the shortcut and the bottleneck output.
    NN.add('conv4_{}_res'.format(i), EltwiseLayer(1024, 14, 2),
           prevs=(RES_PREV, 'conv4_{}_c'.format(i)))
    RES_PREV = 'conv4_{}_res'.format(i)

# Stage conv5: three bottleneck blocks at 7x7, downsampling by stride 2 in
# the first block; 512 -> 512 -> 2048 channels.
for i in range(3):
    NN.add('conv5_{}_a'.format(i),
           ConvLayer(1024, 512, 7, 1, 2) if i == 0
           else ConvLayer(2048, 512, 7, 1))
    NN.add('conv5_{}_b'.format(i), ConvLayer(512, 512, 7, 3))
    NN.add('conv5_{}_c'.format(i), ConvLayer(512, 2048, 7, 1))

    # With residual shortcut.
    if i == 0:
        # Strided 1x1 projection matching the downsampling and channel count.
        NN.add('conv5_br', ConvLayer(1024, 2048, 7, 1, 2), prevs=(RES_PREV,))
        RES_PREV = 'conv5_br'
    # Element-wise sum of the shortcut and the bottleneck output.
    NN.add('conv5_{}_res'.format(i), EltwiseLayer(2048, 7, 2),
           prevs=(RES_PREV, 'conv5_{}_c'.format(i)))
    RES_PREV = 'conv5_{}_res'.format(i)

# Global 7x7 pooling down to 1x1x2048 (presumably average pooling in
# ResNet; the layer type here does not distinguish -- confirm).
NN.add('pool5', PoolingLayer(2048, 1, 7))

# Final 1000-way classifier.
NN.add('fc', FCLayer(2048, 1000))

0 comments on commit b68d642

Please sign in to comment.