Skip to content

Commit

Permalink
Merge 42f2b9a into 5ba0fb1
Browse files Browse the repository at this point in the history
  • Loading branch information
derange-alembic committed Dec 29, 2019
2 parents 5ba0fb1 + 42f2b9a commit b68d642
Show file tree
Hide file tree
Showing 50 changed files with 498 additions and 388 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ eggs/
.eggs/
*.egg-info/
*.egg

# Editor related.
settings.json
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
language: python

python:
- 2.7
- 3.6

install:
- pip install .
Expand Down
2 changes: 1 addition & 1 deletion nn_dataflow/core/data_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def nhops_to(self, fmap_range, *dest_list, **kwargs):
# Each forward step, get the min-distance pair of source and
# destination.
src, dst = min(itertools.product(src_set, dst_set),
key=lambda (s, d): d.hop_dist(s))
key=lambda sd: sd[1].hop_dist(sd[0]))
dst_set.remove(dst)
src_set.add(dst)
nhops += total_size * dst.hop_dist(src)
Expand Down
37 changes: 18 additions & 19 deletions nn_dataflow/core/fmap_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,17 +133,26 @@ def __contains__(self, fpos):
in zip(fpos, self.fp_beg, self.fp_end))

def __lt__(self, other):
    '''
    Less-than comparison against another FmapRange.

    Returns NotImplemented for non-FmapRange operands so Python can fall
    back to the reflected comparison on `other` instead of raising here.
    '''
    if isinstance(other, self.__class__):
        return self._compare(other) < 0
    return NotImplemented

def __eq__(self, other):
    '''
    Equality comparison against another FmapRange.

    Returns NotImplemented for non-FmapRange operands. When _compare()
    raises ValueError -- presumably for ranges it cannot totally order,
    e.g. partially overlapping ones; confirm in _compare -- the two ranges
    are reported unequal rather than propagating the error.
    '''
    if isinstance(other, self.__class__):
        try:
            return self._compare(other) == 0
        except ValueError:
            return False
    return NotImplemented

def __hash__(self):
    '''
    Hash on the begin/end corner points of the range.

    A class that defines __eq__ but not __hash__ has its instances made
    unhashable, so they could no longer be stored in sets or used as dict
    keys. Defining both keeps the type usable in hashable collections.
    See https://docs.python.org/3/reference/datamodel.html#object.__hash__
    '''
    corners = (self.fp_beg, self.fp_end)
    return hash(corners)

def _compare(self, other):
# Identical ranges.
Expand Down Expand Up @@ -181,27 +190,17 @@ def __repr__(self):

def __ne__(self, other):
    '''Inequality, derived from __eq__, propagating NotImplemented.'''
    eq = self.__eq__(other)
    # `not NotImplemented` would evaluate to True, so an incomparable
    # operand must be passed through unchanged, not negated.
    return eq if eq is NotImplemented else not eq

def __gt__(self, other):
    '''Greater-than: neither less-than nor equal.'''
    lt = self.__lt__(other)
    if lt is NotImplemented:
        # `NotImplemented and X` would evaluate to X; return it directly
        # so Python can try the reflected comparison.
        return lt
    return (not lt) and self.__ne__(other)

def __le__(self, other):
    '''Less-than-or-equal in terms of __lt__ and __eq__.'''
    lt = self.__lt__(other)
    # NotImplemented is truthy, so an incomparable operand short-circuits
    # the "or" and is returned as-is -- the desired propagation.
    return lt or self.__eq__(other)

def __ge__(self, other):
    '''Greater-than-or-equal: the negation of less-than.'''
    lt = self.__lt__(other)
    # `not NotImplemented` would be True; propagate it unchanged instead.
    return lt if lt is NotImplemented else not lt


Expand Down
8 changes: 5 additions & 3 deletions nn_dataflow/core/loop_blocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import heapq
import itertools
from multiprocessing import Pool
from multiprocessing.pool import Pool

from . import loop_blocking_solver
from . import loop_enum as le
Expand Down Expand Up @@ -65,7 +65,7 @@ def skip_conv(bl_ts, bl_ords):

outer_level_innermost_nt_loop = None

for t_, ord_ in itertools.izip_longest(bl_ts, bl_ords, fillvalue=None):
for t_, ord_ in itertools.zip_longest(bl_ts, bl_ords, fillvalue=None):

# Non-trivial loops.
nt_loops = [lpe for lpe in range(le.NUM) if t_[lpe] > 1]
Expand Down Expand Up @@ -130,6 +130,7 @@ def _gen_bl_ts():
def _sweep():
''' Sweep all. '''
is_conv_loops = (nested_loop_desc.data_loops == ConvLayer.data_loops())
counter = 0
for bl_ts, bl_ords in itertools.product(_gen_bl_ts(), gen_ords):
if is_conv_loops and skip_conv(bl_ts, bl_ords):
continue
Expand All @@ -138,6 +139,7 @@ def _sweep():
lbs = LoopBlockingScheme(
nested_loop_desc, bl_ts, bl_ords, resource, bufshr,
options)
counter += 1
yield lbs

return heapq.nsmallest(options.ntops, _sweep(),
Expand Down Expand Up @@ -188,7 +190,7 @@ def retrieve_result_st():
retrieve_func = retrieve_result()
else:
pool = None
apply_func = apply
apply_func = util.apply
retrieve_func = retrieve_result_st()

# Exhaustive generators.
Expand Down
18 changes: 10 additions & 8 deletions nn_dataflow/core/loop_blocking_scheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ def __init__(self, nested_loop_desc, bl_ts, bl_ords, resource, bufshr,
'LoopBlockingScheme: bl_ts elements have invalid length.'
assert len(bl_ords) == BL.NUM, \
'LoopBlockingScheme: bl_ords has invalid length.'
assert all(sorted(bl_ord) == range(le.NUM) for bl_ord in bl_ords), \
assert all(tuple(sorted(bl_ord)) == tuple(range(le.NUM)) \
for bl_ord in bl_ords), \
'LoopBlockingScheme: bl_ords elements are invalid.'

self.bl_ts = [tuple(bl_t) for bl_t in bl_ts]
Expand Down Expand Up @@ -143,13 +144,7 @@ def __init__(self, nested_loop_desc, bl_ts, bl_ords, resource, bufshr,
self.src_is_dram = (resource.src_data_region.type == NodeRegion.DRAM)
self.dst_is_dram = (resource.dst_data_region.type == NodeRegion.DRAM)

# Check resource for filter pinning.
self.filter_pinned = False
if resource.no_time_mux:
if all(self.bl_ts[0][lpe] == 1 for lpe
in self.nld.data_loops[de.FIL].loops()):
self.filter_pinned = True
self.fetch[0][de.FIL] = 0

# If data regions are not DRAM, can only access once, no spilling.
if not self.src_is_dram:
Expand Down Expand Up @@ -215,6 +210,13 @@ def __init__(self, nested_loop_desc, bl_ts, bl_ords, resource, bufshr,
# Remote gbuf access.
self.remote_gbuf_access = [0.] * de.NUM

# Check resource for filter pinning.
if resource.no_time_mux:
if all(self.bl_ts[0][lpe] == 1 for lpe
in self.nld.data_loops[de.FIL].loops()):
self.filter_pinned = True
self.fetch[0][de.FIL] = 0

def is_valid(self):
'''
Whether is a valid scheme.
Expand Down Expand Up @@ -572,7 +574,7 @@ def _gen_index_single_level(t_x, order_x):
gens = [None] * le.NUM
rev_order = [le.NUM - 1 - o for o in order_x]
for lpe in range(le.NUM):
gens[rev_order[lpe]] = xrange(t_x[lpe])
gens[rev_order[lpe]] = range(t_x[lpe])

for idx in itertools.product(*gens):
# Index now is in the loop order from outer to inner. Reorder to be
Expand Down
2 changes: 1 addition & 1 deletion nn_dataflow/core/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def add(self, layer_name, layer, prevs=None):
'has not been added to the network'.
format(p))
else:
prevs = (self.layer_dict.keys()[-1],)
prevs = (list(self.layer_dict.keys())[-1],)

self.layer_dict[layer_name] = layer
self.prevs_dict[layer_name] = prevs
Expand Down
29 changes: 9 additions & 20 deletions nn_dataflow/core/nn_dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,23 +325,12 @@ def _gen_input_layout(self, options):
regions=(input_region,),
parts=(part.projection(input_region, appl2frng=True),))

if ext_layers:
for ext_parts in itertools.product(
*[partition.gen_partition(ext_layer, self.batch_size,
ext_region.dim, options,
guaranteed=True)
for ext_layer in ext_layers]):
ext_layout_dict = dict(zip(
ext_layer_names,
[DataLayout(
frngs=(ext_frng,),
regions=(ext_region,),
parts=(ext_part.projection(ext_region,
appl2frng=True),))
for ext_part, ext_frng in zip(ext_parts, ext_frngs)]))

yield input_layout, ext_layout_dict

else:
yield input_layout, None

ext_layout_dict = dict(zip(
ext_layer_names,
[DataLayout(
frngs=(ext_frng,),
regions=(ext_region,),
parts=(part.projection(ext_region, appl2frng=True),))
for ext_frng in ext_frngs])) if ext_layers else None

yield input_layout, ext_layout_dict
2 changes: 1 addition & 1 deletion nn_dataflow/core/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def _unit_nhops_to_fil(layer, filter_nodes, fil_dict, fwd=False):
# Each forward step, get the min-distance pair of source and
# destination.
src, dst = min(itertools.product(src_set, dst_set),
key=lambda (s, d): d.hop_dist(s))
key=lambda sd: sd[1].hop_dist(sd[0]))
dst_set.remove(dst)
src_set.add(dst)
nhops += fil_size * dst.hop_dist(src)
Expand Down
1 change: 1 addition & 0 deletions nn_dataflow/core/phy_dim2.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""

from collections import namedtuple
from functools import reduce
from operator import add, sub, neg, mul

class PhyDim2(namedtuple('PhyDim2', ['h', 'w'])):
Expand Down
4 changes: 3 additions & 1 deletion nn_dataflow/core/pipeline_segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def gen_constraint(self, max_time_overhead=float('inf')):
# hint A are larger than the corresponding values in hint B, A will be
# generated after B.
vals = [sorted(v) for v in vals]
syms = list(syms)

if self.cstr_topbat_idx is not None:
# Tovhd = (1 + 1/to + 1 + 1/to + ...) / tb
Expand All @@ -124,7 +125,8 @@ def gen_constraint(self, max_time_overhead=float('inf')):

constraint = tuple()

for atpl in self._subs_symargs(self.cstr_symargs, zip(syms, valp)):
for atpl in self._subs_symargs(self.cstr_symargs,
tuple(zip(syms, valp))):
ctpl = tuple()
for a in atpl:
# Construct kwargs, adjust the types of the values.
Expand Down
2 changes: 1 addition & 1 deletion nn_dataflow/core/scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def _get_result(self, lbs, part, ofmap_layout, sched_seq, unit_nhops):
scheme['fetch'] = lbs.fetch

# Loop blocking.
lp_ts = zip(*lbs.bl_ts)
lp_ts = list(zip(*lbs.bl_ts))
scheme['ti'] = tuple(lp_ts[le.IFM])
scheme['to'] = tuple(lp_ts[le.OFM])
scheme['tb'] = tuple(lp_ts[le.BAT])
Expand Down
95 changes: 95 additions & 0 deletions nn_dataflow/nns/resnet50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
""" $lic$
Copyright (C) 2016-2019 by The Board of Trustees of Stanford University
This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.
You should have received a copy of the Modified BSD-3 License along with this
program. If not, see <https://opensource.org/licenses/BSD-3-Clause>.
"""

from nn_dataflow.core import Network
from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, \
PoolingLayer, EltwiseLayer

'''
ResNet-50
He, Zhang, Ren, and Sun, 2015
'''

NN = Network('ResNet')

# Input: 3-channel (RGB) 224x224 image.
NN.set_input_layer(InputLayer(3, 224))

# Stem: 7x7 stride-2 conv to 112x112x64, then 3x3 stride-2 pooling to
# 56x56x64.
# NOTE(review): assumes ConvLayer args are (nifm, nofm, sofm, sfil, strd)
# and PoolingLayer args are (nfm, sofm, sfil, strd) -- confirm against
# nn_dataflow.core.
NN.add('conv1', ConvLayer(3, 64, 112, 7, 2))
NN.add('pool1', PoolingLayer(64, 56, 3, 2))

# Name of the layer whose output feeds the residual (shortcut) path of the
# next bottleneck block.
RES_PREV = 'pool1'

# Stage conv2: three bottleneck blocks at 56x56, 64 -> 64 -> 256 channels.
for i in range(3):
    # Bottleneck: 1x1 reduce, 3x3, 1x1 expand.
    NN.add('conv2_{}_a'.format(i), ConvLayer(64 if i == 0 else 256, 64, 56, 1))
    NN.add('conv2_{}_b'.format(i), ConvLayer(64, 64, 56, 3))
    NN.add('conv2_{}_c'.format(i), ConvLayer(64, 256, 56, 1))

    # With residual shortcut.
    if i == 0:
        # First block: project the shortcut with a 1x1 conv to match the
        # 256-channel output.
        NN.add('conv2_br', ConvLayer(64, 256, 56, 1), prevs=(RES_PREV,))
        RES_PREV = 'conv2_br'
    # Element-wise sum of the shortcut and the bottleneck output.
    # NOTE(review): presumably EltwiseLayer(nfm, sofm, nsrc) -- confirm.
    NN.add('conv2_{}_res'.format(i), EltwiseLayer(256, 56, 2),
           prevs=(RES_PREV, 'conv2_{}_c'.format(i)))
    RES_PREV = 'conv2_{}_res'.format(i)

# Stage conv3: four bottleneck blocks at 28x28, downsampling by stride 2 in
# the first block; 128 -> 128 -> 512 channels.
for i in range(4):
    NN.add('conv3_{}_a'.format(i),
           ConvLayer(256, 128, 28, 1, 2) if i == 0
           else ConvLayer(512, 128, 28, 1))
    NN.add('conv3_{}_b'.format(i), ConvLayer(128, 128, 28, 3))
    NN.add('conv3_{}_c'.format(i), ConvLayer(128, 512, 28, 1))

    # With residual shortcut.
    if i == 0:
        # Strided 1x1 projection matching both the spatial downsampling and
        # the 512-channel output.
        NN.add('conv3_br', ConvLayer(256, 512, 28, 1, 2), prevs=(RES_PREV,))
        RES_PREV = 'conv3_br'
    # Element-wise sum of the shortcut and the bottleneck output.
    NN.add('conv3_{}_res'.format(i), EltwiseLayer(512, 28, 2),
           prevs=(RES_PREV, 'conv3_{}_c'.format(i)))
    RES_PREV = 'conv3_{}_res'.format(i)

# Stage conv4: six bottleneck blocks at 14x14, downsampling by stride 2 in
# the first block; 256 -> 256 -> 1024 channels.
for i in range(6):
    NN.add('conv4_{}_a'.format(i),
           ConvLayer(512, 256, 14, 1, 2) if i == 0
           else ConvLayer(1024, 256, 14, 1))
    NN.add('conv4_{}_b'.format(i), ConvLayer(256, 256, 14, 3))
    NN.add('conv4_{}_c'.format(i), ConvLayer(256, 1024, 14, 1))

    # With residual shortcut.
    if i == 0:
        # Strided 1x1 projection matching the downsampling and channel count.
        NN.add('conv4_br', ConvLayer(512, 1024, 14, 1, 2), prevs=(RES_PREV,))
        RES_PREV = 'conv4_br'
    # Element-wise sum of the shortcut and the bottleneck output.
    NN.add('conv4_{}_res'.format(i), EltwiseLayer(1024, 14, 2),
           prevs=(RES_PREV, 'conv4_{}_c'.format(i)))
    RES_PREV = 'conv4_{}_res'.format(i)

# Stage conv5: three bottleneck blocks at 7x7, downsampling by stride 2 in
# the first block; 512 -> 512 -> 2048 channels.
for i in range(3):
    NN.add('conv5_{}_a'.format(i),
           ConvLayer(1024, 512, 7, 1, 2) if i == 0
           else ConvLayer(2048, 512, 7, 1))
    NN.add('conv5_{}_b'.format(i), ConvLayer(512, 512, 7, 3))
    NN.add('conv5_{}_c'.format(i), ConvLayer(512, 2048, 7, 1))

    # With residual shortcut.
    if i == 0:
        # Strided 1x1 projection matching the downsampling and channel count.
        NN.add('conv5_br', ConvLayer(1024, 2048, 7, 1, 2), prevs=(RES_PREV,))
        RES_PREV = 'conv5_br'
    # Element-wise sum of the shortcut and the bottleneck output.
    NN.add('conv5_{}_res'.format(i), EltwiseLayer(2048, 7, 2),
           prevs=(RES_PREV, 'conv5_{}_c'.format(i)))
    RES_PREV = 'conv5_{}_res'.format(i)

# Global 7x7 pooling down to 1x1x2048 (presumably average pooling in
# ResNet; the layer type here does not distinguish -- confirm).
NN.add('pool5', PoolingLayer(2048, 1, 7))

# Final 1000-way classifier.
NN.add('fc', FCLayer(2048, 1000))

0 comments on commit b68d642

Please sign in to comment.