Commit

Release 0.1.0/sparsify 2019 02 18 (#11)
* Attempt to test for #4

PyTorch's boolean comparison (`==` yields an element-wise tensor, not a bool)
  makes it a pain to test exact tensor values.
* Will resume later

* Skipping sparsify test

It's a painfully simple function that has worked every time
  I've used it.
- No, it doesn't handle every edge case
+ Yes, it gets the job done and can be packaged for the general case

* Use instance `.nonzero()` instead of `torch.nonzero()`

* Fix "type-check" in layer inspectors

* WIP: Implement shrink() in terms of resize_layers()

It was as easy as I wanted it to be.
* The complexity is in how to handle a given nested layer
  + Those will get implemented with a given feature
  - Need to program feature detection

TODO:
+ Implement the resizing on a layer-by-layer basis, so the
  shrinking can differ per layer
  + Instead of applying the data transformation uniformly,
    each layer gets its own scaling factor (see the sketch below)
  + Those factors will be computed as 1 - percent_waste(layer)

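A minimal sketch of that per-layer idea (not code from this commit): `percent_waste` is the real helper from `morph/layers/sparse.py`, while the `per_layer_factors` name and the dict shape are made up for illustration.

```python
import torch.nn as nn

from morph.layers.sparse import percent_waste  # existing helper in this repo


def per_layer_factors(net: nn.Module) -> dict:
    """Hypothetical: one shrink factor per supported child layer,
    so a layer wasting 30% of its weights keeps ~70% of its width."""
    return {
        name: 1.0 - percent_waste(layer)
        for name, layer in net.named_children()
        if isinstance(layer, (nn.Linear, nn.Conv2d))
    }
```
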
* Lay out skeleton for the true shrinking algo #4

* shrink_layer() is simple

* Justification for giving Shrinkage an 'input_dimensions' property:

> The thought is that channel depth doesn't change the output dimensions for CNNs, and that's
  the attribute we're concerned with in the convolutional case...
  * Linear layers only have two dimensions, so it's a huge deal there.
  * RNNs do linear things over 'timesteps', so it's a big deal there.
  * Residual/identity/skip-connections in CNNs need this.

> __It's decided__. The attribute stays
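For context on the CNN-vs-Linear point above (an illustration, not part of the original message): a convolution's spatial output is unaffected by its channel depth, while `in_features` is literally half of a linear layer's weight shape.

```python
import torch
import torch.nn as nn

x_shallow = torch.randn(1, 3, 28, 28)
x_deep = torch.randn(1, 16, 28, 28)

# Changing in_channels leaves the conv's spatial output size untouched
print(nn.Conv2d(3, 8, kernel_size=3)(x_shallow).shape)  # torch.Size([1, 8, 26, 26])
print(nn.Conv2d(16, 8, kernel_size=3)(x_deep).shape)    # torch.Size([1, 8, 26, 26])

# ...but a Linear weight is exactly (out_features, in_features)
print(nn.Linear(784, 1000).weight.shape)                # torch.Size([1000, 784])
```
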
stephenjfox committed Feb 19, 2019
1 parent 24bb995 commit 57d0433
Showing 7 changed files with 313 additions and 22 deletions.
171 changes: 171 additions & 0 deletions check-prune-widen.ipynb
@@ -0,0 +1,171 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import morph"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<module 'morph.nn' from '/Users/stephen/Documents/Insight-AI/Insight-AI-Fellowship-Project/src/morph/nn/__init__.py'>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"morph.nn"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"??morph.nn.once"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import morph.nn.shrink as ms"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from morph.testing.models import EasyMnist"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"some_linear = ms.nn.Linear(3, 2)\n",
"c = [c for c in some_linear.children()]\n",
"len(c)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"EasyMnist(\n",
" (linear1): Linear(in_features=784, out_features=1000, bias=True)\n",
" (linear2): Linear(in_features=1000, out_features=30, bias=True)\n",
" (linear3): Linear(in_features=30, out_features=10, bias=True)\n",
")"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"EasyMnist()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Module(\n",
" (linear1): Linear(in_features=784, out_features=700, bias=True)\n",
" (linear2): Linear(in_features=700, out_features=21, bias=True)\n",
" (linear3): Linear(in_features=21, out_features=10, bias=True)\n",
")"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ms.prune(EasyMnist())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
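To make the shapes in the last executed notebook cell concrete (arithmetic only, not code from the commit): assuming `ms.prune` delegates to the `uniform_prune`/`resize_layers(width_factor=0.7)` path shown in the shrink.py diff below, each hidden dimension of `EasyMnist` is scaled to 70% and rounded.

```python
# round(0.7 * hidden_size) reproduces the shapes printed above;
# Python's built-in round is used here, and both products are exact integers
for hidden in (1000, 30):
    print(hidden, "->", round(0.7 * hidden))
# 1000 -> 700
# 30 -> 21
```
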
2 changes: 1 addition & 1 deletion morph/layers/sparse.py
@@ -28,7 +28,7 @@ def percent_waste(layer: nn.Module) -> float:
weight matrix/tensor to determine how many neurons can be spared
"""
w = layer.weight
non_sparse_w = torch.nonzero(sparsify(w))
non_sparse_w = sparsify(w).nonzero()
non_zero_count = non_sparse_w.numel() // len(non_sparse_w[0])

percent_size = non_zero_count / w.numel()
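To illustrate the index arithmetic this change relies on (the tensor is invented; the shape behaviour of `.nonzero()` is standard PyTorch): `.nonzero()` returns one row of indices per non-zero element, so dividing `numel()` by the row width recovers the non-zero count.

```python
import torch

w = torch.tensor([[0.0, 1.5],
                  [2.0, 0.0],
                  [0.0, 3.0]])

idx = w.nonzero()                    # shape (3, 2): one (row, col) pair per non-zero entry
non_zero_count = idx.numel() // len(idx[0])

print(non_zero_count)                # 3
print(non_zero_count / w.numel())    # 0.5 -> the percent_size computed above
```
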
16 changes: 16 additions & 0 deletions morph/layers/sparse_test.py
@@ -0,0 +1,16 @@
from unittest import main as test_main, TestCase, skip

from .sparse import sparsify, torch

class TestSparseFunctions(TestCase):

@skip("Skipping value-wise comparison until better solution than iterating all tensor values")
def test_sparsify_selected_indices_should_have_sub_threshold_values(self):
test_threshold = 0.1
test_tensor = torch.randn(3, 2)
expected = torch.where(test_tensor > test_threshold, test_tensor, torch.zeros(3, 2))
self.assertEqual(expected, sparsify(test_tensor, test_threshold))


if __name__ == "__main__":
test_main()
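One way the skipped assertion could be made value-wise without iterating every element: `torch.equal` is an existing PyTorch function that collapses exact element-wise equality to a single bool. Whether the expected tensor here actually matches `sparsify`'s semantics is the open question the author deferred; this is only a sketch.

```python
import torch
from unittest import TestCase

from morph.layers.sparse import sparsify  # same function the real test targets


class TestSparsifyExactValues(TestCase):

    def test_values_at_or_below_threshold_are_zeroed(self):
        threshold = 0.1
        t = torch.randn(3, 2)
        expected = torch.where(t > threshold, t, torch.zeros(3, 2))
        # torch.equal returns a plain bool, so no per-element iteration is needed
        self.assertTrue(torch.equal(expected, sparsify(t, threshold)))
```
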
6 changes: 6 additions & 0 deletions morph/nn/resizing.py
@@ -0,0 +1,6 @@
from collections import namedtuple

Resizing = namedtuple('Resizing', ['input_size', 'output_size'], defaults=[0, 0])
Resizing.__doc__ += ': Base class for a type that encapsulates a resized layer'
Resizing.input_size.__doc__ = "The layer's \"new\" input dimension size (Linear -> in_features, Conv2d -> in_channels)"
Resizing.output_size.__doc__ = "The layer's \"new\" output dimension size (Linear -> out_features, Conv2d -> out_channels)"
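A quick usage note (values invented): thanks to `defaults=[0, 0]`, both fields are optional and read back by name.

```python
from morph.nn.resizing import Resizing

r = Resizing(input_size=784, output_size=700)
print(r.input_size, r.output_size)   # 784 700
print(Resizing())                    # Resizing(input_size=0, output_size=0)
```
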
107 changes: 95 additions & 12 deletions morph/nn/shrink.py
@@ -1,20 +1,103 @@
from morph.layers.sparse import percent_waste
from morph._utils import check, round
from morph.nn.utils import in_dim, out_dim
from morph.utils import check, round
from .resizing import Resizing
from .utils import in_dim, out_dim, group_layers_by_algo
from .widen import resize_layers
from ._types import type_name

from typing import List

import torch.nn as nn


def calc_reduced_size(layer: nn.Module) -> (int, int):
"""Calculates the reduced size of the layer, post training (initial or morphed re-training)
so the layers can be resized.
class Shrinkage:
"""
An intermediary for the "Shrink" step of the three-step Morphing algorithm.
Rather than have all of the state floating free in the scope of a mega-function,
these abstractions ease the implementation of the shrinking and pruning of the
network.
* Given that we have access to the total count of nodes and to how wasteful a layer was,
we can deduce any necessary changes once given a new input dimension
* We expect input dimensions to change to accommodate the trimmed-down earlier layers,
but we want an expansion further along to allow the opening of bottlenecks in the architecture
"""

def __init__(self, input_dimension: int, initial_parameters: int,
waste_percentage: float):
self.input_dimension = input_dimension # TODO: is this relevant in any non-Linear case?
self.initial_parameters = initial_parameters
self.waste_percentage = waste_percentage
self.reduced_parameters = Shrinkage.reduce_parameters(initial_parameters,
waste_percentage)

@staticmethod
def reduce_parameters(initial_parameters: int, waste: float) -> int:
"""Calculates the new, smaller, number of paratemers that this instance encapsulates"""
percent_keep = (1. - waste)
unrounded_params_to_keep = percent_keep * initial_parameters
# round digital up to the nearest integer
return round(unrounded_params_to_keep)


def shrink_to_resize(shrinkage: Shrinkage, new_input_dimension: int) -> Resizing:
"""Given the `new_input_dimension`, calculate a reshaping/resizing for the parameters
of the supplied `shrinkage`.
We round up the new output dimension, generously allowing for opening bottlenecks.
Any waste this introduces should be pruned away on later iterations. (Needs proof/unit test)
"""
# TODO: remove this guard once access to this function is properly protected
check(
type(layer) == nn.Conv2d or type(layer) == nn.Linear,
'Invalid layer type: ' + type(layer))
new_output_dimension = round(shrinkage.reduced_parameters / new_input_dimension)
return Resizing(new_input_dimension, new_output_dimension)


#################### proof of a good implementation ####################


def uniform_prune(net: nn.Module) -> nn.Module:
"""Shrink the network down 70%. Input and output dimensions are not altered"""
return resize_layers(net, width_factor=0.7)


#################### the algorithm to end all algorithms ####################


def shrink_layer(layer: nn.Module) -> Shrinkage:
waste = percent_waste(layer)
parameter_count = layer.weight.numel() # the count is already tracked for us
return Shrinkage(in_dim(layer), parameter_count, waste)


def fit_layer_sizes(layer_sizes: List[Shrinkage]) -> List[Resizing]:
# TODO: where's the invocation site for shrink_to_resize
pass


def transform(original_layer: nn.Module, new_shape: Resizing) -> nn.Module:
# TODO: this might just be utils.redo_layer, without the primitive obsession
pass


def shrink_prune_fit(net: nn.Module) -> nn.Module:
first, middle_layers, last = group_layers_by_algo(net)
shrunk = {
"first": shrink_layer(first),
"middle": [shrink_layer(m) for m in middle_layers],
"last": shrink_layer(last)
}

# FIXME: why doesn't the linter like `fitted_layers`
fitted_layers = fit_layer_sizes([shrunk["first"], *shrunk["middle"], shrunk["last"]])

# iteration very similar to `resize_layers` but matches Shrinkage with the corresponding layer
new_first, new_middle_layers, new_last = group_layers_by_algo(fitted_layers)

new_net = nn.Module()

new_net.add_module(type_name(first), transform(first, new_first))

for old, new in zip(middle_layers, new_middle_layers):
new_net.add_module(type_name(old), transform(old, new))
pass # append to new_net with the Shrinkage's properties

percent_keep = 1 - percent_waste(layer)
shrunk_in, shrunk_out = percent_keep * in_dim(layer), percent_keep * out_dim(layer)
new_net.add_module(type_name(last), transform(last, new_last))

return round(shrunk_in), round(shrunk_out)
return new_net
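A worked example of the `Shrinkage`/`shrink_to_resize` flow above (the layer and waste figures are invented; the API is the one in this diff): a layer with 30,000 weights and 40% waste keeps round(0.6 * 30000) = 18,000 parameters, and fitting that budget to a new input dimension of 700 gives an output dimension of roughly 18000 / 700.

```python
from morph.nn.shrink import Shrinkage, shrink_to_resize

# A hypothetical Linear(1000, 30): 30,000 weights, 40% of them "wasted"
shrinkage = Shrinkage(input_dimension=1000, initial_parameters=30_000,
                      waste_percentage=0.4)
print(shrinkage.reduced_parameters)                  # 18000

# If the upstream layer was itself shrunk from 1000 to 700 outputs:
print(shrink_to_resize(shrinkage, new_input_dimension=700))
# Resizing(input_size=700, output_size=26)  -- round(18000 / 700)
```
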
16 changes: 10 additions & 6 deletions morph/nn/utils.py
@@ -47,22 +47,26 @@ def make_children_list(children_or_named_children):


def in_dim(layer: nn.Module) -> int:
check(type_supported(layer))
"""Returns the input dimension of a given (supported) `layer`"""
layer_name = type_name(layer)
check(type_supported(layer_name))

if layer_is_linear(layer):
if layer_is_linear(layer_name):
return layer.in_features
elif layer_is_conv2d(layer):
elif layer_is_conv2d(layer_name):
return layer.in_channels
else:
raise RuntimeError('Inspecting on unsupported layer')


def out_dim(layer: nn.Module) -> int:
check(type_supported(layer))
"""Returns the output dimension of a given (supported) `layer`"""
layer_name = type_name(layer)
check(type_supported(layer_name))

if layer_is_linear(layer):
if layer_is_linear(layer_name):
return layer.out_features
elif layer_is_conv2d(layer):
elif layer_is_conv2d(layer_name):
return layer.out_channels
else:
raise RuntimeError('Inspecting on unsupported layer')
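A small check of the two inspectors after this change (layer sizes invented; the `in_features`/`in_channels` dispatch is exactly what the code above shows):

```python
import torch.nn as nn
from morph.nn.utils import in_dim, out_dim

print(in_dim(nn.Linear(784, 1000)), out_dim(nn.Linear(784, 1000)))  # 784 1000
print(in_dim(nn.Conv2d(3, 16, 3)), out_dim(nn.Conv2d(3, 16, 3)))    # 3 16
```
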
