-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Release 0.1.0/sparsify 2019 02 18 (#11)
* Attempt to test for #4 PyTorch's boolean comparison crap isn't useful and makes it a pain to test exact tensor values. * Will resume later * Skipping sparsify test It's a painfully simple function that has worked every time I've used it. - No it doesn't handle every edge case + Yes, it gets the job done and can be packaged for the general case * Use instance `.nonzero()` instead of `torch.nonzero()` * Fix "type-check" in layer inspectors * WIP: Implement shrink() in terms of resize_layers() It was as easy as I wanted it to be. * The complexity is how to handle a given nested layer + Those will get implemented with a given feature - Need to program feature detection TODO: + Implement the resizing on a layer-by-layer case, to make the shrinking a bit different + Instead of applying the data transformation uniformly, each layer gets + Those factors will be computed as 1 - percent_waste(layer) * Lay out skeleton for the true shrinking algo #4 * shrink_layer() is simple * Justification for giving Shrinkage a 'input_dimensions' property: > The thought is that channel depth doesn't change the output dimensions for CNNs, and that's attribute we're concerned with in the convulotional case... * Linear layers only have two dimensions, so it's a huge deal there. * RNNs do linear things over 'timesteps', so it's a big deal there. * Residual/identity/skip-connections in CNNs need this. > __It's decided__. The attribute stays
- Loading branch information
1 parent
24bb995
commit 57d0433
Showing
7 changed files
with
313 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import morph" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"<module 'morph.nn' from '/Users/stephen/Documents/Insight-AI/Insight-AI-Fellowship-Project/src/morph/nn/__init__.py'>" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"morph.nn" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"??morph.nn.once" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import morph.nn.shrink as ms" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from morph.testing.models import EasyMnist" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"0" | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"some_linear = ms.nn.Linear(3, 2)\n", | ||
"c = [c for c in some_linear.children()]\n", | ||
"len(c)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"EasyMnist(\n", | ||
" (linear1): Linear(in_features=784, out_features=1000, bias=True)\n", | ||
" (linear2): Linear(in_features=1000, out_features=30, bias=True)\n", | ||
" (linear3): Linear(in_features=30, out_features=10, bias=True)\n", | ||
")" | ||
] | ||
}, | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"EasyMnist()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"Module(\n", | ||
" (linear1): Linear(in_features=784, out_features=700, bias=True)\n", | ||
" (linear2): Linear(in_features=700, out_features=21, bias=True)\n", | ||
" (linear3): Linear(in_features=21, out_features=10, bias=True)\n", | ||
")" | ||
] | ||
}, | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"ms.prune(EasyMnist())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.2" | ||
}, | ||
"toc": { | ||
"base_numbering": 1, | ||
"nav_menu": {}, | ||
"number_sections": true, | ||
"sideBar": true, | ||
"skip_h1_title": false, | ||
"title_cell": "Table of Contents", | ||
"title_sidebar": "Contents", | ||
"toc_cell": false, | ||
"toc_position": {}, | ||
"toc_section_display": true, | ||
"toc_window_display": false | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
from unittest import main as test_main, TestCase, skip | ||
|
||
from .sparse import sparsify, torch | ||
|
||
class TestSparseFunctions(TestCase):
  """Unit tests for the tensor-sparsifying helpers imported from `.sparse`."""

  def test_sparsify_selected_indices_should_have_sub_threshold_values(self):
    """`sparsify` should agree with an explicit `torch.where` thresholding.

    The expected tensor keeps every entry strictly greater than the threshold
    and replaces all other entries with zero.
    """
    test_threshold = 0.1
    test_tensor = torch.randn(3, 2)
    expected = torch.where(test_tensor > test_threshold, test_tensor, torch.zeros(3, 2))
    actual = sparsify(test_tensor, test_threshold)
    # `assertEqual` coerces `expected == actual` to a single bool, which raises
    # for tensors with more than one element. `torch.equal` compares size and
    # every element in one call, so no manual iteration over values is needed.
    self.assertTrue(torch.equal(expected, actual))
|
||
|
||
if __name__ == "__main__": | ||
test_main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from collections import namedtuple | ||
|
||
# Immutable (input_size, output_size) record describing a layer's resized shape.
# Both fields default to 0, so `Resizing()` is a valid "empty" value.
Resizing = namedtuple('Resizing', 'input_size output_size', defaults=(0, 0))
Resizing.__doc__ = (
    Resizing.__doc__ + ': Baseclass for a type that encapsulates a resized layer')
Resizing.input_size.__doc__ = (
    'The layer\'s "new" input dimension size '
    '(Linear -> in_features, Conv2d -> in_channels)')
Resizing.output_size.__doc__ = (
    'The layer\'s "new" output dimension size '
    '(Linear -> out_features, Conv2d -> out_channels)')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,103 @@ | ||
from morph.layers.sparse import percent_waste | ||
from morph._utils import check, round | ||
from morph.nn.utils import in_dim, out_dim | ||
from morph.utils import check, round | ||
from .resizing import Resizing | ||
from .utils import in_dim, out_dim, group_layers_by_algo | ||
from .widen import resize_layers | ||
from ._types import type_name | ||
|
||
from typing import List | ||
|
||
import torch.nn as nn | ||
|
||
|
||
def calc_reduced_size(layer: nn.Module) -> (int, int): | ||
"""Calculates the reduced size of the layer, post training (initial or morphed re-training) | ||
so the layers can be resized. | ||
class Shrinkage:
  """
  Per-layer bookkeeping for the "Shrink" step of the three step Morphing algorithm.

  Instead of keeping all of this state loose inside one mega-function, each
  instance records what is known about a single layer so that shrinking and
  pruning the network can be implemented in small pieces.
  * Knowing the total parameter count and how wasteful the layer was, any
    necessary change can be deduced once a new input dimension is supplied.
  * Input dimensions are expected to change to accommodate trimmed-down earlier
    layers, while an expansion further along is wanted so that bottlenecks in
    the architecture can open up.

  Attributes set by the constructor:
    input_dimension: the layer's input dimension, stored as given.
    initial_parameters: the layer's parameter count before shrinking.
    waste_percentage: fraction of the layer considered wasted; used as
      `1 - waste_percentage`, so presumably within [0, 1] (not validated here).
    reduced_parameters: parameter count left after discarding the waste.
  """

  def __init__(self, input_dimension: int, initial_parameters: int,
               waste_percentage: float):
    kept = Shrinkage.reduce_parameters(initial_parameters, waste_percentage)
    self.reduced_parameters = kept
    self.waste_percentage = waste_percentage
    self.initial_parameters = initial_parameters
    self.input_dimension = input_dimension  # TODO: is this relevant in any non-Linear case?

  @staticmethod
  def reduce_parameters(initial_parameters: int, waste: float) -> int:
    """Calculates the new, smaller, number of parameters that this instance encapsulates.

    Args:
      initial_parameters: parameter count before shrinking.
      waste: fraction of those parameters to discard.

    Returns:
      `(1 - waste) * initial_parameters` passed through `round`.
    """
    # NOTE(review): `round` is whatever the module imports under that name
    # (morph.utils.round in this file); the original comment says it rounds up
    # to the nearest integer -- confirm against that helper.
    return round((1. - waste) * initial_parameters)
|
||
|
||
def shrink_to_resize(shrinkage: Shrinkage, new_input_dimension: int) -> Resizing:
  """Given the `new_input_dimension`, calculate a reshaping/resizing for the parameters
  of the supplied `shrinkage`.

  The new output dimension is the reduced parameter count divided by the new
  input dimension, passed through `round`. The original note says the result is
  rounded up, generously allowing for opening bottlenecks, and that any waste
  introduced is pruned on later iterations. (Needs proof/unit test; the
  rounding direction depends on the `round` helper this module imports.)

  Args:
    shrinkage: supplies `reduced_parameters`, the parameter budget to spread
      over the new input dimension.
    new_input_dimension: the input size the resized layer must accept.

  Returns:
    A `Resizing(new_input_dimension, new_output_dimension)`.

  Raises:
    ZeroDivisionError: if `new_input_dimension` is 0.
  """
  # The former layer-type guard was dropped: it referenced a `layer` name that
  # is not a parameter of this function, and its message concatenated a str
  # with a type object, so it could only ever raise.
  new_output_dimension = round(shrinkage.reduced_parameters / new_input_dimension)
  return Resizing(new_input_dimension, new_output_dimension)
|
||
|
||
#################### proof of a good implementation #################### | ||
|
||
|
||
def uniform_prune(net: nn.Module) -> nn.Module:
  """Resize `net` with a single, fixed width factor of 0.7.

  Delegates entirely to `resize_layers`. Per the original note, the network's
  own input and output dimensions are not altered (that behavior lives in
  `resize_layers`, which is not visible from here).

  Args:
    net: the network to shrink.

  Returns:
    Whatever `resize_layers` returns for `net` at `width_factor=0.7`.
  """
  keep_ratio = 0.7
  return resize_layers(net, width_factor=keep_ratio)
|
||
|
||
#################### the algorithm to end all algorithms #################### | ||
|
||
|
||
def shrink_layer(layer: nn.Module) -> Shrinkage:
  """Summarize one layer as a `Shrinkage`.

  Args:
    layer: a module exposing a `weight` tensor (its `numel()` is read here).

  Returns:
    A `Shrinkage` built from the layer's input dimension (`in_dim`), the
    element count of its weight tensor, and its `percent_waste` measurement.
  """
  wasted_fraction = percent_waste(layer)
  weight_elements = layer.weight.numel()  # the tensor already tracks its own size
  return Shrinkage(
      input_dimension=in_dim(layer),
      initial_parameters=weight_elements,
      waste_percentage=wasted_fraction)
|
||
|
||
def fit_layer_sizes(layer_sizes: List[Shrinkage]) -> List[Resizing]:
  """Placeholder: turn per-layer `Shrinkage`s into per-layer `Resizing`s.

  Not implemented yet; currently returns None regardless of input.
  """
  # TODO: where's the invocation site for shrink_to_resize
  return None
|
||
|
||
def transform(original_layer: nn.Module, new_shape: Resizing) -> nn.Module:
  """Placeholder: build a layer like `original_layer` sized per `new_shape`.

  Not implemented yet; currently returns None regardless of input.
  """
  # TODO: this might just be utils.redo_layer, without the primitive obsession
  return None
|
||
|
||
def shrink_prune_fit(net: nn.Module) -> nn.Module:
  """Build a new network whose layers are resized according to their measured waste.

  Steps:
    1. Split `net` into first / middle / last layers via `group_layers_by_algo`.
    2. Summarize each layer as a `Shrinkage` (`shrink_layer`).
    3. Fit those summaries into per-layer sizes (`fit_layer_sizes`).
    4. Re-create every layer at its fitted size (`transform`) and register it
       on a fresh `nn.Module`.

  Args:
    net: the trained network to shrink.

  Returns:
    A new `nn.Module` holding the transformed layers.
  """
  first, middle_layers, last = group_layers_by_algo(net)
  # Materialize once: the middle layers are walked twice below (to shrink and
  # to pair with their fitted sizes), which would silently yield nothing the
  # second time if a one-shot iterator were handed back.
  middle_layers = list(middle_layers)

  shrunk = [shrink_layer(first), *(shrink_layer(m) for m in middle_layers), shrink_layer(last)]

  # NOTE: `fit_layer_sizes` is still a stub that returns None in this module,
  # so the regrouping below receives None until it is implemented.
  fitted_layers = fit_layer_sizes(shrunk)

  # iteration very similar to `resize_layers` but matches each fitted size with the corresponding layer
  new_first, new_middle_layers, new_last = group_layers_by_algo(fitted_layers)

  new_net = nn.Module()

  # NOTE(review): modules are registered under `type_name(layer)`; if that is
  # only the class name, same-typed layers would replace one another -- confirm.
  new_net.add_module(type_name(first), transform(first, new_first))

  for old, new in zip(middle_layers, new_middle_layers):
    new_net.add_module(type_name(old), transform(old, new))

  new_net.add_module(type_name(last), transform(last, new_last))

  # Leftovers from the removed `calc_reduced_size` (reads of an undefined
  # `layer` and an early tuple return) were dropped so the assembled network
  # is actually returned.
  return new_net
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.