-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'release/v0.1dev' into main
- Loading branch information
Showing
16 changed files
with
862 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
/tmp.* | ||
/local | ||
*.log | ||
*.pyc | ||
__pycache__ | ||
docker-nom/build/ | ||
\#*\# | ||
.\#* | ||
venv/ | ||
*.egg-info/ | ||
docs/build/ | ||
.coverage* | ||
htmlcov/ | ||
.ipynb_checkpoints/ | ||
.pytest_cache | ||
build/ | ||
dist/ | ||
|
||
# Subversion | ||
.svn | ||
|
||
|
||
# Vim (and some others) | ||
*~ | ||
*.swp | ||
|
||
# Eclipse | ||
.cache | ||
.classpath | ||
.project | ||
.settings | ||
build-eclipse | ||
|
||
# Gradle | ||
.gradle | ||
gradle-app.setting | ||
|
||
# Sublime Text | ||
*.sublime-workspace | ||
|
||
# NetBeans | ||
.netbeans | ||
catalog.xml | ||
generated | ||
nb-configuration.xml | ||
|
||
# Mac OS | ||
.DS_Store | ||
__MACOSX | ||
|
||
# Intellij | ||
.idea/ | ||
.idea/workspace.xml | ||
.idea/libraries | ||
.idea/kotlinc.xml | ||
.idea/tasks.xml | ||
.idea/dictionaries | ||
.idea/scopes | ||
.idea/artifacts | ||
.idea/uiDesigner.xml | ||
.idea/dataSources.local.xml | ||
.idea/dataSources.local.xml | ||
.idea/dataSources.ids | ||
.idea/dataSources.xml | ||
.idea/dataSources | ||
.idea/kotlinc.xml | ||
.idea/sonar* | ||
*.iws | ||
*.iml | ||
.idea/compiler.xml | ||
.idea/kotlinc.xml | ||
.idea/inspectionProfiles/profiles_settings.xml | ||
.idea/misc.xml | ||
.idea/modules.xml | ||
|
||
# vscode | ||
.vscode | ||
|
||
# dask | ||
dask-worker-space/ | ||
|
||
# Generated data | ||
*.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
Version 0.1dev Migrated to github, 10/14/2021 | ||
|
||
This version moved from code.ornl.gov repository to github to facilitate | ||
use as an open-source project. | ||
|
||
Version 0.0 Migrated from internal repository, 7/13/2021 | ||
|
||
Migrated from internal git repository to code.ornl.gov, and generalized source to be more | ||
readily applicable to new problems. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Gremlin | ||
|
||
Gremlin is a machine learning model evaluator. Find out where your model performs poorly. | ||
|
||
## Requires | ||
* Python 3.[78] | ||
* LEAP (https://github.com/AureumChaos/LEAP) | ||
|
||
## How it works | ||
It utilizes an adversarial evolutionary algorithm (EA) to find features where a model | ||
performs poorly. The intent is for the user to leverage that information to tune training | ||
data for subsequent model retraining to improve performance in those poorly performing situations. | ||
|
||
## Configuration | ||
At a bare minimum, Gremlin needs an algorithm, a `Problem`, and a `Representation`. The | ||
`Problem` and `Representation` should inherit from `leap_ec.problem.Problem` and | ||
`leap_ec.representation.Representation`, respectively. The model to evaluate should be | ||
handled within the custom `Problem` class. | ||
|
||
Example configuration: | ||
|
||
``` | ||
evolution: | ||
name: leap_ec.algorithm.generational_ea *or* custom_generator_function | ||
params: | ||
max_generations: 50 | ||
pop_size: 25 | ||
problem: | ||
name: leap_ec.problem.Problem *or* custom_class | ||
params: | ||
maximize: False | ||
representation: | ||
name: leap_ec.representation.Representation *or* custom_class | ||
params: | ||
initialize: | ||
name: curried_initializer_function (see leap_ec.int_rep.create_int_vector) | ||
params: {} | ||
analysis: | ||
name: analysis_function | ||
``` | ||
|
||
The `name:` field specifies the function or class to import. If this field is followed | ||
by `params:`, Gremlin will attempt to instantiate the function or class with the arguments that | ||
follow prior to running the evolutionary algorithm. | ||
|
||
## Example | ||
Example code and configuration for a real problem can be found in `examples/MNIST`. | ||
This problem involves Gremlin evolving patterns of occlusion (graying-out pixels of an | ||
image) in order to cause a convolutional neural network to perform poorly on digit | ||
recognition. | ||
|
||
This can be run simply by (must be in `examples/MNIST` directory): | ||
|
||
``` | ||
$ gremlin MNIST_config.yml | ||
``` | ||
|
||
## Sub-directories | ||
* `gremlin/` -- main `gremlin` code | ||
* `examples/` -- examples for using gremlin |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Configuration file for Gremlin | ||
# Usage: | ||
# $ gremlin MNIST_config.yml | ||
|
||
# these variables are defined once here and referenced in multiple places below | ||
pop_size: 50 | ||
bounds: [[0, 55], [0, 55], [0, 55], [0, 55], [0, 55], [0, 55]] | ||
|
||
|
||
# this defines the evolutionary algorithm and its parameters | ||
evolution: | ||
name: leap_ec.algorithm.generational_ea | ||
params: | ||
max_generations: 30 | ||
pop_size: ${pop_size} | ||
problem: | ||
name: gremlin.problem.DatasetProblem | ||
params: | ||
maximize: True | ||
model: | ||
name: MNIST_example.LeNet | ||
params: | ||
checkpoint_path: ./data/model.pt | ||
metric: | ||
name: torch.nn.functional.cross_entropy | ||
generator: | ||
name: MNIST_example.MNISTRowColOcclusionGenerator | ||
params: | ||
batch_size: 500 | ||
representation: | ||
name: leap_ec.representation.Representation | ||
params: | ||
initialize: | ||
name: leap_ec.int_rep.initializers.create_int_vector | ||
params: | ||
bounds: ${bounds} | ||
decoder: | ||
name: MNIST_example.RowColDecoder | ||
params: {} | ||
pipeline: | ||
- name: leap_ec.ops.tournament_selection | ||
params: | ||
k: 5 | ||
- name: leap_ec.ops.clone | ||
params: {} | ||
- name: leap_ec.ops.uniform_crossover | ||
params: {} | ||
- name: leap_ec.int_rep.ops.mutate_binomial | ||
params: | ||
std: 2.5 | ||
bounds: ${bounds} | ||
expected_num_mutations: 3 | ||
- name: leap_ec.ops.evaluate | ||
params: {} | ||
- name: leap_ec.ops.pool | ||
params: | ||
size: ${pop_size} | ||
|
||
analysis: | ||
name: MNIST_example.MNIST_heatmap |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
''' | ||
MNIST_example.py | ||
MNIST occlusion problem. | ||
Gremlin will find patterns of row/column occlusion that | ||
cause the model to perform poorly. | ||
This file defines the model, decoder, generator, and analyzer | ||
dynamically imported, instantiated, and used by the Gremlin | ||
interface. | ||
Training the model is separate from Gremlin. | ||
''' | ||
import copy | ||
|
||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
import torch | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
from torchvision.datasets import MNIST | ||
from torchvision.transforms import ToTensor | ||
from leap_ec.decoder import Decoder | ||
|
||
|
||
class RowColDecoder(Decoder):
    '''
    Split a flat genome into the rows and the columns to gray out.

    Genes below 28 are read as row indices; genes of 28 or more are
    read as column indices once 28 has been subtracted from them.
    '''
    def decode(self, genome, *args, **kwargs):
        '''
        Return ``[row_indices, col_indices]`` decoded from ``genome``.

        ``genome`` is filtered with boolean masks, so it presumably is a
        numpy integer array -- TODO confirm against the initializer.
        Extra positional/keyword arguments are accepted and ignored.
        '''
        rows = genome[genome < 28]
        # shift the upper half of the gene range back onto 0-based columns
        cols = genome[genome >= 28] - 28
        return [rows, cols]
|
||
|
||
class LeNet(nn.Module):
    '''
    LeNet-style convolutional classifier with 10 output logits.

    Two 5x5 convolutions (each followed by ReLU and 2x2 max-pooling)
    feed two fully connected layers.  The flattened feature size of
    4*4*50 corresponds to single-channel 28x28 inputs.

    Parameters
    ----------
    checkpoint_path : str, optional
        path to a checkpoint readable by ``torch.load`` that holds the
        weights under the key ``'model_state_dict'``.  When given, the
        weights are loaded and the model is switched to eval mode.
    '''
    def __init__(self, checkpoint_path=None):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.lin1 = nn.Linear(4*4*50, 500)
        self.lin2 = nn.Linear(500, 10)
        if checkpoint_path is not None:
            # map_location='cpu' lets a checkpoint that was saved on a GPU
            # load on a CPU-only host; load_state_dict copies the values
            # into this module's own parameters either way.
            # NOTE: torch.load unpickles data -- only load trusted files.
            ckpt = torch.load(checkpoint_path, map_location='cpu')
            self.load_state_dict(ckpt['model_state_dict'])
            self.eval()

    def forward(self, xx):
        '''
        Return the class logits for a batch of images.

        Parameters
        ----------
        xx : torch.Tensor
            batch of images; must reduce to 4*4*50 features per image
            after the two conv/pool stages (e.g. shape (N, 1, 28, 28))

        Returns
        -------
        torch.Tensor
            logits of shape (N, 10)
        '''
        xx = F.relu(self.conv1(xx))
        xx = F.max_pool2d(xx, 2, 2)
        xx = F.relu(self.conv2(xx))
        xx = F.max_pool2d(xx, 2, 2)
        # flatten the feature maps for the fully connected layers
        xx = xx.view(-1, 4*4*50)
        xx = F.relu(self.lin1(xx))
        return self.lin2(xx)
|
||
|
||
def MNIST_heatmap(population):
    '''
    Plot how often each pixel is occluded across the population.

    Every gene below 28 adds one to a whole row of a 28x28 grid and
    every gene of 28 or more adds one to a whole column (index minus
    28).  The grid is drawn with the 'hot' colormap, written to
    ``MNIST_heatmap.png`` and then shown.

    Parameters
    ----------
    population : iterable
        individuals exposing a ``genome`` attribute; the genomes are
        stacked with ``np.stack``
    '''
    genes = np.stack([individual.genome for individual in population])
    row_hits = genes[genes < 28]
    col_hits = genes[genes >= 28] - 28

    counts = np.zeros((28, 28))
    # ufunc.at accumulates repeated indices, so a row/column picked k
    # times is incremented k times, exactly like a per-index "+= 1" loop
    np.add.at(counts, (row_hits, slice(None)), 1)
    np.add.at(counts, (slice(None), col_hits), 1)

    plt.imshow(counts, cmap='hot')
    plt.title('Population Occlusion Frequency')
    plt.savefig('MNIST_heatmap.png')
    plt.show()
|
||
|
||
class MNISTRowColOcclusionGenerator:
    '''
    Modifies a batch of images by "graying-out"
    rows and columns of each image.

    The batch is indexed as (N, C, H, W) where N is the number of
    images, C is the number of channels (1, 3 or 4 are supported),
    H is the height of the image, and W is the width of the image.

    Attributes
    ----------
    images : torch.Tensor
        one shuffled batch of MNIST test images drawn in ``__init__``
    labels : torch.Tensor
        labels belonging to ``images``

    Methods
    -------
    transform(image, rows, columns)
        gray out rows/columns of a single image (in place)
    '''
    def __init__(self, batch_size, **kwargs):
        '''
        Download the MNIST test split if needed and keep one batch.

        Parameters
        ----------
        batch_size : int
            number of images held by the generator
        **kwargs
            accepted for configuration compatibility and ignored
        '''
        dataset = MNIST('./data/', transform=ToTensor(),
                        train=False, download=True)
        loader = torch.utils.data.DataLoader(dataset=dataset,
                                             batch_size=batch_size,
                                             shuffle=True)
        # only the first (shuffled) batch is ever used
        self.images, self.labels = next(iter(loader))

    def transform(self, image, rows, columns):
        '''
        Set the given rows and columns of ``image`` to 0.5.

        The image is modified in place and also returned.

        Parameters
        ----------
        image : array-like, channels first (C, H, W)
            image to alter; C must be 1, 3 or 4
        rows : sequence of int
            row indices to gray out
        columns : sequence of int
            column indices to gray out

        Raises
        ------
        ValueError
            if the first dimension is not 1, 3 or 4
        '''
        # supports grayscale, rgb, rgba
        if image.shape[0] not in (1, 3, 4):
            raise ValueError(
                f'Unsupported image dimensions {image.shape}')
        # all channels receive the same value, so index them together
        image[:, rows, :] = 0.5
        image[:, :, columns] = 0.5
        return image

    def __call__(self, features):
        '''
        Generate a new dataset modifying by features

        Parameters
        ----------
        features : list
            which rows and columns to obfuscate
            features[0] has rows
            features[1] has columns

        Returns
        -------
        a modified copy of ``self.images``; the stored batch itself
        is left untouched
        '''
        rows, columns = features[0], features[1]
        # work on a copy because transform mutates its argument
        images = copy.deepcopy(self.images)
        for i, image in enumerate(images):
            images[i] = self.transform(image, rows, columns)
        return images
Oops, something went wrong.