Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added NeptuneSaver for logging model checkpoints #821

Merged
merged 28 commits into from
Mar 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
0760506
added output handlers, model checkpoint handler, added mnist example,…
jakubczakon Jan 26, 2020
ea65a1a
added exp link to examples, added tests
jakubczakon Jan 26, 2020
9372b8b
added neptune do docs
jakubczakon Jan 26, 2020
f3d81c5
Merge branch 'master' into master
jakubczakon Jan 26, 2020
c3d9556
fixed test
jakubczakon Jan 26, 2020
bfff60c
Merge branch 'master' of https://github.com/neptune-ai/ignite
jakubczakon Jan 26, 2020
741d3d7
fixed imports
jakubczakon Jan 26, 2020
0d3b1f5
added neptune-client to test dependencies
jakubczakon Jan 26, 2020
7f00f6a
fixed missing package message
jakubczakon Jan 26, 2020
cc5ca70
Merge branch 'master' into master
jakubczakon Jan 26, 2020
cc450b8
dropped model checkpoing handler
jakubczakon Jan 27, 2020
b0bf959
updated experiment link
jakubczakon Jan 27, 2020
73d4001
dropped __futures__ print_function
jakubczakon Jan 27, 2020
e47a994
updated fork
jakubczakon Feb 29, 2020
d69068c
added NeptuneSaver and tests
jakubczakon Feb 29, 2020
1cdfd83
autopep8 fix
Feb 29, 2020
39e2c66
updated token to anonymous user neptuner
jakubczakon Feb 29, 2020
de7000f
Merge branch 'master' of https://github.com/neptune-ai/ignite
jakubczakon Feb 29, 2020
5b345a6
updated experiment link
jakubczakon Mar 2, 2020
b2e0901
Merge branch 'master' into master
jakubczakon Mar 2, 2020
7ddf4d3
updated token to 'ANONYMOUS'
jakubczakon Mar 7, 2020
6b13741
Merge branch 'master' of https://github.com/neptune-ai/ignite
jakubczakon Mar 7, 2020
a0a43d9
Merge branch 'master' into master
jakubczakon Mar 7, 2020
05d3c6f
updated examples, fixed tests
jakubczakon Mar 7, 2020
4022987
autopep8 fix
Mar 7, 2020
d2a8b23
fixed serializable test
jakubczakon Mar 7, 2020
fb59d0c
Merge branch 'master' of https://github.com/neptune-ai/ignite
jakubczakon Mar 7, 2020
2a79216
fixed serializable model test
jakubczakon Mar 7, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions examples/contrib/mnist/mnist_with_neptune_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

Note:
You can see an example experiment here:
https://ui.neptune.ai/o/neptune-ai/org/pytorch-ignite-integration/e/PYTOR-26/charts
https://ui.neptune.ai/o/shared/org/pytorch-ignite-integration/e/PYTOR-26/charts
"""
import sys
from argparse import ArgumentParser
Expand All @@ -31,6 +31,7 @@

from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss
from ignite.handlers import Checkpoint

from ignite.contrib.handlers.neptune_logger import *

Expand Down Expand Up @@ -69,7 +70,7 @@ def get_data_loaders(train_batch_size, val_batch_size):
return train_loader, val_loader


def run(train_batch_size, val_batch_size, epochs, lr, momentum, neptune_project):
def run(train_batch_size, val_batch_size, epochs, lr, momentum):
train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
model = Net()
device = "cpu"
Expand Down Expand Up @@ -104,8 +105,8 @@ def compute_metrics(engine):
validation_evaluator.run(val_loader)

npt_logger = NeptuneLogger(
api_token=None,
project_name=neptune_project,
api_token="ANONYMOUS",
project_name="shared/pytorch-ignite-integration",
name="ignite-mnist-example",
params={
"train_batch_size": train_batch_size,
Expand Down Expand Up @@ -146,6 +147,16 @@ def compute_metrics(engine):

npt_logger.attach(trainer, log_handler=GradsScalarHandler(model), event_name=Events.ITERATION_COMPLETED(every=100))

def score_function(engine):
return engine.state.metrics['accuracy']

to_save = {'model': model}
handler = Checkpoint(to_save, NeptuneSaver(npt_logger), n_saved=2,
filename_prefix='best', score_function=score_function,
score_name="validation_accuracy",
global_step_transform=global_step_from_engine(trainer))
validation_evaluator.add_event_handler(Events.COMPLETED, handler)

# kick everything off
trainer.run(train_loader, max_epochs=epochs)
npt_logger.close()
Expand All @@ -160,7 +171,6 @@ def compute_metrics(engine):
parser.add_argument("--epochs", type=int, default=10, help="number of epochs to train (default: 10)")
parser.add_argument("--lr", type=float, default=0.01, help="learning rate (default: 0.01)")
parser.add_argument("--momentum", type=float, default=0.5, help="SGD momentum (default: 0.5)")
parser.add_argument("--neptune_project", type=str, help="your project in neptune.ai")

args = parser.parse_args()

Expand All @@ -172,4 +182,4 @@ def compute_metrics(engine):
logger.addHandler(handler)
logger.setLevel(logging.INFO)

run(args.batch_size, args.val_batch_size, args.epochs, args.lr, args.momentum, args.neptune_project)
run(args.batch_size, args.val_batch_size, args.epochs, args.lr, args.momentum)
128 changes: 110 additions & 18 deletions ignite/contrib/handlers/neptune_logger.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import os

import numbers
import tempfile
from typing import Mapping
import warnings

import torch

import ignite
from ignite.engine import Events
from ignite.contrib.handlers.base_logger import (
BaseLogger,
BaseOptimizerParamsHandler,
Expand All @@ -18,6 +16,7 @@

__all__ = [
"NeptuneLogger",
"NeptuneSaver",
"OptimizerParamsHandler",
"OutputHandler",
"WeightsScalarHandler",
Expand All @@ -36,8 +35,10 @@ class OutputHandler(BaseOutputHandler):
from ignite.contrib.handlers.neptune_logger import *

# Create a logger
npt_logger = NeptuneLogger(api_token=os.environ["NEPTUNE_API_TOKEN"],
project_name="USER_NAME/PROJECT_NAME",
# We are using the api_token for the anonymous user neptuner but you can use your own.

npt_logger = NeptuneLogger(api_token="ANONYMOUS",
project_name="shared/pytorch-ignite-integration",
experiment_name="cnn-mnist", # Optional,
params={"max_epochs": 10}, # Optional,
tags=["pytorch-ignite","mnist"] # Optional
Expand All @@ -62,8 +63,10 @@ class OutputHandler(BaseOutputHandler):
def evaluate(engine):
evaluator.run(validation_set, max_epochs=1)

npt_logger = NeptuneLogger(api_token=os.environ["NEPTUNE_API_TOKEN"],
project_name="USER_NAME/PROJECT_NAME",
# We are using the api_token for the anonymous user neptuner but you can use your own.

npt_logger = NeptuneLogger(api_token="ANONYMOUS",
project_name="shared/pytorch-ignite-integration",
experiment_name="cnn-mnist", # Optional,
params={"max_epochs": 10}, # Optional,
tags=["pytorch-ignite", "mnist"] # Optional
Expand Down Expand Up @@ -150,8 +153,10 @@ class OptimizerParamsHandler(BaseOptimizerParamsHandler):
from ignite.contrib.handlers.neptune_logger import *

# Create a logger
npt_logger = NeptuneLogger(api_token=os.environ["NEPTUNE_API_TOKEN"],
project_name="USER_NAME/PROJECT_NAME",
# We are using the api_token for the anonymous user neptuner but you can use your own.

npt_logger = NeptuneLogger(api_token="ANONYMOUS",
project_name="shared/pytorch-ignite-integration",
experiment_name="cnn-mnist", # Optional,
params={"max_epochs": 10}, # Optional,
tags=["pytorch-ignite","mnist"] # Optional
Expand Down Expand Up @@ -198,8 +203,10 @@ class WeightsScalarHandler(BaseWeightsScalarHandler):
from ignite.contrib.handlers.neptune_logger import *

# Create a logger
npt_logger = NeptuneLogger(api_token=os.environ["NEPTUNE_API_TOKEN"],
project_name="USER_NAME/PROJECT_NAME",
# We are using the api_token for the anonymous user neptuner but you can use your own.

npt_logger = NeptuneLogger(api_token="ANONYMOUS",
project_name="shared/pytorch-ignite-integration",
experiment_name="cnn-mnist", # Optional,
params={"max_epochs": 10}, # Optional,
tags=["pytorch-ignite","mnist"] # Optional
Expand Down Expand Up @@ -251,8 +258,10 @@ class GradsScalarHandler(BaseWeightsScalarHandler):
from ignite.contrib.handlers.neptune_logger import *

# Create a logger
npt_logger = NeptuneLogger(api_token=os.environ["NEPTUNE_API_TOKEN"],
project_name="USER_NAME/PROJECT_NAME",
# We are using the api_token for the anonymous user neptuner but you can use your own.

npt_logger = NeptuneLogger(api_token="ANONYMOUS",
project_name="shared/pytorch-ignite-integration",
experiment_name="cnn-mnist", # Optional,
params={"max_epochs": 10}, # Optional,
tags=["pytorch-ignite","mnist"] # Optional
Expand Down Expand Up @@ -334,8 +343,10 @@ class NeptuneLogger(BaseLogger):
from ignite.contrib.handlers.neptune_logger import *

# Create a logger
npt_logger = NeptuneLogger(api_token=os.environ["NEPTUNE_API_TOKEN"],
project_name="USER_NAME/PROJECT_NAME",
# We are using the api_token for the anonymous user neptuner but you can use your own.

npt_logger = NeptuneLogger(api_token="ANONYMOUS",
project_name="shared/pytorch-ignite-integration",
experiment_name="cnn-mnist", # Optional,
params={"max_epochs": 10}, # Optional,
tags=["pytorch-ignite","mnist"] # Optional
Expand Down Expand Up @@ -378,16 +389,33 @@ class NeptuneLogger(BaseLogger):
npt_logger.close()

Explore an experiment with neptune tracking here:
https://ui.neptune.ai/o/neptune-ai/org/pytorch-ignite-integration/e/PYTOR-26/charts
https://ui.neptune.ai/o/shared/org/pytorch-ignite-integration/e/PYTOR1-18/charts
You can save model checkpoints to a Neptune server:

.. code-block:: python

from ignite.handlers import Checkpoint

def score_function(engine):
return engine.state.metrics['accuracy']

to_save = {'model': model}
handler = Checkpoint(to_save, NeptuneSaver(npt_logger), n_saved=2,
filename_prefix='best', score_function=score_function,
score_name="validation_accuracy",
global_step_transform=global_step_from_engine(trainer))
validation_evaluator.add_event_handler(Events.COMPLETED, handler)

It is also possible to use the logger as context manager:

.. code-block:: python

from ignite.contrib.handlers.neptune_logger import *

with npt_logger = NeptuneLogger(api_token=os.environ["NEPTUNE_API_TOKEN"],
project_name="USER_NAME/PROJECT_NAME",
# We are using the api_token for the anonymous user neptuner but you can use your own.

npt_logger = NeptuneLogger(api_token="ANONYMOUS",
project_name="shared/pytorch-ignite-integration",
experiment_name="cnn-mnist", # Optional,
params={"max_epochs": 10}, # Optional,
tags=["pytorch-ignite","mnist"] # Optional
Expand Down Expand Up @@ -426,3 +454,67 @@ def __init__(self, *args, **kwargs):

def close(self):
self.experiment.stop()


class NeptuneSaver:
    """Handler that saves input checkpoint to the Neptune server.

    Instances are callables with the ``(checkpoint, filename)`` signature and a
    ``remove(filename)`` method, so they can be passed as the save handler of
    :class:`ignite.handlers.Checkpoint` (see the example below).

    Args:
        neptune_logger (ignite.contrib.handlers.neptune_logger.NeptuneLogger): an instance of
            NeptuneLogger class.

    Examples:

        .. code-block:: python

            from ignite.contrib.handlers.neptune_logger import *

            # Create a logger
            # We are using the api_token for the anonymous user neptuner but you can use your own.

            npt_logger = NeptuneLogger(api_token="ANONYMOUS",
                                       project_name="shared/pytorch-ignite-integration",
                                       experiment_name="cnn-mnist",  # Optional,
                                       params={"max_epochs": 10},  # Optional,
                                       tags=["pytorch-ignite", "mnist"]  # Optional
                                       )

            ...
            evaluator = create_supervised_evaluator(model, metrics=metrics, ...)
            ...

            from ignite.handlers import Checkpoint

            def score_function(engine):
                return engine.state.metrics['accuracy']

            to_save = {'model': model}

            # pass neptune logger to NeptuneSaver

            handler = Checkpoint(to_save, NeptuneSaver(npt_logger), n_saved=2,
                                 filename_prefix='best', score_function=score_function,
                                 score_name="validation_accuracy",
                                 global_step_transform=global_step_from_engine(trainer))

            evaluator.add_event_handler(Events.COMPLETED, handler)

            # We need to close the logger when we are done
            npt_logger.close()

    For example, you can access model checkpoints and download them from here:
    https://ui.neptune.ai/o/shared/org/pytorch-ignite-integration/e/PYTOR1-18/charts

    """

    def __init__(self, neptune_logger: "NeptuneLogger"):
        # Only the experiment handle is needed; all artifact calls go through it.
        self._experiment = neptune_logger.experiment

    def __call__(self, checkpoint: Mapping, filename: str) -> None:
        """Serialize ``checkpoint`` with ``torch.save`` and log it as artifact ``filename``.

        Args:
            checkpoint: mapping of objects to serialize.
            filename: destination name passed to ``log_artifact``.
        """
        # Local import: only this method needs ``os``.
        import os

        # Write into a private temporary directory rather than re-opening a
        # still-open NamedTemporaryFile by name, which is not possible on Windows.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, "checkpoint")
            torch.save(checkpoint, tmp_path)
            # NOTE(review): assumes log_artifact has finished reading the file
            # when it returns, since the directory is removed on exit - confirm.
            self._experiment.log_artifact(tmp_path, filename)

    def remove(self, filename: str) -> None:
        """Delete the artifact named ``filename`` from the experiment."""
        self._experiment.delete_artifacts(filename)
39 changes: 38 additions & 1 deletion tests/ignite/contrib/handlers/test_neptune_logger.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import math
import warnings

from unittest.mock import call, ANY, MagicMock
import pytest
from unittest.mock import MagicMock, call, ANY
import torch

from ignite.engine import Engine, Events, State
Expand Down Expand Up @@ -429,6 +430,42 @@ def dummy_handler(engine, logger, event_name):
trainer.run(data, max_epochs=n_epochs)


def test_neptune_saver_serializable(dummy_model_factory, dirname):
    """A checkpoint that ``torch.save`` can serialize is logged exactly once."""
    logger_stub = MagicMock(spec=NeptuneLogger)
    logger_stub.experiment = MagicMock()

    checkpoint = {"model": torch.nn.Module()}

    save_handler = NeptuneSaver(logger_stub)
    save_handler(checkpoint, "test.pth")

    assert logger_stub.experiment.log_artifact.call_count == 1


def test_neptune_saver_non_serializable(dirname):
    """Nothing is logged when the checkpoint cannot be serialized."""
    logger_stub = MagicMock(spec=NeptuneLogger)
    logger_stub.experiment = MagicMock()

    # The checkpoint holds a lambda as its "model" entry.
    checkpoint = {"model": lambda x: x}
    save_handler = NeptuneSaver(logger_stub)

    try:
        with warnings.catch_warnings():
            # Silence the UserWarning torch/serialization.py emits when it cannot
            # retrieve the source code of the container being saved.
            warnings.simplefilter("ignore", category=UserWarning)
            save_handler(checkpoint, "test.pth")
    except Exception:
        # Any failure while saving is tolerated; only the upload count matters.
        pass

    assert logger_stub.experiment.log_artifact.call_count == 0


@pytest.fixture
def no_site_packages():
import sys
Expand Down