Dynamic swap mvp #2

Closed · wants to merge 53 commits

Changes from all commits · 53 commits
0eefedf
Dynamic swap working, as long as the vocabularies are the same
XapaJIaMnu Mar 24, 2021
521f634
Model and GPUSlot separation, add vocab support
kpu Mar 28, 2021
67190db
Add vocabulary padding script
kpu Mar 28, 2021
b165af8
Split code into main and library h/cpp
kpu Mar 28, 2021
4d8e327
Restore ensemble support
kpu Mar 28, 2021
203a9bb
Minor logging improvements
kpu Mar 28, 2021
c71d488
Return Histories
kpu Mar 28, 2021
47feb2b
Alignments
kpu Mar 28, 2021
8fc8d02
Fix enit
kpu Mar 28, 2021
b4bded3
Merge github.com:marian-nmt/marian-dev into dynamic_swap_mvp
kpu Mar 29, 2021
b9bc153
Merge https://github.com/kpu/marian-dev into dynamic_swap_mvp
kpu Mar 29, 2021
9b3e76a
Add an option to force loading
kpu Mar 30, 2021
cf12178
Allow CPU only compilation
XapaJIaMnu Mar 30, 2021
7e06801
Add explicit gpu device index when creating the object
XapaJIaMnu Mar 30, 2021
635cfb0
Allow multiple mini-batches
XapaJIaMnu Mar 30, 2021
ee6ff75
No stringstreams
XapaJIaMnu Mar 30, 2021
57ddeba
Sort the histories before returning them
XapaJIaMnu Apr 1, 2021
4f2b218
SwappableSlot: add GPU-to-GPU reset feature
Apr 1, 2021
fa51460
Merge pull request #1 from davidecaroselli/dynamic_swap_mvp
XapaJIaMnu Apr 1, 2021
2062438
Merge branch 'dynamic_swap_mvp' of https://github.com/kpu/marian-dev …
XapaJIaMnu Apr 2, 2021
e3f5388
Separate graph from loading to GPU
XapaJIaMnu Apr 2, 2021
ba4d166
Abort if not initialized
kpu Apr 2, 2021
f8523b7
Go back to Load instead of OverwriteFrom
kpu Apr 2, 2021
8bcfdcc
Check device index
kpu Apr 2, 2021
9c906fe
Add optional parameter "max_length_factor" to BeamSearch::search(); d…
Apr 13, 2021
194c5d2
Remove unused function
kpu Apr 13, 2021
1183436
Free unused parameters of the GPUEngine
kpu Apr 13, 2021
9960571
Completely deallocate Allocator for parameters instead.
kpu Apr 13, 2021
c228ab1
Avoid calling allocator on clear
kpu Apr 13, 2021
ce9ad6c
Set inference only flag on engine
kpu Apr 13, 2021
90e161f
Merge pull request #3 from davidecaroselli/dynamic_swap_mvp
kpu Apr 14, 2021
d6550e4
Add CopyTo(ExpressionGraph) to CPU model
Apr 22, 2021
9931c7b
Remove redundant DeviceId parameter to CPULoadedModel.CopyTo()
Apr 22, 2021
ade2c04
Add on-the-fly restore capability to Adam optimizer
Apr 23, 2021
c88319b
Add restore(state) to base class OptimizerBase
Apr 26, 2021
453f805
Add CPULoadedModel constructor with vector<item> argument
Apr 27, 2021
6ee6a13
Bugfix of Tensorbase::set() - skip set when Item is memory mapped
Apr 27, 2021
a6d99c9
Add eps_ reset in Adagrad::restore()
Apr 27, 2021
8d4cff7
Make swap.h functions available to external projects
May 3, 2021
d30bf12
Add GPUSlot in swappable.h
May 3, 2021
62cddaf
added new condition to postpone validation and checkpoint saving
nicolabertoldi Apr 22, 2022
c01f4c5
added parameters to the config parser
nicolabertoldi Apr 22, 2022
5649097
renamed variable names
nicolabertoldi Apr 22, 2022
cfadf44
Merge pull request #1 from davidecaroselli/features/delayed_valid_and…
Apr 22, 2022
5dab1fa
added new condition to postpone validation and checkpoint saving
nicolabertoldi Apr 22, 2022
79e2c0a
fixings
nicolabertoldi Apr 22, 2022
2ecc247
Merge branch 'dynamic_swap_mvp' into features/postpones_valid_and_save
Apr 27, 2022
d641922
Merge pull request #3 from davidecaroselli/features/postpones_valid_a…
Apr 27, 2022
02fcd05
changed defaults
nicolabertoldi Apr 27, 2022
695b0ef
fix for handling values equals to 0
nicolabertoldi Apr 27, 2022
b31aabe
Update training_state.h
nicolabertoldi May 11, 2022
4d56bd0
Merge pull request #4 from davidecaroselli/features/postpones_valid_a…
May 17, 2022
1cc16bc
Add BeamSearch::setBeamSize() method to dynamically update beam-size
Oct 10, 2022
52 changes: 52 additions & 0 deletions scripts/contrib/pad_model_vocabulary.py
@@ -0,0 +1,52 @@
#!/usr/bin/env python3
# Pads a Marian model's vocabulary to have greater size. The added tokens have
# zero probability.
# ./pad_model_vocabulary.py input.npz output.npz desired_vocab_size
#
# You'll also need to separately pad your vocabulary file like so:
# old=$(wc -l input.vocab |cut -d " " -f 1)
# (cat input.vocab; seq -f "<PADDING%g>" $((desired_vocab_size-old))) >output.vocab
#
# Warning: probably only works with shared vocabulary models.
import math
import numpy as np
import sys
import yaml

# Amend the vocab size in the raw ["special:model.yml"] data from a Marian npz.
# Returns the raw data to use for ["special:model.yml"].
def substitute_vocab_config(raw, new_size):
    print("Old yml: ", raw.tobytes())
    raw_yaml = raw.tobytes().decode("utf-8")
    # Python yaml doesn't like null bytes.
    if raw_yaml.endswith("\x00"):
        raw_yaml = raw_yaml[:-1]
    config = yaml.safe_load(raw_yaml)
    config['dim-vocabs'] = [new_size] * len(config['dim-vocabs'])
    raw_yaml = yaml.dump(config)
    if raw_yaml.endswith("\n"):
        raw_yaml = raw_yaml[:-1]
    raw_yaml += "\x00"
    return np.array(bytearray(raw_yaml, 'utf-8'))

if len(sys.argv) != 4:
    print("Usage: " + sys.argv[0] + " input.npz output.npz desired_vocab_size")
    sys.exit(1)

resized_path = sys.argv[2]
new_size = int(sys.argv[3])
old_model = np.load(sys.argv[1])

new_model = dict(old_model)
old_size = len(old_model["Wemb"])
if old_size > new_size:
    sys.stderr.write("New size is smaller than original. Cowardly refusing to clip vocab.\n")
    sys.exit(2)
print("Before: ", new_model["decoder_ff_logit_out_b"].shape, new_model["Wemb"].shape)
bias = new_model["decoder_ff_logit_out_b"]
new_model["decoder_ff_logit_out_b"] = np.pad(bias, [(0,0),(0,new_size - bias.shape[1])], mode='constant', constant_values = -math.inf)
new_model["Wemb"] = np.pad(new_model["Wemb"], [(0,new_size - bias.shape[1]), (0,0)], mode='constant', constant_values = 0)
print("After: ", new_model["decoder_ff_logit_out_b"].shape, new_model["Wemb"].shape)
new_model["special:model.yml"] = substitute_vocab_config(new_model["special:model.yml"], new_size)
print("New yml: ", new_model["special:model.yml"].tobytes())
np.savez(resized_path, **new_model)
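Why the script pads the way it does: the decoder's output distribution is a softmax over logits, so giving every padding token a bias of negative infinity gives it exactly zero probability, while the zero rows added to the embedding matrix are inert unless a padding token is ever fed in. Formally,

    p_i = \frac{e^{z_i}}{\sum_j e^{z_j}}, \qquad z = Wx + b, \qquad b_i = -\infty \;\Rightarrow\; e^{z_i} = 0 \;\Rightarrow\; p_i = 0.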
8 changes: 7 additions & 1 deletion src/CMakeLists.txt
@@ -100,6 +100,7 @@ set(MARIAN_SOURCES
   translator/nth_element.cpp
   translator/helpers.cpp
   translator/scorers.cpp
+  translator/swappable.cpp

   training/graph_group_async.cpp
   training/graph_group_sync.cpp
@@ -182,6 +183,7 @@ if(CUDA_FOUND)
     tensors/gpu/add_all.cu
     tensors/gpu/tensor_operators.cu
     tensors/gpu/cudnn_wrappers.cu
+    tensors/gpu/swap.cu
     translator/nth_element.cu
     translator/helpers.cu
   STATIC)
@@ -213,6 +215,10 @@ if (NOT COMPILE_LIBRARY_ONLY)
   set_target_properties(marian_decoder PROPERTIES OUTPUT_NAME marian-decoder)
   target_compile_options(marian_decoder PRIVATE ${ALL_WARNINGS})

+  add_executable(marian_swapper command/marian_swapper.cpp)
+  set_target_properties(marian_swapper PROPERTIES OUTPUT_NAME marian_swapper)
+  target_compile_options(marian_swapper PRIVATE ${ALL_WARNINGS})
+
   add_executable(marian_scorer command/marian_scorer.cpp)
   set_target_properties(marian_scorer PROPERTIES OUTPUT_NAME marian-scorer)
   target_compile_options(marian_scorer PRIVATE ${ALL_WARNINGS})
@@ -225,7 +231,7 @@ if (NOT COMPILE_LIBRARY_ONLY)
   set_target_properties(marian_conv PROPERTIES OUTPUT_NAME marian-conv)
   target_compile_options(marian_conv PRIVATE ${ALL_WARNINGS})

-  set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_scorer marian_vocab marian_conv)
+  set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_swapper marian_scorer marian_vocab marian_conv)

   # marian.zip and marian.tgz
   # This combines marian, marian_decoder in a single ZIP or TAR file for
98 changes: 98 additions & 0 deletions src/command/marian_swapper.cpp
@@ -0,0 +1,98 @@
#include "translator/history.h"
#include "translator/output_printer.h"
#include "translator/swappable.h"

#include <iostream>
#include <string>
#include <unordered_map>

namespace marian {
void LoadBig(Ptr<Options> options, std::unordered_map<std::string, CPULoadedModel> &to) {
to.emplace("pten", CPULoadedModel(options,
"/home/ubuntu/consistent-big-models/padded/pten.npz",
{"/home/ubuntu/consistent-big-models/padded/pten.vocab"},
"/home/ubuntu/consistent-big-models/padded/pten.vocab"));

to.emplace("enit", CPULoadedModel(options,
"/home/ubuntu/consistent-big-models/padded/enit.npz",
{"/home/ubuntu/consistent-big-models/padded/enit.vocab"},
"/home/ubuntu/consistent-big-models/padded/enit.vocab"));
}

void LoadTiny(Ptr<Options> options, std::unordered_map<std::string, CPULoadedModel> &to) {
std::vector<std::string> models = {"csen", "encs", "enet", "eten", "esen", "enes"};
for (const std::string m : models) {
std::string base = "/home/ubuntu/consistent-bergamot-students/padded/";
base += m + ".";
to.emplace(m, CPULoadedModel(options, base + "npz", {base + "spm"}, base + "spm"));
}
}

} // namespace

/* Demo program: run with options for any of the models */
int main(int argc, char** argv) {
using namespace marian;
Ptr<Options> options = parseOptions(argc, argv, cli::mode::translation);

Ptr<GPUEngine> engine = New<GPUEngine>(options, 0);
GPULoadedModel slot(engine);

std::unordered_map<std::string, CPULoadedModel> models;
// LoadBig(options, models);
LoadTiny(options, models);

// begin with a space to avoid conflict with a real sentence.
const std::string kSwitchPrefix(" CHANGE ");

bool alignments = !options->get<std::string>("alignment").empty();

bool loaded = false;
std::string line;
while (std::getline(std::cin, line)) {
// Switch out which model is used.
if (line.substr(0, kSwitchPrefix.size()) == kSwitchPrefix) {
std::string key = line.substr(kSwitchPrefix.size());
auto found = models.find(key);
if (found == models.end()) {
std::cerr << "Model for " << key << " not loaded." << std::endl;
return 1;
}
slot.Load(found->second);
loaded = true;
continue;
}
if (!loaded) {
std::cerr << "Select a model first." << std::endl;
continue;
}

// Actually translating with a model.
marian::Histories histories = slot.Translate({line});
// In practice there is one history because we provided one line.
for(auto history : histories) {
Result result(history->top());
Words words = std::get<0>(result);
std::cout << slot.TrgVocab()->decode(words) << std::endl;

/* Print alignments */
if (alignments) {
Hypothesis &hypo = *std::get<1>(result);
// [t][s] -> P(s|t)
marian::data::SoftAlignment alignment(hypo.tracebackAlignment());
// An easier call for this is:
// std:cout << data::SoftAlignToString(alignment);
// The below is just there to show how access them programatically.
// NB you can convert to hard with data::ConvertSoftAlignToHardAlign(alignment, threshold)
for (auto target : alignment) {
for (float source : target) {
std::cout << source << ' ';
}
std::cout << '\n';
}
}
}
}

return 0;
}
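A quick way to exercise the demo loop (the paths and config are illustrative; it assumes the hard-coded padded student models above exist on disk):

    printf ' CHANGE csen\nDobrý den.\n' | ./marian_swapper -c config.yml

The first line swaps the csen model into the GPU slot (note the mandatory leading space in the CHANGE command); every following line is translated with the currently loaded model until another CHANGE line swaps in a different one, without restarting the process.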
13 changes: 10 additions & 3 deletions src/common/config_parser.cpp
@@ -396,6 +396,9 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) {
       "Redefine logical epoch counter as multiple of data epochs (e.g. 1e), updates (e.g. 100Ku) or labels (e.g. 1Gt). "
       "Second parameter defines width of fractional display, 0 by default.",
       {"1e", "0"});
+  cli.add<std::string>("--save-from",
+      "Save model not before arg updates (append 't' for every arg target labels)",
+      "0u");

   addSuboptionsInputLength(cli);
   addSuboptionsTSV(cli);
@@ -572,8 +575,11 @@ void ConfigParser::addOptionsValidation(cli::CLIWrapper& cli) {
   cli.add<bool>("--valid-reset-stalled",
       "Reset all stalled validation metrics when the training is restarted");
   cli.add<size_t>("--early-stopping",
-    "Stop if the first validation metric does not improve for arg consecutive validation steps",
-    10);
+      "Stop if the first validation metric does not improve for arg consecutive validation steps",
+      10);
+  cli.add<std::string>("--valid-from",
+      "Validate model not before arg updates (append 't' for every arg target labels)",
+      "0u");

   // decoding options
   cli.add<size_t>("--beam-size,-b",
@@ -691,7 +697,8 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) {
   cli.add<std::vector<int>>("--output-approx-knn",
       "Use approximate knn search in output layer (currently only in transformer)")
       ->implicit_val("100 1024");
-
+  cli.add<std::string>("--swap-model",
+      "Path to model to swap to.");
 #if 0 // @TODO: Ask Hany if there are any decoding-time options
   // add ULR settings
   addSuboptionsULR(cli);
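For reference, these "--valid-from"/"--save-from" values pair a count with a unit suffix, in the style of "100Ku" from the logical-epoch help text above: updates by default, target labels with a trailing 't', and the "0u" default disables the gate. A minimal self-contained sketch of the postponement check this implies; all names below are illustrative, not the PR's actual implementation:

#include <cstddef>

// Illustrative sketch: skip validation/saving until training progress
// passes a "from" threshold parsed out of --valid-from / --save-from.
enum class Unit { Updates, TargetLabels };

struct Threshold {
  std::size_t n = 0;             // e.g. 100000 for "100Ku"
  Unit unit = Unit::Updates;     // 'u' = updates, 't' = target labels
};

// Progress counters a trainer already tracks.
struct Progress {
  std::size_t updates = 0;       // optimizer steps so far
  std::size_t targetLabels = 0;  // target tokens seen so far
};

bool mayValidate(const Threshold &validFrom, const Progress &p) {
  std::size_t seen =
      (validFrom.unit == Unit::Updates) ? p.updates : p.targetLabels;
  return seen >= validFrom.n;    // with the default "0u" this is always true
}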
39 changes: 25 additions & 14 deletions src/graph/parameters.h
@@ -2,6 +2,7 @@

 #include <fstream>
 #include <map>
+#include <unordered_map>
 #include <unordered_set>

 #include "common/definitions.h"
@@ -22,12 +23,12 @@ class Parameters {

   /** @brief List of all parameter nodes of this expression graph. */
   std::vector<Expr> params_;
-  std::map<std::string, Expr> named_;
+  std::unordered_map<std::string, Expr> named_;

-  Ptr<TensorAllocator> vals_;
-  Ptr<TensorAllocator> grads_;
+  std::unique_ptr<TensorAllocator> vals_;
+  std::unique_ptr<TensorAllocator> grads_;

-  size_t totalCapacity(Ptr<TensorAllocator> alloc) {
+  size_t totalCapacity(const TensorAllocator *alloc) {
     size_t sum = 0;
     for(auto p : params_) {
       sum += alloc->capacity(p->shape(), p->value_type());
@@ -73,18 +74,18 @@ class Parameters {
   }

   virtual void init(Ptr<Backend> backend) {
-    vals_ = New<TensorAllocator>(backend);
-    grads_ = New<TensorAllocator>(backend);
+    vals_.reset(new TensorAllocator(backend));
+    grads_.reset(new TensorAllocator(backend));
   }

   virtual void init(Ptr<Backend> backend, Ptr<Device> device) {
-    vals_ = New<TensorAllocator>(backend, device);
-    grads_ = New<TensorAllocator>(backend, device);
+    vals_.reset(new TensorAllocator(backend, device));
+    grads_.reset(new TensorAllocator(backend, device));
   }

   virtual void allocateForward() {
-    if(!params_.empty() && vals_->size() == 0) {
-      vals_->reserveExact(totalCapacity(vals_));
+    if(!params_.empty() && vals_ && vals_->size() == 0) {
+      vals_->reserveExact(totalCapacity(vals_.get()));

       // sort parameters by name before allocation to make sure the memory layout after allocation is always the same
       std::sort(params_.begin(), params_.end(), [](Expr n1, Expr n2){ return n1->name() < n2->name(); });
@@ -98,12 +99,12 @@ class Parameters {
   }

   virtual void allocateBackward() {
-    if(!params_.empty() && grads_->size() == 0) {
+    if(!params_.empty() && grads_ && grads_->size() == 0) {

       // sort parameters by name before allocation to make sure the memory layout after allocation is always the same
       std::sort(params_.begin(), params_.end(), [](Expr n1, Expr n2){ return n1->name() < n2->name(); });

-      grads_->reserveExact(totalCapacity(grads_));
+      grads_->reserveExact(totalCapacity(grads_.get()));
       for(auto p : params_)
         if(!p->grad())
           grads_->allocate(p->grad(), p->shape(), p->value_type());
@@ -120,8 +121,14 @@ class Parameters {
     params_.clear();
     named_.clear();

-    vals_->clear();
-    grads_->clear();
+    if (vals_) vals_->clear();
+    if (grads_) grads_->clear();
+  }
+
+  // Free the underlying memory but leave the parameters and names with dangling pointers. Only use this if you're going to edit the memory pointers manually.
+  virtual void freeMemory() {
+    vals_.reset();
+    grads_.reset();
   }
 };

@@ -167,6 +174,10 @@ class MappedParameters : public Parameters {
     params_.clear();
     named_.clear();
   }
+
+  void freeMemory() override {
+    ABORT("Have not implemented partial munmap for parameters");
+  }
 };

 } // namespace marian
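Design note on the changes above: moving vals_ and grads_ from the shared Ptr to std::unique_ptr makes the Parameters object the sole owner of its allocators, which is what lets the new freeMemory() release a swapped-out model's memory deterministically. The added null checks in allocateForward(), allocateBackward(), and clear() keep those paths safe after such a release, and MappedParameters aborts because memory-mapped parameters cannot be partially unmapped.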
29 changes: 27 additions & 2 deletions src/optimizers/optimizers.h
@@ -60,6 +60,8 @@ class OptimizerBase : public TrainingObserver, public ExponentialSmoothing {
     batchesSeen_ = state.batches;
   }

+  virtual void restore(const std::vector<io::Item> &state) {};
+
   virtual void actAfterLoaded(TrainingState& state) override {
     eta_ = state.eta;
     batchesSeen_ = state.batches;
@@ -195,19 +197,32 @@ class Adagrad : public OptimizerBase {

   void setParams(const std::vector<float>& params) override {
     if(params.size() > 0)
-      eps_ = params[0];
+      def_eps_ = eps_ = params[0];
   }

-    std::vector<Tensor> getShards() override {
+  std::vector<Tensor> getShards() override {
     auto shards = OptimizerBase::getShards();
     shards.push_back(gt_);
     return shards;
   }

+  void restore(const std::vector<io::Item> &state) override {
+    OptimizerBase::restore(state);
+
+    for (auto &item : state) {
+      if ("adagrad_gt" == item.name) {
+        if (gt_) gt_->set(item);
+      }
+    }
+
+    eps_ = def_eps_;
+  }
+
 private:
   void updateImpl(Tensor params, Tensor grads, size_t actualMBSize) override;
   void resetStats() override;

+  float def_eps_ = 1e-8f;
   float eps_ = 1e-8f;
   Ptr<TensorAllocator> alloc_;
   Tensor gt_;
@@ -242,6 +257,16 @@ class Adam : public OptimizerBase {
     return shards;
   }

+  void restore(const std::vector<io::Item> &state) override {
+    for (auto &item : state) {
+      if ("adam_mt" == item.name) {
+        if (mt_) mt_->set(item);
+      } else if ("adam_vt" == item.name) {
+        if (vt_) vt_->set(item);
+      }
+    }
+  }
+
 private:
   void updateImpl(Tensor params, Tensor grads, size_t actualMBSize) override;
   void resetStats() override;
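A sketch of how the new restore() entry point might be driven, assuming optimizer shards were saved the way marian checkpoints them; the wrapper function and file name below are illustrative, not part of the PR:

#include "common/io.h"
#include "optimizers/optimizers.h"

#include <string>
#include <vector>

// Illustrative: rehydrate a live optimizer's shards (Adam's "adam_mt"/"adam_vt",
// or Adagrad's "adagrad_gt") from a saved checkpoint file, without going
// through the full OptimizerBase::load() path.
void restoreOptimizer(marian::Ptr<marian::OptimizerBase> opt,
                      const std::string &path /* e.g. "model.npz.optimizer.npz" */) {
  std::vector<marian::io::Item> items = marian::io::loadItems(path);
  opt->restore(items);  // each optimizer picks out the item names it recognizes
}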
4 changes: 2 additions & 2 deletions src/tensors/allocator.h
@@ -175,7 +175,7 @@ class Allocator {
     reserve(bytes);
   }

-  size_t alignedSize(size_t size) {
+  size_t alignedSize(size_t size) const {
     return (size_t)(ceil(size / (double)alignment_) * alignment_);
   }

@@ -189,7 +189,7 @@
   }

   template <typename T>
-  size_t capacity(size_t num) {
+  size_t capacity(size_t num) const {
     return alignedSize(num * sizeof(T));
   }
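These const qualifiers presumably tie into the parameters.h change above: totalCapacity() now receives a const TensorAllocator*, so the size queries it ultimately relies on, capacity() and alignedSize(), have to be callable on const objects. Both are pure size computations, so marking them const changes no behavior.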