Merge pull request #1 from trevorcohn/moving_to_dynet
Moving to dynet
trevorcohn committed Jul 21, 2017
2 parents 7fecf03 + f54f413 commit 0049ada
Showing 9 changed files with 204 additions and 142 deletions.
6 changes: 3 additions & 3 deletions .gitmodules
@@ -1,3 +1,3 @@
[submodule "cnn"]
path = cnn
url = https://github.com/clab/cnn.git
[submodule "dynet"]
path = dynet
url = https://github.com/clab/dynet.git
69 changes: 51 additions & 18 deletions CMakeLists.txt
@@ -1,35 +1,68 @@
project(mantis)
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)

set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cnn/cmake)

# CNN uses Eigen which exploits modern CPU architectures. To get the
# best possible performance, the following are recommended:
# 1. use very recent versions of gcc or Clang to build
# 2. use very recent versions of Eigen (ideally the dev version)
# 3. try compiler options like -march=native or other architecture
# flags (the compiler does not always make the best configuration
# decisions without help)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/dynet/cmake)

function(find_mkl)
set(MKL_ARCH intel64)
find_path(MKL_INCLUDE_DIR mkl.h
PATHS ${MKL_ROOT} ${MKL_ROOT}/include)
find_library(MKL_CORE_LIB NAMES mkl_intel_lp64 mkl_intel_thread mkl_core
PATHS ${MKL_ROOT} ${MKL_ROOT}/lib/${MKL_ARCH}
DOC "MKL core library path")

find_library(MKL_COMPILER_LIB NAMES iomp5 libiomp5md
PATHS ${MKL_ROOT} ${MKL_ROOT}/../compiler/lib/${MKL_ARCH} #Windows
${MKL_ROOT}/../compilers_and_libraries/linux/lib/${MKL_ARCH}_lin #Linux
DOC "MKL compiler lib (for threaded MKL)")

if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_COMPILER_LIB)
get_filename_component(MKL_CORE_LIB_DIR ${MKL_CORE_LIB} DIRECTORY)
get_filename_component(MKL_COMPILER_LIB_DIR ${MKL_COMPILER_LIB} DIRECTORY)
get_filename_component(MKL_COMPILER_LIB_FILE ${MKL_COMPILER_LIB} NAME)
message(STATUS "Found MKL\n * include: ${MKL_INCLUDE_DIR},\n * core library dir: ${MKL_CORE_LIB_DIR},\n * compiler library: ${MKL_COMPILER_LIB}")

# Due to a conflict with /MT and /MD, MSVC needs mkl_intel_lp64 linked last, or we can change individual
# projects to use /MT (mkl_intel_lp64 linked with /MT, default MSVC projects use /MD), or we can instead
# link to the DLL versions. For now I'm opting for this solution which seems to work with projects still
# at their default /MD. Linux build requires the mkl_intel_lp64 to be linked first. So...:
if(MSVC)
set(LIBS ${LIBS} mkl_intel_thread mkl_core mkl_intel_lp64 ${MKL_COMPILER_LIB_FILE} PARENT_SCOPE)
else()
set(LIBS ${LIBS} mkl_intel_lp64 mkl_intel_thread mkl_core ${MKL_COMPILER_LIB_FILE} PARENT_SCOPE)
endif()
include_directories(${MKL_INCLUDE_DIR})
link_directories(${MKL_CORE_LIB_DIR} ${MKL_COMPILER_LIB_DIR})
set(MKL_LINK_DIRS ${MKL_CORE_LIB_DIR} ${MKL_COMPILER_LIB_DIR} PARENT_SCOPE) # Keeping this for python build
else()
message(FATAL_ERROR "Failed to find MKL in path: ${MKL_ROOT} (Did you set MKL_ROOT properly?)")
endif()
endfunction()

# Cross-compiler, cross-platform options
######## Cross-compiler, cross-platform options
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_FAST_MATH")
if (MKL OR MKL_ROOT)
find_mkl() # sets include/lib directories and sets ${LIBS} needed for linking
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_USE_MKL_ALL")
endif()


# Platform-specific options
######## Platform-specific options
if(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX") # Disable min/max macros in windef.h
endif()

# Compiler-specific options
######## Compiler-specific options
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W1 -DEIGEN_HAS_C99_MATH /MP") # -Wall produces 20k warnings
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -Wall -std=c++11 -Ofast -g -march=native")
endif()

enable_testing()

include_directories(${CMAKE_CURRENT_SOURCE_DIR}
${PROJECT_SOURCE_DIR}/cnn)
${PROJECT_SOURCE_DIR}/dynet)

function(find_cudnn)
set(CUDNN_ROOT "" CACHE PATH "CUDNN root path")
@@ -111,9 +144,9 @@ include_directories(${EIGEN3_INCLUDE_DIR})
FIND_PACKAGE(Threads REQUIRED)
set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT})

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cnn/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/dynet/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)
include_directories(${CMAKE_CURRENT_BINARY_DIR})

add_subdirectory(cnn/cnn)
add_subdirectory(dynet/dynet)
add_subdirectory(src)
enable_testing()
1 change: 0 additions & 1 deletion cnn
Submodule cnn deleted from ec75eb
1 change: 1 addition & 0 deletions dynet
Submodule dynet added at b9308e
21 changes: 18 additions & 3 deletions src/CMakeLists.txt
@@ -2,14 +2,29 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8)

foreach(TARGET attentional biattentional)
ADD_EXECUTABLE(${TARGET} ${TARGET}.cc)
target_link_libraries(${TARGET} cnn ${LIBS})
if(UNIX AND NOT APPLE)
target_link_libraries(${TARGET} rt)
endif()
if (WITH_CUDA_BACKEND)
add_dependencies(${TARGET} cnncuda)
target_link_libraries(${TARGET} cnncuda)
set(CUDA_SEPARABLE_COMPILATION ON)
list(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_52,code=compute_52;-std=c++11;-DVERBOSE;-DEIGEN_USE_GPU;-DHAVE_CUDA")
if(CMAKE_COMPILER_IS_GNUCXX)
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9)
# gcc 4.9 or later versions raise SEGV due to the optimization problem.
# Use -O1 instead for now.
list(APPEND CUDA_NVCC_FLAGS "-O1")
else()
list(APPEND CUDA_NVCC_FLAGS "-O2")
endif()
else()
list(APPEND CUDA_NVCC_FLAGS "-O2")
endif()
add_dependencies(${TARGET} gdynet dynetcuda)
target_link_libraries(${TARGET} gdynet dynetcuda)
CUDA_ADD_CUBLAS_TO_TARGET(${TARGET})
else()
add_dependencies(${TARGET} dynet)
target_link_libraries(${TARGET} dynet ${LIBS})
endif (WITH_CUDA_BACKEND)
endforeach()

84 changes: 47 additions & 37 deletions src/attentional.cc
@@ -10,7 +10,7 @@
#include <boost/program_options/variables_map.hpp>

using namespace std;
using namespace cnn;
using namespace dynet;
using namespace boost::program_options;

unsigned LAYERS = 1; // 2
@@ -19,8 +19,8 @@ unsigned ALIGN_DIM = 32; // 128
unsigned SRC_VOCAB_SIZE = 0;
unsigned TGT_VOCAB_SIZE = 0;

cnn::Dict sd;
cnn::Dict td;
dynet::Dict sd;
dynet::Dict td;
int kSRC_SOS;
int kSRC_EOS;
int kTGT_SOS;
@@ -45,7 +45,7 @@ template <class rnn_t>
int main_body(variables_map vm);

int main(int argc, char** argv) {
cnn::initialize(argc, argv);
dynet::initialize(argc, argv);

// command line processing
variables_map vm;
@@ -65,6 +65,10 @@ int main(int argc, char** argv) {
("layers,l", value<int>()->default_value(LAYERS), "use <num> layers for RNN components")
("align,a", value<int>()->default_value(ALIGN_DIM), "use <num> dimensions for alignment projection")
("hidden,h", value<int>()->default_value(HIDDEN_DIM), "use <num> dimensions for recurrent hidden states")
("sgd_trainer", value<unsigned>()->default_value(0), "use specific SGD trainer (0: vanilla SGD; 1: momentum SGD; 2: Adagrad; 3: AdaDelta; 4: Adam)")
("lr_eta", value<float>()->default_value(0.01f), "SGD learning rate value (e.g., 0.01 for simple SGD trainer)")
("lr_eta_decay", value<float>()->default_value(2.0f), "SGD learning rate decay value")
("sparse_updates", value<bool>()->default_value(true), "enable/disable sparse update(s) for lookup parameter(s)")
("topk,k", value<int>()->default_value(100), "use <num> top kbest entries, used with --kbest")
("epochs,e", value<int>()->default_value(50), "maximum number of training epochs")
("gru", "use Gated Recurrent Unit (GRU) for recurrent structure; default RNN")
@@ -112,8 +116,8 @@ void train(Model &model, AM_t &am, Corpus &training, Corpus &devel,
bool doco, float coverage, bool display, bool fert);

template <class AM_t> void test_rescore(Model &model, AM_t &am, Corpus &testing, bool doco);
template <class AM_t> void test_decode(Model &model, AM_t &am, std::string test_file, bool doco, int beam);
template <class AM_t> void test_kbest_arcs(Model &model, AM_t &am, string test_file, int top_k);
template <class AM_t> void test_decode(Model &model, AM_t &am, std::string test_file, bool doco, unsigned beam);
template <class AM_t> void test_kbest_arcs(Model &model, AM_t &am, string test_file, unsigned top_k);
template <class AM_t> void fert_stats(Model &model, AM_t &am, Corpus &devel, bool global_fert);

const Sentence* context(const Corpus &corpus, unsigned i);
@@ -146,8 +150,6 @@ int main_body(variables_map vm)
if (vm.count("lstm")) flavour = "LSTM";
else if (vm.count("gru")) flavour = "GRU";

typedef vector<int> Sentence;
typedef pair<Sentence, Sentence> SentencePair;
Corpus training, devel, testing;
string line;
cerr << "Reading training data from " << vm["train"].as<string>() << "...\n";
@@ -204,15 +206,25 @@ int main_body(variables_map vm)

cerr << "Parameters will be written to: " << fname << endl;

Model model;
//bool use_momentum = false;
Trainer* sgd = nullptr;
//if (use_momentum)
//sgd = new MomentumSGDTrainer(&model);
//else
sgd = new SimpleSGDTrainer(&model);
sgd->eta = 0.01f;
//sgd = new AdadeltaTrainer(&model);
Model model;
Trainer* sgd = nullptr;
unsigned sgd_type = vm["sgd_trainer"].as<unsigned>();
if (sgd_type == 1)
sgd = new MomentumSGDTrainer(model, vm["lr_eta"].as<float>());
else if (sgd_type == 2)
sgd = new AdagradTrainer(model, vm["lr_eta"].as<float>());
else if (sgd_type == 3)
sgd = new AdadeltaTrainer(model);
else if (sgd_type == 4)
sgd = new AdamTrainer(model, vm["lr_eta"].as<float>());
else if (sgd_type == 0)//Vanilla SGD trainer
sgd = new SimpleSGDTrainer(model, vm["lr_eta"].as<float>());
else
assert("Unknown SGD trainer type! (0: vanilla SGD; 1: momentum SGD; 2: Adagrad; 3: AdaDelta; 4: Adam)");
sgd->eta_decay = vm["lr_eta_decay"].as<float>();
sgd->sparse_updates_enabled = vm["sparse_updates"].as<bool>();
if (!sgd->sparse_updates_enabled)
cerr << "Sparse updates for lookup parameter(s) to be disabled!" << endl;

cerr << "%% Using " << flavour << " recurrent units" << endl;
AttentionalModel<rnn_t> am(&model, SRC_VOCAB_SIZE, TGT_VOCAB_SIZE,
@@ -234,19 +246,19 @@
vm["epochs"].as<int>(), doco, vm["coverage"].as<float>(), vm.count("display"),
fert);
else if (vm.count("kbest"))
test_kbest_arcs(model, am, vm["kbest"].as<string>(), vm["topk"].as<int>());
test_kbest_arcs(model, am, vm["kbest"].as<string>(), vm["topk"].as<unsigned>());
else if (vm.count("test")) {
if (vm.count("rescore"))
test_rescore(model, am, testing, doco);
else // test
test_decode(model, am, vm["test"].as<string>(), doco, vm["beam"].as<int>());
test_decode(model, am, vm["test"].as<string>(), doco, vm["beam"].as<unsigned>());
}
else if (vm.count("fert-stats"))
fert_stats(model, am, devel, vm.count("fertility"));

delete sgd;

//cnn::Free();
//dynet::Free();

return EXIT_SUCCESS;
}
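The trainer selection earlier in main_body() is the main functional change in this file: instead of a hard-coded SimpleSGDTrainer built from a Model*, the DyNet trainer is chosen at runtime and constructed from a Model reference. A minimal sketch of the same dispatch as a standalone helper (the helper itself is hypothetical; the constructors and fields are exactly the ones used in the diff):

```cpp
#include "dynet/model.h"
#include "dynet/training.h"

using namespace dynet;

// Hypothetical helper mirroring the if/else chain in main_body() above.
Trainer* make_trainer(Model& model, unsigned sgd_type, float eta) {
    switch (sgd_type) {
        case 1: return new MomentumSGDTrainer(model, eta); // SGD with momentum
        case 2: return new AdagradTrainer(model, eta);     // adaptive per-parameter rates
        case 3: return new AdadeltaTrainer(model);         // no explicit learning rate
        case 4: return new AdamTrainer(model, eta);        // adaptive moment estimation
        case 0: return new SimpleSGDTrainer(model, eta);   // vanilla SGD
        default: return nullptr;                           // unknown trainer type
    }
}
```

The caller would then set eta_decay and sparse_updates_enabled on the returned trainer, exactly as main_body() does above.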
@@ -264,9 +276,9 @@ void test_rescore(Model &model, AM_t &am, Corpus &testing, bool doco)
tie(ssent, tsent, docid) = testing[i];

ComputationGraph cg;
am.BuildGraph(ssent, tsent, cg, nullptr, (doco) ? context(testing, i) : nullptr);
auto iloss = am.BuildGraph(ssent, tsent, cg, nullptr, (doco) ? context(testing, i) : nullptr);

double loss = as_scalar(cg.forward());
double loss = as_scalar(cg.forward(iloss));
cout << i << " |||";
for (auto &w: ssent)
cout << " " << sd.convert(w);
@@ -286,10 +298,8 @@ }
}

template <class AM_t>
void test_decode(Model &model, AM_t &am, string test_file, bool doco, int beam)
void test_decode(Model &model, AM_t &am, string test_file, bool doco, unsigned beam)
{
double tloss = 0;
int tchars = 0;
int lno = 0;

cerr << "Reading test examples from " << test_file << endl;
Expand All @@ -304,7 +314,7 @@ void test_decode(Model &model, AM_t &am, string test_file, bool doco, int beam)
if (doco)
source = read_numbered_sentence(line, &sd, num);
else
source = read_sentence(line, &sd);
source = read_sentence(line, sd);

if (source.front() != kSRC_SOS && source.back() != kSRC_EOS) {
cerr << "Sentence in " << test_file << ":" << lno << " didn't start or end with <s>, </s>\n";
@@ -339,7 +349,7 @@
}

template <class AM_t>
void test_kbest_arcs(Model &model, AM_t &am, string test_file, int top_k)
void test_kbest_arcs(Model &model, AM_t &am, string test_file, unsigned top_k)
{
// only suitable for monolingual setting, of predicting a sentence given preceeding sentence
cerr << "Reading test examples from " << test_file << endl;
@@ -369,7 +379,7 @@ void test_kbest_arcs(Model &model, AM_t &am, string test_file, int top_k)
errs.push_back(i_err);
}
Expression i_nerr = sum(errs);
double loss = as_scalar(cg.incremental_forward());
double loss = as_scalar(cg.incremental_forward(i_nerr));

//cout << last_last_id << ":" << last_id << " |||";
//for (auto &w: source) cout << " " << sd.convert(w);
@@ -483,9 +493,9 @@ void train(Model &model, AM_t &am, Corpus &training, Corpus &devel,
}

bool first = true;
int report = 0;
unsigned report = 0;
unsigned lines = 0;
int epoch = 0;
unsigned epoch = 0;
Sentence ssent, tsent;
int docid;

@@ -497,8 +507,8 @@
tie(ssent, tsent, docid) = devel[i];
ComputationGraph cg;
Expression alignment;
am.BuildGraph(ssent, tsent, cg, &alignment, (doco) ? context(devel, i) : nullptr);
cg.forward();
auto iloss = am.BuildGraph(ssent, tsent, cg, &alignment, (doco) ? context(devel, i) : nullptr);
cg.forward(iloss);

cout << "\n====== SENTENCE " << i << " =========\n";
am.display_ascii(ssent, tsent, cg, alignment, sd, td);
@@ -586,7 +596,7 @@ void train(Model &model, AM_t &am, Corpus &training, Corpus &devel,
objective = objective + fertility_nll;

// perform forward computation for aggregate objective
cg.forward();
cg.forward(objective);

// grab the parts of the objective
loss += as_scalar(cg.get_value(xent.i));
@@ -595,7 +605,7 @@
if (fert)
loss_fert += as_scalar(cg.get_value(fertility_nll.i));

cg.backward();
cg.backward(objective);
sgd.update();
++lines;
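The hunk above shows the core of the migration inside the training loop: ComputationGraph::forward() and backward() now take the Expression to evaluate rather than implicitly using the most recently added node, and the same pattern recurs in test_rescore, test_kbest_arcs and the dev-loss loop below. A self-contained toy example of the new calling convention (the one-parameter model is purely illustrative; the forward/backward/update calls match those used in this diff):

```cpp
#include "dynet/dynet.h"
#include "dynet/expr.h"
#include "dynet/training.h"

#include <iostream>

using namespace dynet;
// Note: on DyNet versions where the expr namespace has not yet been merged into
// dynet, the expression helpers below live in dynet::expr instead.

int main(int argc, char** argv) {
    dynet::initialize(argc, argv);

    Model model;
    SimpleSGDTrainer sgd(model, 0.1f);
    Parameter p_w = model.add_parameters({1});  // a single scalar parameter

    for (int iter = 0; iter < 10; ++iter) {
        ComputationGraph cg;
        Expression w = parameter(cg, p_w);
        Expression loss = squared_distance(w, input(cg, 3.0f)); // drive w towards 3
        double l = as_scalar(cg.forward(loss)); // forward() now takes the target expression
        cg.backward(loss);                      // and so does backward()
        sgd.update();                           // apply the gradient step
        std::cout << "iter " << iter << " loss " << l << std::endl;
    }
    return 0;
}
```

In train() above, the objective expression plays the role of loss and sgd is whichever trainer --sgd_trainer selected.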

@@ -627,8 +637,8 @@ void train(Model &model, AM_t &am, Corpus &training, Corpus &devel,
for (unsigned i = 0; i < devel.size(); ++i) {
tie(ssent, tsent, docid) = devel[i];
ComputationGraph cg;
am.BuildGraph(ssent, tsent, cg, nullptr, (doco) ? context(devel, i) : nullptr, nullptr, nullptr);
dloss += as_scalar(cg.forward());
auto idloss = am.BuildGraph(ssent, tsent, cg, nullptr, (doco) ? context(devel, i) : nullptr, nullptr, nullptr);
dloss += as_scalar(cg.forward(idloss));
dchars += tsent.size() - 1;
}
if (dloss < best) {
@@ -659,7 +669,7 @@ Corpus read_corpus(const string &filename, bool doco)
if (doco)
read_numbered_sentence_pair(line, &source, &sd, &target, &td, identifiers);
else
read_sentence_pair(line, &source, &sd, &target, &td);
read_sentence_pair(line, source, sd, target, td);
corpus.push_back(SentencePair(source, target, identifiers[0]));
stoks += source.size();
ttoks += target.size();