Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of github.com:moses-smt/mosesdecoder
- Loading branch information
Showing
11 changed files
with
422 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#include "Rule.h" | ||
|
||
#include "Node.h" | ||
#include "Subgraph.h" | ||
|
||
namespace Moses | ||
{ | ||
namespace GHKM | ||
{ | ||
|
||
int Rule::Scope(const std::vector<Symbol> &symbols) | ||
{ | ||
int scope = 0; | ||
bool predIsNonTerm = false; | ||
if (symbols[0].GetType() == NonTerminal) { | ||
++scope; | ||
predIsNonTerm = true; | ||
} | ||
for (std::size_t i = 1; i < symbols.size(); ++i) { | ||
bool isNonTerm = symbols[i].GetType() == NonTerminal; | ||
if (isNonTerm && predIsNonTerm) { | ||
++scope; | ||
} | ||
predIsNonTerm = isNonTerm; | ||
} | ||
if (predIsNonTerm) { | ||
++scope; | ||
} | ||
return scope; | ||
} | ||
|
||
bool Rule::PartitionOrderComp(const Node *a, const Node *b) | ||
{ | ||
const Span &aSpan = a->GetSpan(); | ||
const Span &bSpan = b->GetSpan(); | ||
assert(!aSpan.empty() && !bSpan.empty()); | ||
return *(aSpan.begin()) < *(bSpan.begin()); | ||
} | ||
|
||
} // namespace GHKM | ||
} // namespace Moses |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#pragma once | ||
#ifndef EXTRACT_GHKM_RULE_H_ | ||
#define EXTRACT_GHKM_RULE_H_ | ||
|
||
#include <string> | ||
#include <vector> | ||
|
||
#include "Alignment.h" | ||
|
||
namespace Moses | ||
{ | ||
namespace GHKM | ||
{ | ||
|
||
class Node; | ||
|
||
// Classifies a rule symbol as either a terminal or a non-terminal.
enum SymbolType {
  Terminal,    // terminal symbol
  NonTerminal  // non-terminal symbol
};
|
||
class Symbol { | ||
public: | ||
Symbol(const std::string &v, SymbolType t) : m_value(v) , m_type(t) {} | ||
|
||
const std::string &GetValue() const { | ||
return m_value; | ||
} | ||
SymbolType GetType() const { | ||
return m_type; | ||
} | ||
|
||
private: | ||
std::string m_value; | ||
SymbolType m_type; | ||
}; | ||
|
||
// Base class for ScfgRule and StsgRule. | ||
class Rule | ||
{ | ||
public: | ||
virtual ~Rule() {} | ||
|
||
const Alignment &GetAlignment() const { | ||
return m_alignment; | ||
} | ||
|
||
virtual int Scope() const = 0; | ||
|
||
protected: | ||
static bool PartitionOrderComp(const Node *, const Node *); | ||
|
||
static int Scope(const std::vector<Symbol>&); | ||
|
||
Alignment m_alignment; | ||
}; | ||
|
||
} // namespace GHKM | ||
} // namespace Moses | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
#include "StsgRule.h" | ||
|
||
#include "Node.h" | ||
#include "Subgraph.h" | ||
#include "SyntaxTree.h" | ||
|
||
#include <algorithm> | ||
|
||
namespace Moses | ||
{ | ||
namespace GHKM | ||
{ | ||
|
||
// Constructs an STSG rule from a subgraph fragment.
//
// The target side is built directly from the fragment.  The source side is
// the sequence of the fragment's leaves that have non-empty spans, sorted
// with PartitionOrderComp: TREE leaves become the non-terminal "X" and all
// other leaves (asserted to be SOURCE nodes) become terminals carrying the
// node's label.  The constructor then fills m_alignment with
// (source index, target leaf index) pairs and m_nonTermAlignment with, for
// each source-side non-terminal, the position of the matching TREE leaf
// among the target-side TREE leaves.
StsgRule::StsgRule(const Subgraph &fragment)
  : m_targetSide(fragment, true)
{
  // Source side

  const std::set<const Node *> &sinkNodes = fragment.GetLeaves();

  // Collect the subset of sink nodes that excludes target nodes with
  // empty spans.
  std::vector<const Node *> productiveSinks;
  productiveSinks.reserve(sinkNodes.size());
  for (std::set<const Node *>::const_iterator p = sinkNodes.begin();
       p != sinkNodes.end(); ++p) {
    const Node *sink = *p;
    if (!sink->GetSpan().empty()) {
      productiveSinks.push_back(sink);
    }
  }

  // Sort them into the order defined by their spans.
  std::sort(productiveSinks.begin(), productiveSinks.end(), PartitionOrderComp);

  // Build a map from target nodes to source-order indices, so that we
  // can construct the Alignment object later.
  std::map<const Node *, std::vector<int> > sinkToSourceIndices;
  // Maps each TREE sink to its rank among the source-side non-terminals.
  std::map<const Node *, int> nonTermSinkToSourceIndex;

  m_sourceSide.reserve(productiveSinks.size());
  int srcIndex = 0;
  int nonTermCount = 0;
  for (std::vector<const Node *>::const_iterator p = productiveSinks.begin();
       p != productiveSinks.end(); ++p, ++srcIndex) {
    const Node &sink = **p;
    if (sink.GetType() == TREE) {
      m_sourceSide.push_back(Symbol("X", NonTerminal));
      sinkToSourceIndices[&sink].push_back(srcIndex);
      nonTermSinkToSourceIndex[&sink] = nonTermCount++;
    } else {
      assert(sink.GetType() == SOURCE);
      m_sourceSide.push_back(Symbol(sink.GetLabel(), Terminal));
      // Add all aligned target words to the sinkToSourceIndices map
      const std::vector<Node *> &parents(sink.GetParents());
      for (std::vector<Node *>::const_iterator q = parents.begin();
           q != parents.end(); ++q) {
        if ((*q)->GetType() == TARGET) {
          sinkToSourceIndices[*q].push_back(srcIndex);
        }
      }
    }
  }

  // Alignment

  std::vector<const Node *> targetLeaves;
  m_targetSide.GetTargetLeaves(targetLeaves);

  m_alignment.reserve(targetLeaves.size());
  m_nonTermAlignment.resize(nonTermCount);

  // The target index is stored as an int in the alignment pairs, so convert
  // the leaf count once instead of comparing signed against unsigned on
  // every iteration.
  const int targetLeafCount = static_cast<int>(targetLeaves.size());

  // tgtIndex walks over all target leaves; tgtNonTermIndex counts only the
  // TREE leaves that have a non-empty span.
  for (int tgtIndex = 0, tgtNonTermIndex = 0; tgtIndex < targetLeafCount;
       ++tgtIndex) {
    const Node *leaf = targetLeaves[tgtIndex];
    assert(leaf->GetType() != SOURCE);
    // Leaves with empty spans take no part in the alignment.
    if (leaf->GetSpan().empty()) {
      continue;
    }
    std::map<const Node *, std::vector<int> >::iterator p =
      sinkToSourceIndices.find(leaf);
    assert(p != sinkToSourceIndices.end());
    const std::vector<int> &sourceNodes = p->second;
    for (std::vector<int>::const_iterator r = sourceNodes.begin();
         r != sourceNodes.end(); ++r) {
      // Named distinctly from the outer srcIndex to avoid shadowing it.
      const int alignedSrcIndex = *r;
      m_alignment.push_back(std::make_pair(alignedSrcIndex, tgtIndex));
    }
    if (leaf->GetType() == TREE) {
      m_nonTermAlignment[nonTermSinkToSourceIndex[leaf]] = tgtNonTermIndex++;
    }
  }
}
|
||
} // namespace GHKM | ||
} // namespace Moses |
Oops, something went wrong.