From 87a4f1954619a4a7eec531349e0865b245df7a0b Mon Sep 17 00:00:00 2001 From: Christophe SERVAN Date: Mon, 16 Feb 2015 19:02:46 +0100 Subject: [PATCH 1/5] Memory leak correction in TER algorithm --- mert/Jamfile | 1 + mert/TER/alignmentStruct.cpp | 25 +- mert/TER/alignmentStruct.h | 31 +- mert/TER/bestShiftStruct.cpp | 66 ++ mert/TER/bestShiftStruct.h | 48 +- mert/TER/hashMap.cpp | 232 ++-- mert/TER/hashMap.h | 44 +- mert/TER/hashMapInfos.cpp | 239 ++-- mert/TER/hashMapInfos.h | 46 +- mert/TER/hashMapStringInfos.cpp | 313 +++--- mert/TER/hashMapStringInfos.h | 46 +- mert/TER/infosHasher.cpp | 58 +- mert/TER/infosHasher.h | 40 +- mert/TER/stringHasher.cpp | 46 +- mert/TER/stringHasher.h | 32 +- mert/TER/stringInfosHasher.cpp | 58 +- mert/TER/stringInfosHasher.h | 40 +- mert/TER/terAlignment.cpp | 339 +++--- mert/TER/terAlignment.h | 79 +- mert/TER/terShift.cpp | 164 ++- mert/TER/terShift.h | 53 +- mert/TER/tercalc.cpp | 1832 ++++++++++++++++++++----------- mert/TER/tercalc.h | 104 +- mert/TER/tools.cpp | 1224 ++++++++++++--------- mert/TER/tools.h | 116 +- 25 files changed, 3108 insertions(+), 2168 deletions(-) create mode 100644 mert/TER/bestShiftStruct.cpp diff --git a/mert/Jamfile b/mert/Jamfile index ee8a1fcc34..4dd2fb5409 100644 --- a/mert/Jamfile +++ b/mert/Jamfile @@ -46,6 +46,7 @@ TER/infosHasher.cpp TER/stringInfosHasher.cpp TER/tercalc.cpp TER/tools.cpp +TER/bestShiftStruct.cpp TerScorer.cpp CderScorer.cpp MeteorScorer.cpp diff --git a/mert/TER/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp index e42ec4a140..e2a880396d 100644 --- a/mert/TER/alignmentStruct.cpp +++ b/mert/TER/alignmentStruct.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as 
published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -23,15 +23,24 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; namespace TERCpp { -string alignmentStruct::toString() -{ - stringstream s; + string alignmentStruct::toString() + { + stringstream s; // s << "nword : " << vectorToString(nwords)< @@ -34,10 +34,10 @@ using namespace Tools; namespace TERCpp { -class alignmentStruct -{ -private: -public: + class alignmentStruct + { + private: + public: // alignmentStruct(); // alignmentStruct (int _start, int _end, int _moveto, int _newloc); @@ -53,14 +53,15 @@ class alignmentStruct // int end; // int moveto; // int newloc; - vector nwords; // The words we shifted - vector alignment ; // for pra_more output - vector aftershift; // for pra_more output - // This is used to store the cost of a shift, so we don't have to - // calculate it multiple times. - double cost; - string toString(); -}; + vector nwords; // The words we shifted + vector alignment ; // for pra_more output + vector aftershift; // for pra_more output + // This is used to store the cost of a shift, so we don't have to + // calculate it multiple times. + double cost; + string toString(); + void set(alignmentStruct l_alignmentStruct); + }; } #endif \ No newline at end of file diff --git a/mert/TER/bestShiftStruct.cpp b/mert/TER/bestShiftStruct.cpp new file mode 100644 index 0000000000..8c27f1ff81 --- /dev/null +++ b/mert/TER/bestShiftStruct.cpp @@ -0,0 +1,66 @@ +/********************************* +tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation. 
+ +Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France +Contact: christophe.servan@lium.univ-lemans.fr + +The tercpp tool and library are free software: you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation, either version 3 of the licence, or +(at your option) any later version. + +This program and library are distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this library; if not, write to the Free Software Foundation, +Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +**********************************/ +#include "bestShiftStruct.h" + +using namespace std; + +namespace TERCpp +{ + bestShiftStruct::bestShiftStruct() + { + m_best_shift=new terShift(); + m_best_align=new terAlignment(); + m_empty=new bool(false); + } + // Frees every object allocated by the constructor, m_empty included. + bestShiftStruct::~bestShiftStruct() + { + delete(m_best_align); + delete(m_best_shift); + delete(m_empty); + } + // Overwrites the flag in place: allocating a new bool on every call leaked the previous one. + void bestShiftStruct::setEmpty(bool b) + { + *m_empty=b; + } + void bestShiftStruct::setBestShift(terShift * l_terShift) + { + m_best_shift->set(l_terShift); + } + void bestShiftStruct::setBestAlign(terAlignment * l_terAlignment) + { + m_best_align->set(l_terAlignment); + } + // Returns the best shift and the best alignment as text, one per line. + string bestShiftStruct::toString() + { + stringstream s; + s << m_best_shift->toString() << endl; + s << m_best_align->toString() << endl; +// s << (*m_empty) << endl; + return s.str(); + } + bool bestShiftStruct::getEmpty() + { + return (*(m_empty)); + } +} diff --git a/mert/TER/bestShiftStruct.h b/mert/TER/bestShiftStruct.h index d68f2319fc..144787faae 100644 --- a/mert/TER/bestShiftStruct.h +++ b/mert/TER/bestShiftStruct.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans,
France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef __BESTSHIFTSTRUCT_H_ -#define __BESTSHIFTSTRUCT_H_ +#ifndef __BESTSHIFTSTRUCT_H__ +#define __BESTSHIFTSTRUCT_H__ #include @@ -36,10 +36,10 @@ using namespace Tools; namespace TERCpp { -class bestShiftStruct -{ -private: -public: + class bestShiftStruct + { + private: + public: // alignmentStruct(); // alignmentStruct (int _start, int _end, int _moveto, int _newloc); @@ -55,16 +55,36 @@ class bestShiftStruct // int end; // int moveto; // int newloc; - terShift m_best_shift; - terAlignment m_best_align; - bool m_empty; + terShift * m_best_shift; + terAlignment * m_best_align; + bool * m_empty; + bestShiftStruct(); + ~bestShiftStruct(); + // By const reference: a by-value copy would share these raw pointers and delete them when the call returns. + inline void set(const bestShiftStruct & l_bestShiftStruct) + { + m_best_shift->set(l_bestShiftStruct.m_best_shift); + m_best_align->set(l_bestShiftStruct.m_best_align); + setEmpty(*(l_bestShiftStruct.m_empty)); + } + inline void set(bestShiftStruct * l_bestShiftStruct) + { + m_best_shift->set(l_bestShiftStruct->m_best_shift); + m_best_align->set(l_bestShiftStruct->m_best_align); + setEmpty(l_bestShiftStruct->getEmpty()); + } + void setEmpty(bool b); + void setBestShift(terShift * l_terShift); + void setBestAlign(terAlignment * l_terAlignment); + string toString(); + bool getEmpty(); // vector nwords; // The words we shifted // char* alignment ; // for pra_more output // vector aftershift; //
for pra_more output - // This is used to store the cost of a shift, so we don't have to - // calculate it multiple times. + // This is used to store the cost of a shift, so we don't have to + // calculate it multiple times. // double cost; -}; + }; } -#endif \ No newline at end of file +#endif diff --git a/mert/TER/hashMap.cpp b/mert/TER/hashMap.cpp index 253fda7151..de84ff796d 100644 --- a/mert/TER/hashMap.cpp +++ b/mert/TER/hashMap.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -28,142 +28,156 @@ using namespace std; namespace HashMapSpace { // hashMap::hashMap(); -/* hashMap::~hashMap() + /* hashMap::~hashMap() + { + // vector::const_iterator del = m_hasher.begin(); + for ( vector::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) + { + delete(*del); + } + }*/ + /** + * int hashMap::trouve ( long searchKey ) + * @param searchKey + * @return + */ + int hashMap::trouve ( long searchKey ) { -// vector::const_iterator del = m_hasher.begin(); - for ( vector::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) - { - delete(*del); - } - }*/ -/** - * int hashMap::trouve ( long searchKey ) - * @param searchKey - * @return - */ -int hashMap::trouve ( long searchKey ) -{ - long foundKey; + long foundKey; // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() 
; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -int hashMap::trouve ( string key ) -{ - long searchKey=hashValue ( key ); - long foundKey;; + int hashMap::trouve ( string key ) + { + long searchKey=hashValue ( key ); + long foundKey;; // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -/** - * long hashMap::hashValue ( string key ) - * @param key - * @return - */ -long hashMap::hashValue ( string key ) -{ - locale loc; // the "C" locale - const collate& coll = use_facet >(loc); - return coll.hash(key.data(),key.data()+key.length()); + /** + * long hashMap::hashValue ( string key ) + * @param key + * @return + */ + long hashMap::hashValue ( string key ) + { + locale loc; // the "C" locale + const collate& coll = use_facet >(loc); + return coll.hash(key.data(),key.data()+key.length()); // boost::hash hasher; // return hasher ( key ); -} -/** - * void hashMap::addHasher ( string key, string value ) - * @param key - * @param value - */ -void hashMap::addHasher ( string key, string value ) -{ - if ( trouve ( hashValue ( key ) ) ==0 ) { + } + /** + * void hashMap::addHasher ( string key, string value ) + * @param key + * @param value + */ + void hashMap::addHasher ( string key, string value ) + { + if ( trouve ( hashValue ( key ) ) ==0 ) + { // cerr << "ICI1" <::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - 
return ( *l_hasher ); + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return ( *l_hasher ); + } + } + return defaut; } - } - return defaut; -} -string hashMap::getValue ( string key ) -{ - long searchKey=hashValue ( key ); - long foundKey; + string hashMap::getValue ( string key ) + { + long searchKey=hashValue ( key ); + long foundKey; // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundValue= ( *l_hasher ).getValue(); - if ( foundValue.compare ( value ) == 0 ) { - return ( *l_hasher ).getKey(); + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundValue= ( *l_hasher ).getValue(); + if ( foundValue.compare ( value ) == 0 ) + { + return ( *l_hasher ).getKey(); + } + } + return ""; } - } - return ""; -} -void hashMap::setValue ( string key , string value ) -{ - long searchKey=hashValue ( key ); - long foundKey; + void hashMap::setValue ( string key , string value ) + { + long searchKey=hashValue ( key ); + long foundKey; // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - ( *l_hasher ).setValue ( value ); + for ( vector:: iterator l_hasher=m_hasher.begin() ; 
l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + ( *l_hasher ).setValue ( value ); // return ( *l_hasher ).getValue(); + } + } } - } -} -/** - * - */ -void hashMap::printHash() -{ - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; - } -} + /** + * + */ + void hashMap::printHash() + { + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; + } + } diff --git a/mert/TER/hashMap.h b/mert/TER/hashMap.h index c2708b3600..017e6b831b 100644 --- a/mert/TER/hashMap.h +++ b/mert/TER/hashMap.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* * Generic hashmap manipulation functions */ -#ifndef __HASHMAP_H_ -#define __HASHMAP_H_ +#ifndef __HASHMAP_H__ +#define __HASHMAP_H__ #include #include "stringHasher.h" #include @@ -35,27 +35,27 @@ using namespace std; namespace HashMapSpace { -class hashMap -{ -private: - vector m_hasher; + class hashMap + { + private: + vector m_hasher; -public: + public: // ~hashMap(); - long hashValue ( string key ); - int trouve ( long searchKey ); - int trouve ( string key ); - void addHasher ( string key, string value ); - stringHasher getHasher ( string key ); - string getValue ( string key ); - string searchValue ( string key ); - void setValue ( string key , string value ); - void printHash(); - vector getHashMap(); - string printStringHash(); - string printStringHash2(); - string printStringHashForLexicon(); -}; + long hashValue ( string key ); + int trouve ( long searchKey ); + int trouve ( string key ); + void addHasher ( string key, string value ); + stringHasher getHasher ( string key ); + string getValue ( string key ); + string searchValue ( string key ); + void setValue ( string key , string value ); + void printHash(); + vector getHashMap(); + string printStringHash(); + string printStringHash2(); + string printStringHashForLexicon(); + }; } diff --git a/mert/TER/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp index 0ab6d21b24..23f57d8088 100644 --- a/mert/TER/hashMapInfos.cpp +++ b/mert/TER/hashMapInfos.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -28,108 +28,117 @@ using namespace std; namespace HashMapSpace { // hashMapInfos::hashMap(); -/* hashMapInfos::~hashMap() + /* hashMapInfos::~hashMap() + { + // vector::const_iterator del = m_hasher.begin(); + for ( vector::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) + { + delete(*del); + } + }*/ + /** + * int hashMapInfos::trouve ( long searchKey ) + * @param searchKey + * @return + */ + int hashMapInfos::trouve ( long searchKey ) { -// vector::const_iterator del = m_hasher.begin(); - for ( vector::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) - { - delete(*del); - } - }*/ -/** - * int hashMapInfos::trouve ( long searchKey ) - * @param searchKey - * @return - */ -int hashMapInfos::trouve ( long searchKey ) -{ - long foundKey; + long foundKey; // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -int hashMapInfos::trouve ( string key ) -{ - long searchKey=hashValue ( key ); - long foundKey;; + int hashMapInfos::trouve ( string key ) + { + long searchKey=hashValue ( key ); + long foundKey;; // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -/** - * long hashMapInfos::hashValue ( string key ) - * @param key - * 
@return - */ -long hashMapInfos::hashValue ( string key ) -{ - locale loc; // the "C" locale - const collate& coll = use_facet >(loc); - return coll.hash(key.data(),key.data()+key.length()); + /** + * long hashMapInfos::hashValue ( string key ) + * @param key + * @return + */ + long hashMapInfos::hashValue ( string key ) + { + locale loc; // the "C" locale + const collate& coll = use_facet >(loc); + return coll.hash(key.data(),key.data()+key.length()); // boost::hash hasher; // return hasher ( key ); -} -/** - * void hashMapInfos::addHasher ( string key, string value ) - * @param key - * @param value - */ -void hashMapInfos::addHasher ( string key, vector value ) -{ - if ( trouve ( hashValue ( key ) ) ==0 ) { + } + /** + * void hashMapInfos::addHasher ( string key, string value ) + * @param key + * @param value + */ + void hashMapInfos::addHasher ( string key, vector value ) + { + if ( trouve ( hashValue ( key ) ) ==0 ) + { // cerr << "ICI1" < value ) -{ - addHasher ( key, value ); -} -infosHasher hashMapInfos::getHasher ( string key ) -{ - long searchKey=hashValue ( key ); - long foundKey; + m_hasher.push_back ( H ); + } + } + void hashMapInfos::addValue ( string key, vector value ) + { + addHasher ( key, value ); + } + infosHasher hashMapInfos::getHasher ( string key ) + { + long searchKey=hashValue ( key ); + long foundKey; // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return ( *l_hasher ); + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return ( *l_hasher ); + } + } + vector temp; + infosHasher defaut(0,"",temp); + return defaut; } - } - vector temp; - infosHasher defaut(0,"",temp); - return defaut; -} -vector hashMapInfos::getValue ( string key ) -{ - long 
searchKey=hashValue ( key ); - long foundKey; - vector retour; + vector hashMapInfos::getValue ( string key ) + { + long searchKey=hashValue ( key ); + long foundKey; + vector retour; // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()< hashMapInfos::getValue ( string key ) // } // -void hashMapInfos::setValue ( string key , vector value ) -{ - long searchKey=hashValue ( key ); - long foundKey; + void hashMapInfos::setValue ( string key , vector value ) + { + long searchKey=hashValue ( key ); + long foundKey; // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - ( *l_hasher ).setValue ( value ); + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + ( *l_hasher ).setValue ( value ); // return ( *l_hasher ).getValue(); + } + } + } + string hashMapInfos::toString () + { + stringstream to_return; + for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + to_return << (*l_hasher).toString(); + // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; + } + return to_return.str(); } - } -} -string hashMapInfos::toString () -{ - stringstream to_return; - for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - to_return << 
(*l_hasher).toString(); - // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; - } - return to_return.str(); -} -/** - * - */ -void hashMapInfos::printHash() -{ - for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { + /** + * + */ + void hashMapInfos::printHash() + { + for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; - } -} + } + } diff --git a/mert/TER/hashMapInfos.h b/mert/TER/hashMapInfos.h index e975aa738c..58cd50aefe 100644 --- a/mert/TER/hashMapInfos.h +++ b/mert/TER/hashMapInfos.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* * Generic hashmap manipulation functions */ -#ifndef __HASHMAPINFOS_H_ -#define __HASHMAPINFOS_H_ +#ifndef __HASHMAPINFOS_H__ +#define __HASHMAPINFOS_H__ #include #include "infosHasher.h" #include @@ -34,29 +34,29 @@ using namespace std; namespace HashMapSpace { -class hashMapInfos -{ -private: - vector m_hasher; + class hashMapInfos + { + private: + vector m_hasher; -public: + public: // ~hashMap(); - long hashValue ( string key ); - int trouve ( long searchKey ); - int trouve ( string key ); - void addHasher ( string key, vector value ); - void addValue ( string key, vector value ); - infosHasher getHasher ( string key ); - vector getValue ( string key ); + long hashValue ( string key ); + int trouve ( long searchKey ); + int trouve ( string key ); + void addHasher ( string key, vector value ); + void addValue ( string key, vector value ); + infosHasher getHasher ( string key ); + vector getValue ( string key ); // string searchValue ( string key ); - void setValue ( string key , vector value ); - void printHash(); - string toString(); - vector getHashMap(); - string printStringHash(); - string printStringHash2(); - string printStringHashForLexicon(); -}; + void setValue ( string key , vector value ); + void printHash(); + string toString(); + vector getHashMap(); + string printStringHash(); + string printStringHash2(); + string printStringHashForLexicon(); + }; } diff --git a/mert/TER/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp index d984bdadc8..773c148d4d 100644 --- a/mert/TER/hashMapStringInfos.cpp +++ b/mert/TER/hashMapStringInfos.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser 
General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -27,166 +27,179 @@ using namespace std; namespace HashMapSpace { -// hashMapStringInfos::hashMap(); -/* hashMapStringInfos::~hashMap() -{ -// vector::const_iterator del = m_hasher.begin(); - for ( vector::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) - { - delete(*del); - } -}*/ -/** -* int hashMapStringInfos::trouve ( long searchKey ) -* @param searchKey -* @return -*/ -int hashMapStringInfos::trouve ( long searchKey ) -{ - long foundKey; - // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - foundKey = ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + // hashMapStringInfos::hashMap(); + /* hashMapStringInfos::~hashMap() + { + // vector::const_iterator del = m_hasher.begin(); + for ( vector::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) + { + delete(*del); + } + }*/ + /** + * int hashMapStringInfos::trouve ( long searchKey ) + * @param searchKey + * @return + */ + int hashMapStringInfos::trouve ( long searchKey ) + { + long foundKey; + // vector::const_iterator l_hasher=m_hasher.begin(); + for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + foundKey = ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -int hashMapStringInfos::trouve ( string key ) -{ - long searchKey = hashValue ( key ); - long foundKey;; - // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - foundKey = ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + int hashMapStringInfos::trouve ( string key ) + { + long searchKey = hashValue ( key ); + long 
foundKey;; + // vector::const_iterator l_hasher=m_hasher.begin(); + for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + foundKey = ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -/** -* long hashMapStringInfos::hashValue ( string key ) -* @param key -* @return -*/ -long hashMapStringInfos::hashValue ( string key ) -{ - locale loc; // the "C" locale - const collate& coll = use_facet > ( loc ); - return coll.hash ( key.data(), key.data() + key.length() ); + /** + * long hashMapStringInfos::hashValue ( string key ) + * @param key + * @return + */ + long hashMapStringInfos::hashValue ( string key ) + { + locale loc; // the "C" locale + const collate& coll = use_facet > ( loc ); + return coll.hash ( key.data(), key.data() + key.length() ); // boost::hash hasher; // return hasher ( key ); -} -/** -* void hashMapStringInfos::addHasher ( string key, string value ) -* @param key -* @param value -*/ -void hashMapStringInfos::addHasher ( string key, vector value ) -{ - if ( trouve ( hashValue ( key ) ) == 0 ) { - // cerr << "ICI1" < value ) -{ - addHasher ( key, value ); -} -stringInfosHasher hashMapStringInfos::getHasher ( string key ) -{ - long searchKey = hashValue ( key ); - long foundKey; - // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - foundKey = ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return ( *l_hasher ); } - } - vector tmp; - stringInfosHasher defaut ( 0, "", tmp ); - return defaut; -} -vector hashMapStringInfos::getValue ( string key ) -{ - long searchKey = hashValue ( key ); - long foundKey; - vector retour; - // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - foundKey = ( *l_hasher ).getHashKey(); - if ( searchKey == 
foundKey ) { - // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()< value ) + { + if ( trouve ( hashValue ( key ) ) == 0 ) + { + // cerr << "ICI1" < foundValue; -// -// // vector::const_iterator l_hasher=m_hasher.begin(); -// for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) -// { -// foundValue= ( *l_hasher ).getValue(); -// /* if ( foundValue.compare ( value ) == 0 ) -// { -// return ( *l_hasher ).getKey(); -// }*/ -// } -// return ""; -// } -// - -void hashMapStringInfos::setValue ( string key , vector value ) -{ - long searchKey = hashValue ( key ); - long foundKey; - // vector::const_iterator l_hasher=m_hasher.begin(); - for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - foundKey = ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - ( *l_hasher ).setValue ( value ); - // return ( *l_hasher ).getValue(); + void hashMapStringInfos::addValue ( string key, vector value ) + { + addHasher ( key, value ); + } + stringInfosHasher hashMapStringInfos::getHasher ( string key ) + { + long searchKey = hashValue ( key ); + long foundKey; + // vector::const_iterator l_hasher=m_hasher.begin(); + for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + foundKey = ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return ( *l_hasher ); + } + } + vector tmp; + stringInfosHasher defaut ( 0, "", tmp ); + return defaut; + } + vector hashMapStringInfos::getValue ( string key ) + { + long searchKey = hashValue ( key ); + long foundKey; + vector retour; + // vector::const_iterator l_hasher=m_hasher.begin(); + for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + foundKey = ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()< foundValue; + // + // // vector::const_iterator 
l_hasher=m_hasher.begin(); + // for ( vector:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + // { + // foundValue= ( *l_hasher ).getValue(); + // /* if ( foundValue.compare ( value ) == 0 ) + // { + // return ( *l_hasher ).getKey(); + // }*/ + // } + // return ""; + // } + // + + void hashMapStringInfos::setValue ( string key , vector value ) + { + long searchKey = hashValue ( key ); + long foundKey; + // vector::const_iterator l_hasher=m_hasher.begin(); + for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + foundKey = ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + ( *l_hasher ).setValue ( value ); + // return ( *l_hasher ).getValue(); + } + } } - } -} -string hashMapStringInfos::toString () -{ - stringstream to_return; - for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - to_return << (*l_hasher).toString(); - // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; - } - return to_return.str(); -} + string hashMapStringInfos::toString () + { + stringstream to_return; + for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + to_return << (*l_hasher).toString(); + // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; + } + return to_return.str(); + } -/** -* -*/ -void hashMapStringInfos::printHash() -{ - for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; - } -} -vector< stringInfosHasher > hashMapStringInfos::getHashMap() -{ - return m_hasher; -} + /** + * + */ + void hashMapStringInfos::printHash() + { + for ( vector:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; 
l_hasher++ ) + { + // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; + } + } + vector< stringInfosHasher > hashMapStringInfos::getHashMap() + { + return m_hasher; + } diff --git a/mert/TER/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h index a0eae951db..3ea3794e53 100644 --- a/mert/TER/hashMapStringInfos.h +++ b/mert/TER/hashMapStringInfos.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* * Generic hashmap manipulation functions */ -#ifndef __HASHMAPSTRINGINFOS_H_ -#define __HASHMAPSTRINGINFOS_H_ +#ifndef __HASHMAPSTRINGINFOS_H__ +#define __HASHMAPSTRINGINFOS_H__ #include #include "stringInfosHasher.h" #include @@ -34,29 +34,29 @@ using namespace std; namespace HashMapSpace { -class hashMapStringInfos -{ -private: - vector m_hasher; + class hashMapStringInfos + { + private: + vector m_hasher; -public: + public: // ~hashMap(); - long hashValue ( string key ); - int trouve ( long searchKey ); - int trouve ( string key ); - void addHasher ( string key, vector value ); - void addValue ( string key, vector value ); - stringInfosHasher getHasher ( string key ); - vector getValue ( string key ); + long hashValue ( string key ); + int trouve ( long searchKey ); + int trouve ( string key ); + void addHasher ( string key, vector value ); + void addValue ( string key, vector value ); + stringInfosHasher getHasher ( string key ); + vector getValue ( string key ); // string searchValue ( string key ); - 
void setValue ( string key , vector value ); - void printHash(); - string toString(); - vector getHashMap(); - string printStringHash(); - string printStringHash2(); - string printStringHashForLexicon(); -}; + void setValue ( string key , vector value ); + void printHash(); + string toString(); + vector getHashMap(); + string printStringHash(); + string printStringHash2(); + string printStringHashForLexicon(); + }; } diff --git a/mert/TER/infosHasher.cpp b/mert/TER/infosHasher.cpp index 450b70d944..8ce23ae443 100644 --- a/mert/TER/infosHasher.cpp +++ b/mert/TER/infosHasher.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -27,35 +27,35 @@ using namespace Tools; namespace HashMapSpace { -infosHasher::infosHasher (long cle,string cleTxt, vector valueVecInt ) -{ - m_hashKey=cle; - m_key=cleTxt; - m_value=valueVecInt; -} + infosHasher::infosHasher (long cle,string cleTxt, vector valueVecInt ) + { + m_hashKey=cle; + m_key=cleTxt; + m_value=valueVecInt; + } // infosHasher::~infosHasher(){};*/ -long infosHasher::getHashKey() -{ - return m_hashKey; -} -string infosHasher::getKey() -{ - return m_key; -} -vector infosHasher::getValue() -{ - return m_value; -} -void infosHasher::setValue ( vector value ) -{ - m_value=value; -} -string infosHasher::toString() -{ - stringstream to_return; - to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl; - return to_return.str(); -} + long infosHasher::getHashKey() + { + return m_hashKey; + } + string infosHasher::getKey() + { + return m_key; + } + vector infosHasher::getValue() + { + return m_value; + } + void infosHasher::setValue ( vector value ) + { + m_value=value; + } + string infosHasher::toString() + { + stringstream to_return; + to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl; + return to_return.str(); + } // typedef stdext::hash_map HASH_S_S; diff --git a/mert/TER/infosHasher.h b/mert/TER/infosHasher.h index ab9c7b5ed2..692bde49d3 100644 --- a/mert/TER/infosHasher.h +++ b/mert/TER/infosHasher.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef __INFOSHASHER_H_ -#define __INFOSHASHER_H_ +#ifndef __INFOSHASHER_H__ +#define __INFOSHASHER_H__ #include // #include #include @@ -31,23 +31,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; namespace HashMapSpace { -class infosHasher -{ -private: - long m_hashKey; - string m_key; - vector m_value; - -public: - infosHasher ( long cle, string cleTxt, vector valueVecInt ); - long getHashKey(); - string getKey(); - vector getValue(); - void setValue ( vector value ); - string toString(); - - -}; + class infosHasher + { + private: + long m_hashKey; + string m_key; + vector m_value; + + public: + infosHasher ( long cle, string cleTxt, vector valueVecInt ); + long getHashKey(); + string getKey(); + vector getValue(); + void setValue ( vector value ); + string toString(); + + + }; } diff --git a/mert/TER/stringHasher.cpp b/mert/TER/stringHasher.cpp index 729310352d..f4d1526e8b 100644 --- a/mert/TER/stringHasher.cpp +++ b/mert/TER/stringHasher.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -26,29 +26,29 @@ using namespace std; namespace HashMapSpace { -stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt ) -{ - m_hashKey=cle; - m_key=cleTxt; - m_value=valueTxt; -} + stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt ) + { + m_hashKey=cle; + m_key=cleTxt; + m_value=valueTxt; + } // stringHasher::~stringHasher(){};*/ -long stringHasher::getHashKey() -{ - return m_hashKey; -} -string stringHasher::getKey() -{ - return m_key; -} -string stringHasher::getValue() -{ - return m_value; -} -void stringHasher::setValue ( string value ) -{ - m_value=value; -} + long stringHasher::getHashKey() + { + return m_hashKey; + } + string stringHasher::getKey() + { + return m_key; + } + string stringHasher::getValue() + { + return m_value; + } + void stringHasher::setValue ( string value ) + { + m_value=value; + } // typedef stdext::hash_map HASH_S_S; diff --git a/mert/TER/stringHasher.h b/mert/TER/stringHasher.h index 5b0ccfc944..e2a79834c0 100644 --- a/mert/TER/stringHasher.h +++ b/mert/TER/stringHasher.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef __STRINGHASHER_H_ -#define __STRINGHASHER_H_ +#ifndef __STRINGHASHER_H__ +#define __STRINGHASHER_H__ #include //#include #include @@ -28,22 +28,22 @@ using namespace std; namespace HashMapSpace { -class stringHasher -{ -private: - long m_hashKey; - string m_key; - string m_value; + class stringHasher + { + private: + long m_hashKey; + string m_key; + string m_value; -public: - stringHasher ( long cle, string cleTxt, string valueTxt ); - long getHashKey(); - string getKey(); - string getValue(); - void setValue ( string value ); + public: + stringHasher ( long cle, string cleTxt, string valueTxt ); + long getHashKey(); + string getKey(); + string getValue(); + void setValue ( string value ); -}; + }; } diff --git a/mert/TER/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp index ecbc10fa50..007fd720f2 100644 --- a/mert/TER/stringInfosHasher.cpp +++ b/mert/TER/stringInfosHasher.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -27,35 +27,35 @@ using namespace Tools; namespace HashMapSpace { -stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector valueVecInt ) -{ - m_hashKey=cle; - m_key=cleTxt; - m_value=valueVecInt; -} + stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector valueVecInt ) + { + m_hashKey=cle; + m_key=cleTxt; + m_value=valueVecInt; + } // stringInfosHasher::~stringInfosHasher(){};*/ -long stringInfosHasher::getHashKey() -{ - return m_hashKey; -} -string stringInfosHasher::getKey() -{ - return m_key; -} -vector stringInfosHasher::getValue() -{ - return m_value; -} -void stringInfosHasher::setValue ( vector value ) -{ - m_value=value; -} -string stringInfosHasher::toString() -{ - stringstream to_return; - to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl; - return to_return.str(); -} + long stringInfosHasher::getHashKey() + { + return m_hashKey; + } + string stringInfosHasher::getKey() + { + return m_key; + } + vector stringInfosHasher::getValue() + { + return m_value; + } + void stringInfosHasher::setValue ( vector value ) + { + m_value=value; + } + string stringInfosHasher::toString() + { + stringstream to_return; + to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl; + return to_return.str(); + } // typedef stdext::hash_map HASH_S_S; diff --git a/mert/TER/stringInfosHasher.h b/mert/TER/stringInfosHasher.h index e4369f27a0..f35e4596ba 100644 --- a/mert/TER/stringInfosHasher.h +++ b/mert/TER/stringInfosHasher.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any 
later version. @@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef __STRINGINFOSHASHER_H_ -#define __STRINGINFOSHASHER_H_ +#ifndef __STRINGINFOSHASHER_H__ +#define __STRINGINFOSHASHER_H__ #include // #include #include @@ -29,23 +29,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; namespace HashMapSpace { -class stringInfosHasher -{ -private: - long m_hashKey; - string m_key; - vector m_value; - -public: - stringInfosHasher ( long cle, string cleTxt, vector valueVecInt ); - long getHashKey(); - string getKey(); - vector getValue(); - void setValue ( vector value ); - string toString(); - - -}; + class stringInfosHasher + { + private: + long m_hashKey; + string m_key; + vector m_value; + + public: + stringInfosHasher ( long cle, string cleTxt, vector valueVecInt ); + long getHashKey(); + string getKey(); + vector getValue(); + void setValue ( vector value ); + string toString(); + + + }; } diff --git a/mert/TER/terAlignment.cpp b/mert/TER/terAlignment.cpp index ec7bcafb76..dda4a42392 100644 --- a/mert/TER/terAlignment.cpp +++ b/mert/TER/terAlignment.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -24,163 +24,244 @@ using namespace std; namespace TERCpp { -terAlignment::terAlignment() -{ + terAlignment::terAlignment() + { // vector ref; // vector hyp; // vector aftershift; - // TERshift[] allshifts = null; + // TERshift[] allshifts = null; - numEdits=0; - numWords=0; - bestRef=""; + numEdits=0; + numWords=0; +// bestRef=""; - numIns=0; - numDel=0; - numSub=0; - numSft=0; - numWsf=0; -} -string terAlignment::toString() -{ - stringstream s; - s.str ( "" ); - s << "Original Ref: \t" << join ( " ", ref ) << endl; - s << "Original Hyp: \t" << join ( " ", hyp ) <((int)l_terAlignment.allshifts.size()))); + for (int l_i=0; l_i< (int)l_terAlignment.allshifts.size(); l_i++) + { + allshifts.at(l_i).set(l_terAlignment.allshifts.at(l_i)); + } + + } + void terAlignment::set(terAlignment* l_terAlignment) + { + numEdits=l_terAlignment->numEdits; + numWords=l_terAlignment->numWords; + bestRef=l_terAlignment->bestRef; + numIns=l_terAlignment->numIns; + numDel=l_terAlignment->numDel; + numSub=l_terAlignment->numSub; + numSft=l_terAlignment->numSft; + numWsf=l_terAlignment->numWsf; + averageWords=l_terAlignment->averageWords; + ref=l_terAlignment->ref; + hyp=l_terAlignment->hyp; + aftershift=l_terAlignment->aftershift; +// allshifts=l_terAlignment->allshifts; + hyp_int=l_terAlignment->hyp_int; + aftershift_int=l_terAlignment->aftershift_int; + alignment=l_terAlignment->alignment; + allshifts=(*(new vector((int)l_terAlignment->allshifts.size()))); + for (int l_i=0; l_i< (int)l_terAlignment->allshifts.size(); l_i++) + { + allshifts.at(l_i).set(l_terAlignment->allshifts.at(l_i)); + } + + } + + string terAlignment::toString() + { + stringstream s; + s.str ( "" ); + s << "Original Ref: \t" << join ( " ", ref ) << endl; + s << "Original Hyp: \t" << join ( " ", hyp ) <0 ) { - s << "Alignment: ("; + if ( ( int ) sizeof ( alignment ) >0 ) + { + s << "Alignment: ("; // s += "\nAlignment: ("; - for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) { - s << alignment[i]; + for ( int 
i = 0; i < ( int ) ( alignment.size() ); i++ ) + { + s << alignment[i]; // s+=alignment[i]; - } + } // s += ")"; - s << ")"; - } - s << endl; - if ( ( int ) allshifts.size() == 0 ) { + s << ")"; + } + s << endl; + if ( ( int ) allshifts.size() == 0 ) + { // s += "\nNumShifts: 0"; - s << "NumShifts: 0"; - } else { + s << "NumShifts: 0"; + } + else + { // s += "\nNumShifts: " + (int)allshifts.size(); - s << "NumShifts: "<< ( int ) allshifts.size(); - for ( int i = 0; i < ( int ) allshifts.size(); i++ ) { - s << endl << " " ; - s << ( ( terShift ) allshifts[i] ).toString(); + s << "NumShifts: "<< ( int ) allshifts.size(); + for ( int i = 0; i < ( int ) allshifts.size(); i++ ) + { + s << endl << " " ; + s << ( ( terShift ) allshifts[i] ).toString(); // s += "\n " + allshifts[i]; - } - } - s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")"; + } + } + s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")"; // s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")"; - return s.str(); + return s.str(); -} -string terAlignment::join ( string delim, vector arr ) -{ - if ( ( int ) arr.size() == 0 ) return ""; + } + string terAlignment::join ( string delim, vector arr ) + { + if ( ( int ) arr.size() == 0 ) return ""; // if ((int)delim.compare("") == 0) delim = new String(""); // String s = new String(""); - stringstream s; - s.str ( "" ); - for ( int i = 0; i < ( int ) arr.size(); i++ ) { - if ( i == 0 ) { - s << arr.at ( i ); - } else { - s << delim << arr.at ( i ); - } - } - return s.str(); + stringstream s; + s.str ( "" ); + for ( int i = 0; i < ( int ) arr.size(); i++ ) + { + if ( i == 0 ) + { + s << arr.at ( i ); + } + else + { + s << delim << arr.at ( i ); + } + } + return s.str(); // return ""; -} -double terAlignment::score() -{ - if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) { - return 1.0; - } - if ( numWords <= 0.0 ) { - return 0.0; - } - return ( double ) numEdits / numWords; -} 
-double terAlignment::scoreAv() -{ - if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) { - return 1.0; - } - if ( averageWords <= 0.0 ) { - return 0.0; - } - return ( double ) numEdits / averageWords; -} - -void terAlignment::scoreDetails() -{ - numIns = numDel = numSub = numWsf = numSft = 0; - if((int)allshifts.size()>0) { - for(int i = 0; i < (int)allshifts.size(); ++i) { - numWsf += allshifts[i].size(); } - numSft = allshifts.size(); - } - - if((int)alignment.size()>0 ) { - for(int i = 0; i < (int)alignment.size(); ++i) { - switch (alignment[i]) { - case 'S': - case 'T': - numSub++; - break; - case 'D': - numDel++; - break; - case 'I': - numIns++; - break; - } + double terAlignment::score() + { + if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) + { + return 1.0; + } + if ( numWords <= 0.0 ) + { + return 0.0; + } + return ( double ) numEdits / numWords; } - } - // if(numEdits != numSft + numDel + numIns + numSub) - // System.out.println("** Error, unmatch edit erros " + numEdits + - // " vs " + (numSft + numDel + numIns + numSub)); -} -string terAlignment::printAlignments() -{ - stringstream to_return; - for(int i = 0; i < (int)alignment.size(); ++i) { - char alignInfo=alignment.at(i); - if (alignInfo == 'A' ) { - alignInfo='A'; + double terAlignment::scoreAv() + { + if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) + { + return 1.0; + } + if ( averageWords <= 0.0 ) + { + return 0.0; + } + return ( double ) numEdits / averageWords; } - if (i==0) { - to_return << alignInfo; - } else { - to_return << " " << alignInfo; - } + void terAlignment::scoreDetails() + { + numIns = numDel = numSub = numWsf = numSft = 0; + if((int)allshifts.size()>0) + { + for(int i = 0; i < (int)allshifts.size(); ++i) + { + numWsf += allshifts[i].size(); + } + numSft = allshifts.size(); + } + + if((int)alignment.size()>0 ) + { + for(int i = 0; i < (int)alignment.size(); ++i) + { + switch (alignment[i]) + { + case 'S': + case 'T': + numSub++; + break; + case 'D': + numDel++; + break; + 
case 'I': + numIns++; + break; + } + } + } + // if(numEdits != numSft + numDel + numIns + numSub) + // System.out.println("** Error, unmatch edit erros " + numEdits + + // " vs " + (numSft + numDel + numIns + numSub)); + } + string terAlignment::printAlignments() + { + stringstream to_return; + for(int i = 0; i < (int)alignment.size(); ++i) + { + char alignInfo=alignment.at(i); + if (alignInfo == 'A' ) + { + alignInfo='A'; + } + + if (i==0) + { + to_return << alignInfo; + } + else + { + to_return << " " << alignInfo; + } + } + return to_return.str(); } - return to_return.str(); -} string terAlignment::printAllShifts() { - stringstream to_return; - if ( ( int ) allshifts.size() == 0 ) { + stringstream to_return; + if ( ( int ) allshifts.size() == 0 ) + { // s += "\nNumShifts: 0"; - to_return << "NbrShifts: 0"; - } else { + to_return << "NbrShifts: 0"; + } + else + { // s += "\nNumShifts: " + (int)allshifts.size(); - to_return << "NbrShifts: "<< ( int ) allshifts.size(); - for ( int i = 0; i < ( int ) allshifts.size(); i++ ) { - to_return << "\t" ; - to_return << ( ( terShift ) allshifts[i] ).toString(); + to_return << "NbrShifts: "<< ( int ) allshifts.size(); + for ( int i = 0; i < ( int ) allshifts.size(); i++ ) + { + to_return << "\t" ; + to_return << ( ( terShift ) allshifts[i] ).toString(); // s += "\n " + allshifts[i]; - } - } - return to_return.str(); + } + } + return to_return.str(); } } \ No newline at end of file diff --git a/mert/TER/terAlignment.h b/mert/TER/terAlignment.h index 2af0b74900..e9524df7c6 100644 --- a/mert/TER/terAlignment.h +++ b/mert/TER/terAlignment.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software 
Foundation, either version 3 of the licence, or (at your option) any later version. @@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef MERT_TER_TERALIGNMENT_H_ -#define MERT_TER_TERALIGNMENT_H_ +#ifndef __TERCPPTERALIGNMENT_H__ +#define __TERCPPTERALIGNMENT_H__ #include @@ -34,41 +34,44 @@ using namespace std; namespace TERCpp { -class terAlignment -{ -private: -public: - - terAlignment(); - string toString(); - void scoreDetails(); - - vector ref; - vector hyp; - vector aftershift; - vector allshifts; - vector hyp_int; - vector aftershift_int; - - double numEdits; - double numWords; - double averageWords; - vector alignment; - string bestRef; - - int numIns; - int numDel; - int numSub; - int numSft; - int numWsf; - - - string join ( string delim, vector arr ); - double score(); - double scoreAv(); - string printAlignments(); - string printAllShifts(); -}; + class terAlignment + { + private: + public: + + vector ref; + vector hyp; + vector aftershift; + vector allshifts; + vector hyp_int; + vector aftershift_int; + + double numEdits; + double numWords; + double averageWords; + vector alignment; + string bestRef; + + int numIns; + int numDel; + int numSub; + int numSft; + int numWsf; + + + terAlignment(); + string toString(); + void scoreDetails(); + + + string join ( string delim, vector arr ); + double score(); + double scoreAv(); + string printAlignments(); + string printAllShifts(); + void set(terAlignment& l_terAlignment); + void set(terAlignment* l_terAlignment); + }; } #endif \ No newline at end of file diff --git a/mert/TER/terShift.cpp b/mert/TER/terShift.cpp index 440b4d2ceb..e271ad6a7d 100644 --- a/mert/TER/terShift.cpp +++ b/mert/TER/terShift.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France 
Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -42,32 +42,70 @@ namespace TERCpp // numSft=0; // numWsf=0; // } -terShift::terShift () -{ - start = 0; - end = 0; - moveto = 0; - newloc = 0; - cost=1.0; -} -terShift::terShift ( int _start, int _end, int _moveto, int _newloc ) -{ - start = _start; - end = _end; - moveto = _moveto; - newloc = _newloc; - cost=1.0; -} + terShift::terShift () + { + start = 0; + end = 0; + moveto = 0; + newloc = 0; + cost=1.0; + shifted.clear(); + alignment.clear(); + aftershift.clear(); + } + terShift::terShift ( int _start, int _end, int _moveto, int _newloc ) + { + start = _start; + end = _end; + moveto = _moveto; + newloc = _newloc; + cost=1.0; + } -terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector _shifted ) -{ - start = _start; - end = _end; - moveto = _moveto; - newloc = _newloc; - shifted = _shifted; - cost=1.0; -} + terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector _shifted ) + { + start = _start; + end = _end; + moveto = _moveto; + newloc = _newloc; + shifted = _shifted; + cost=1.0; + } + void terShift::set(terShift l_terShift) + { + start=l_terShift.start; + end=l_terShift.end; + moveto=l_terShift.moveto; + newloc=l_terShift.newloc; + shifted=l_terShift.shifted; +// alignment=l_terShift.alignment; +// aftershift=l_terShift.aftershift; + } + void terShift::set(terShift *l_terShift) + { + start=l_terShift->start; + end=l_terShift->end; + moveto=l_terShift->moveto; + newloc=l_terShift->newloc; + shifted=l_terShift->shifted; +// alignment=l_terShift->alignment; +// aftershift=l_terShift->aftershift; + } + + void terShift::erase() + { + start = 0; 
+ end = 0; + moveto = 0; + newloc = 0; + cost=1.0; + shifted.clear(); + alignment.clear(); + aftershift.clear(); + } + + + // string terShift::vectorToString(vector vec) // { // string retour(""); @@ -78,38 +116,54 @@ terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector 0 ) { - s << " (" << vectorToString ( shifted ) << ")"; - } - return s.str(); -} + string terShift::toString() + { + stringstream s; + s.str ( "" ); + s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]"; + if ( ( int ) shifted.size() > 0 ) + { + s << " (" << vectorToString ( shifted ) << ")"; + } +// s<< endl; +// if ( ( int ) shifted.size() > 0 ) +// { +// s << " (" << vectorToString ( alignment ) << ")"; +// } +// s<< endl; +// if ( ( int ) shifted.size() > 0 ) +// { +// s << " (" << vectorToString ( aftershift ) << ")"; +// } + return s.str(); + } -/* The distance of the shift. */ -int terShift::distance() -{ - if ( moveto < start ) { - return start - moveto; - } else if ( moveto > end ) { - return moveto - end; - } else { - return moveto - start; - } -} + /* The distance of the shift. 
*/ + int terShift::distance() + { + if ( moveto < start ) + { + return start - moveto; + } + else if ( moveto > end ) + { + return moveto - end; + } + else + { + return moveto - start; + } + } -bool terShift::leftShift() -{ - return ( moveto < start ); -} + bool terShift::leftShift() + { + return ( moveto < start ); + } -int terShift::size() -{ - return ( end - start ) + 1; -} + int terShift::size() + { + return ( end - start ) + 1; + } // terShift terShift::operator=(terShift t) // { // diff --git a/mert/TER/terShift.h b/mert/TER/terShift.h index 74545e0de6..65a812d155 100644 --- a/mert/TER/terShift.h +++ b/mert/TER/terShift.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef MERT_TER_TERSHIFT_H_ -#define MERT_TER_TERSHIFT_H_ +#ifndef __TERCPPTERSHIFT_H__ +#define __TERCPPTERSHIFT_H__ #include @@ -34,32 +34,35 @@ using namespace Tools; namespace TERCpp { -class terShift -{ -private: -public: + class terShift + { + private: + public: - terShift(); - terShift ( int _start, int _end, int _moveto, int _newloc ); - terShift ( int _start, int _end, int _moveto, int _newloc, vector _shifted ); - string toString(); - int distance() ; - bool leftShift(); - int size(); + terShift(); + terShift ( int _start, int _end, int _moveto, int _newloc ); + terShift ( int _start, int _end, int _moveto, int _newloc, vector _shifted ); + string toString(); + int distance() ; + bool leftShift(); + int size(); // terShift operator=(terShift t); // string vectorToString(vector vec); - int start; - int end; - int moveto; - int newloc; - vector shifted; // The words we shifted - vector alignment ; // for pra_more output - vector aftershift; // for pra_more output - // This is used to store the cost of a shift, so we don't have to - // calculate it multiple times. - double cost; -}; + int start; + int end; + int moveto; + int newloc; + vector shifted; // The words we shifted + vector alignment ; // for pra_more output + vector aftershift; // for pra_more output + // This is used to store the cost of a shift, so we don't have to + // calculate it multiple times. 
+ double cost; + void set(terShift l_terShift); + void set(terShift *l_terShift); + void erase(); + }; } #endif \ No newline at end of file diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp index c4629c639c..8a84b49b3c 100644 --- a/mert/TER/tercalc.cpp +++ b/mert/TER/tercalc.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -35,724 +35,1238 @@ using namespace Tools; namespace TERCpp { -terCalc::terCalc() -{ - TAILLE_PERMUT_MAX = 50; - infinite = 999999.0; - shift_cost = 1.0; - insert_cost = 1.0; - delete_cost = 1.0; - substitute_cost = 1.0; - match_cost = 0.0; - NBR_SEGS_EVALUATED = 0; - NBR_PERMUTS_CONSID = 0; - NBR_BS_APPELS = 0; - TAILLE_BEAM = 20; - DIST_MAX_PERMUT = 50; - PRINT_DEBUG = false; - hypSpans.clear(); - refSpans.clear(); -} - - -terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref ) -{ + terCalc::terCalc() + { + TAILLE_PERMUT_MAX = 10; + NBR_PERMUT_MAX = 10; + infinite = 99999.0; + shift_cost = 1.0; + insert_cost = 1.0; + delete_cost = 1.0; + substitute_cost = 1.0; + match_cost = 0.0; + NBR_SEGS_EVALUATED = 0; + NBR_PERMUTS_CONSID = 0; + NBR_BS_APPELS = 0; + TAILLE_BEAM = 10; + DIST_MAX_PERMUT = 25; + PRINT_DEBUG = false; + hypSpans.clear(); + refSpans.clear(); + CALL_TER_ALIGN=0; + CALL_CALC_PERMUT=0; + CALL_FIND_BSHIFT=0; + MAX_LENGTH_SENTENCE=10; + S = new vector < vector < double > >(MAX_LENGTH_SENTENCE, std::vector(MAX_LENGTH_SENTENCE,0.0)); + P = new vector < vector < char > >(MAX_LENGTH_SENTENCE, std::vector(MAX_LENGTH_SENTENCE,' ')); + } - return minimizeDistanceEdition ( hyp, 
ref, hypSpans ); + terCalc::~terCalc() + { + delete(S); + delete(P); + } -} -terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref ) -{ - stringstream s; - s.str ( "" ); - string stringRef ( "" ); - string stringHyp ( "" ); - for ( vector::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) { - if ( l_it == ref.begin() ) { - s << ( *l_it ); - } else { - s << " " << ( *l_it ); + terAlignment terCalc::WERCalculation ( vector< string >& hyp , vector< string >& ref ) + { + + return minimizeDistanceEdition ( hyp, ref, hypSpans ); + } - } - stringRef = s.str(); - s.str ( "" ); - for ( vector::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) { - if ( l_itHyp == hyp.begin() ) { - s << ( *l_itHyp ); - } else { - s << " " << ( *l_itHyp ); + + terAlignment terCalc::TER ( vector< int >& hyp, vector< int >& ref ) + { + stringstream s; + s.str ( "" ); + string stringRef ( "" ); + string stringHyp ( "" ); + for ( vector::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) + { + if ( l_it == ref.begin() ) + { + s << ( *l_it ); + } + else + { + s << " " << ( *l_it ); + } + } + stringRef = s.str(); + s.str ( "" ); + for ( vector::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) + { + if ( l_itHyp == hyp.begin() ) + { + s << ( *l_itHyp ); + } + else + { + s << " " << ( *l_itHyp ); + } + } + stringHyp = s.str(); + s.str ( "" ); + vector l_vref=stringToVector ( stringRef , " " ); + vector l_vhyp=stringToVector ( stringHyp , " " ); + return TER ( l_vhyp , l_vref); } - } - stringHyp = s.str(); - s.str ( "" ); - return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) ); -} -hashMapInfos terCalc::createConcordMots ( vector hyp, vector ref ) -{ - hashMap tempHash; - hashMapInfos retour; - for ( int i = 0; i < ( int ) hyp.size(); i++ ) { - tempHash.addHasher ( hyp.at ( i ), "" ); - } - bool cor[ref.size() ]; - for ( int i = 0; i < ( int ) ref.size(); i++ ) { - if ( tempHash.trouve ( ( string ) ref.at 
( i ) ) ) { - cor[i] = true; - } else { - cor[i] = false; - } - } - for ( int start = 0; start < ( int ) ref.size(); start++ ) { - if ( cor[start] ) { - for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) ); end++ ) { - vector ajouter = subVector ( ref, start, end + 1 ); - string ajouterString = vectorToString ( ajouter ); - vector values = retour.getValue ( ajouterString ); - values.push_back ( start ); - if ( values.size() > 1 ) { - retour.setValue ( ajouterString, values ); - } else { - retour.addValue ( ajouterString, values ); + hashMapInfos terCalc::createConcordMots ( vector< string >& hyp, vector< string >& ref ) + { + hashMap tempHash; + hashMapInfos retour; + for ( int i = 0; i < ( int ) hyp.size(); i++ ) + { + tempHash.addHasher ( hyp.at ( i ), "" ); + } + bool cor[ref.size() ]; + for ( int i = 0; i < ( int ) ref.size(); i++ ) + { + if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) + { + cor[i] = true; + } + else + { + cor[i] = false; + } + } + for ( int start = 0; start < ( int ) ref.size(); start++ ) + { + if ( cor[start] ) + { + for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) );end++ ) + { + vector ajouter = subVector ( ref, start, end + 1 ); + string ajouterString = vectorToString ( ajouter ); + vector values = retour.getValue ( ajouterString ); + values.push_back ( start ); + if ( values.size() > 1 ) + { + retour.setValue ( ajouterString, values ); + } + else + { + retour.addValue ( ajouterString, values ); + } + } + } } - } + return retour; } - } - return retour; -} - -bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan ) -{ - if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) ) { - return true; - } - return false; -} + bool terCalc::trouverIntersection ( vecInt& refSpan, vecInt& hypSpan ) + { + if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at 
( 1 ) ) ) + { + return true; + } + return false; + } -terAlignment terCalc::minimizeDistanceEdition ( vector hyp, vector ref, vector curHypSpans ) -{ - double current_best = infinite; - double last_best = infinite; - int first_good = 0; - int current_first_good = 0; - int last_good = -1; - int cur_last_good = 0; - int last_peak = 0; - int cur_last_peak = 0; - int i, j; - double cost, icost, dcost; - double score; + terAlignment terCalc::minimizeDistanceEdition ( vector< string >& hyp, vector< string >& ref, vector< vecInt >& curHypSpans ) + { + double current_best = infinite; + double last_best = infinite; + int first_good = 0; + int current_first_good = 0; + int last_good = -1; + int cur_last_good = 0; + int last_peak = 0; + int cur_last_peak = 0; + int i=0; + int j=0; + int ref_size=0 ; + ref_size=( int ) ref.size(); + int hyp_size=0; + hyp_size=( int ) hyp.size(); + double cost, icost, dcost; + double score; + delete(S); + delete(P); + S = new vector < vector < double > >(ref_size+1, std::vector(hyp_size+1,-1.0)); + P = new vector < vector < char > >(ref_size+1, std::vector(hyp_size+1,'0')); - NBR_BS_APPELS++; + + NBR_BS_APPELS++; +// cerr << "Appels : " << NBR_BS_APPELS << endl; + +// for ( i = 0; i <= ref_size; i++ ) +// { +// for ( j = 0; j <= hyp_size; j++ ) +// { +// S->at(i).at(j) = -1.0; +// P->at(i).at(j) = '0'; +// } +// } + S->at(0).at(0) = 0.0; + for ( j = 0; j <= hyp_size; j++ ) + { + last_best = current_best; + current_best = infinite; + first_good = current_first_good; + current_first_good = -1; + last_good = cur_last_good; + cur_last_good = -1; + last_peak = cur_last_peak; + cur_last_peak = 0; + for ( i = first_good; i <= ref_size; i++ ) + { + if ( i > last_good ) + { + break; + } + if ( S->at(i).at(j) < 0 ) + { + continue; + } + score = S->at(i).at(j); + if ( ( j < hyp_size ) && ( score > last_best + TAILLE_BEAM ) ) + { + continue; + } + if ( current_first_good == -1 ) + { + current_first_good = i ; + } + if ( ( i < ref_size ) && ( j < hyp_size ) 
) + { + if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) + { + if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) + { + cost = match_cost + score; + if ( ( S->at(i+1).at(j+1) == -1 ) || ( cost < S->at(i+1).at(j+1) ) ) + { + S->at(i+1).at(j+1) = cost; + P->at(i+1).at(j+1) = 'A'; + } + if ( cost < current_best ) + { + current_best = cost; + } + if ( current_best == cost ) + { + cur_last_peak = i + 1; + } + } + else + { + cost = substitute_cost + score; + if ( ( S->at(i+1).at(j+1) < 0 ) || ( cost < S->at(i+1).at(j+1) ) ) + { + S->at(i+1).at(j+1) = cost; + P->at(i+1).at(j+1) = 'S'; + if ( cost < current_best ) + { + current_best = cost; + } + if ( current_best == cost ) + { + cur_last_peak = i + 1 ; + } + } + } + } + } + cur_last_good = i + 1; + if ( j < hyp_size ) + { + icost = score + insert_cost; + if ( ( S->at(i).at(j+1) < 0 ) || ( S->at(i).at(j+1) > icost ) ) + { + S->at(i).at(j+1) = icost; + P->at(i).at(j+1) = 'I'; + if ( ( cur_last_peak < i ) && ( current_best == icost ) ) + { + cur_last_peak = i; + } + } + } + if ( i < ref_size ) + { + dcost = score + delete_cost; + if ( ( S->at(i+1).at(j) < 0.0 ) || ( S->at(i+1).at(j) > dcost ) ) + { + S->at(i+1).at(j) = dcost; + P->at(i+1).at(j) = 'D'; + if ( i >= last_good ) + { + last_good = i + 1 ; + } + } + } + } + } - for ( i = 0; i <= ( int ) ref.size(); i++ ) { - for ( j = 0; j <= ( int ) hyp.size(); j++ ) { - S[i][j] = -1.0; - P[i][j] = '0'; - } - } - S[0][0] = 0.0; - for ( j = 0; j <= ( int ) hyp.size(); j++ ) { - last_best = current_best; - current_best = infinite; - first_good = current_first_good; - current_first_good = -1; - last_good = cur_last_good; - cur_last_good = -1; - last_peak = cur_last_peak; - cur_last_peak = 0; - for ( i = first_good; i <= ( int ) ref.size(); i++ ) { - if ( i > last_good ) { - break; - } - if ( S[i][j] < 0 ) { - continue; - } - score = S[i][j]; - if ( ( j < ( int ) hyp.size() ) && ( score > 
last_best + TAILLE_BEAM ) ) { - continue; - } - if ( current_first_good == -1 ) { - current_first_good = i ; - } - if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) ) { - if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) { - if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) { - cost = match_cost + score; - if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) ) { - S[i+1][j+1] = cost; - P[i+1][j+1] = 'A'; + int tracelength = 0; + i = ref.size(); + j = hyp.size(); + while ( ( i > 0 ) || ( j > 0 ) ) + { + tracelength++; + if ( P->at(i).at(j) == 'A' ) + { + i--; + j--; } - if ( cost < current_best ) { - current_best = cost; + else + if ( P->at(i).at(j) == 'S' ) + { + i--; + j--; + } + else + if ( P->at(i).at(j) == 'D' ) + { + i--; + } + else + if ( P->at(i).at(j) == 'I' ) + { + j--; + } + else + { + cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P->at(i).at(j) << endl; + exit ( -1 ); + } + } + vector path ( tracelength ); + i = ref.size(); + j = hyp.size(); + while ( ( i > 0 ) || ( j > 0 ) ) + { + path[--tracelength] = P->at(i).at(j); + if ( P->at(i).at(j) == 'A' ) + { + i--; + j--; } - if ( current_best == cost ) { - cur_last_peak = i + 1; + else + if ( P->at(i).at(j) == 'S' ) + { + i--; + j--; + } + else + if ( P->at(i).at(j) == 'D' ) + { + i--; + } + else + if ( P->at(i).at(j) == 'I' ) + { + j--; + } + } + terAlignment to_return; + to_return.numWords = ref_size; + to_return.alignment = path; + to_return.numEdits = S->at(ref_size).at(hyp_size); + to_return.hyp = hyp; + to_return.ref = ref; + to_return.averageWords = ref_size; + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl; + } + return to_return; + + } + void terCalc::minimizeDistanceEdition ( vector< string >& hyp, vector< string >& ref, vector< vecInt >& curHypSpans, 
terAlignment* to_return ) + { + double current_best = infinite; + double last_best = infinite; + int first_good = 0; + int current_first_good = 0; + int last_good = -1; + int cur_last_good = 0; + int last_peak = 0; + int cur_last_peak = 0; + int i=0; + int j=0; + int ref_size=0 ; + ref_size=( int ) ref.size(); + int hyp_size=0; + hyp_size=( int ) hyp.size(); + double cost, icost, dcost; + double score; + delete(S); + delete(P); + S = new vector < vector < double > >(ref_size+1, std::vector(hyp_size+1,-1.0)); + P = new vector < vector < char > >(ref_size+1, std::vector(hyp_size+1,'0')); + + NBR_BS_APPELS++; +// cerr << "Appels : " << NBR_BS_APPELS << endl; + +// for ( i = 0; i <= ref_size; i++ ) +// { +// for ( j = 0; j <= hyp_size; j++ ) +// { +// S->at(i).at(j) = -1.0; +// P->at(i).at(j) = '0'; +// } +// } + S->at(0).at(0) = 0.0; + for ( j = 0; j <= hyp_size; j++ ) + { + last_best = current_best; + current_best = infinite; + first_good = current_first_good; + current_first_good = -1; + last_good = cur_last_good; + cur_last_good = -1; + last_peak = cur_last_peak; + cur_last_peak = 0; + for ( i = first_good; i <= ref_size; i++ ) + { + if ( i > last_good ) + { + break; + } + if (S->at(i).at(j) < 0 ) + { + continue; + } + score = S->at(i).at(j); + if ( ( j < hyp_size ) && ( score > last_best + TAILLE_BEAM ) ) + { + continue; + } + if ( current_first_good == -1 ) + { + current_first_good = i ; + } + if ( ( i < ref_size ) && ( j < hyp_size ) ) + { + if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) + { + if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) + { + cost = match_cost + score; + if ( ( S->at(i+1).at(j+1) == -1 ) || ( cost < S->at(i+1).at(j+1) ) ) + { + S->at(i+1).at(j+1) = cost; + P->at(i+1).at(j+1) = 'A'; + } + if ( cost < current_best ) + { + current_best = cost; + } + if ( current_best == cost ) + { + cur_last_peak = i + 1; + } + } + else + { + cost = 
substitute_cost + score; + if ( ( S->at(i+1).at(j+1) < 0 ) || ( cost < S->at(i+1).at(j+1) ) ) + { + S->at(i+1).at(j+1) = cost; + P->at(i+1).at(j+1) = 'S'; + if ( cost < current_best ) + { + current_best = cost; + } + if ( current_best == cost ) + { + cur_last_peak = i + 1 ; + } + } + } + } + } + cur_last_good = i + 1; + if ( j < hyp_size ) + { + icost = score + insert_cost; + if ( ( S->at(i).at(j+1) < 0 ) || ( S->at(i).at(j+1) > icost ) ) + { + S->at(i).at(j+1) = icost; + P->at(i).at(j+1) = 'I'; + if ( ( cur_last_peak < i ) && ( current_best == icost ) ) + { + cur_last_peak = i; + } + } + } + if ( i < ref_size ) + { + dcost = score + delete_cost; + if ( ( S->at(i+1).at(j) < 0.0 ) || ( S->at(i+1).at(j) > dcost ) ) + { + S->at(i+1).at(j) = dcost; + P->at(i+1).at(j) = 'D'; + if ( i >= last_good ) + { + last_good = i + 1 ; + } + } + } } - } else { - cost = substitute_cost + score; - if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) ) { - S[i+1][j+1] = cost; - P[i+1][j+1] = 'S'; - if ( cost < current_best ) { - current_best = cost; - } - if ( current_best == cost ) { - cur_last_peak = i + 1 ; - } + } + + + int tracelength = 0; + i = ref_size;; + j = hyp_size; + while ( ( i > 0 ) || ( j > 0 ) ) + { + tracelength++; + if (P->at(i).at(j) == 'A' ) + { + i--; + j--; } - } + else + if (P->at(i).at(j) == 'S' ) + { + i--; + j--; + } + else + if (P->at(i).at(j) == 'D' ) + { + i--; + } + else + if (P->at(i).at(j) == 'I' ) + { + j--; + } + else + { + cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " <at(i).at(j) << endl; + exit ( -1 ); + } } - } - cur_last_good = i + 1; - if ( j < ( int ) hyp.size() ) { - icost = score + insert_cost; - if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) ) { - S[i][j+1] = icost; - P[i][j+1] = 'I'; - if ( ( cur_last_peak < i ) && ( current_best == icost ) ) { - cur_last_peak = i; - } + vector path ( tracelength ); + i = ref_size; + j = hyp_size; + while ( ( i > 0 ) || ( j > 0 ) ) + { + path[--tracelength] =P->at(i).at(j); + if 
(P->at(i).at(j) == 'A' ) + { + i--; + j--; + } + else + if (P->at(i).at(j) == 'S' ) + { + i--; + j--; + } + else + if (P->at(i).at(j) == 'D' ) + { + i--; + } + else + if (P->at(i).at(j) == 'I' ) + { + j--; + } } - } - if ( i < ( int ) ref.size() ) { - dcost = score + delete_cost; - if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) ) { - S[i+1][j] = dcost; - P[i+1][j] = 'D'; - if ( i >= last_good ) { - last_good = i + 1 ; - } +// terAlignment to_return; + to_return->numWords = ref_size; + to_return->alignment = path; + to_return->numEdits = S->at(ref_size).at(hyp_size); + to_return->hyp = hyp; + to_return->ref = ref; + to_return->averageWords = ref_size; + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return->toString() << endl << "END DEBUG" << endl; } - } - } - } - - - int tracelength = 0; - i = ref.size(); - j = hyp.size(); - while ( ( i > 0 ) || ( j > 0 ) ) { - tracelength++; - if ( P[i][j] == 'A' ) { - i--; - j--; - } else if ( P[i][j] == 'S' ) { - i--; - j--; - } else if ( P[i][j] == 'D' ) { - i--; - } else if ( P[i][j] == 'I' ) { - j--; - } else { - cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl; - exit ( -1 ); - } - } - vector path ( tracelength ); - i = ref.size(); - j = hyp.size(); - while ( ( i > 0 ) || ( j > 0 ) ) { - path[--tracelength] = P[i][j]; - if ( P[i][j] == 'A' ) { - i--; - j--; - } else if ( P[i][j] == 'S' ) { - i--; - j--; - } else if ( P[i][j] == 'D' ) { - i--; - } else if ( P[i][j] == 'I' ) { - j--; +// return to_return; + } - } - terAlignment to_return; - to_return.numWords = ref.size(); - to_return.alignment = path; - to_return.numEdits = S[ref.size() ][hyp.size() ]; - to_return.hyp = hyp; - to_return.ref = ref; - to_return.averageWords = (int)ref.size(); - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl; - } - return 
to_return; -} -terAlignment terCalc::TER ( vector hyp, vector ref ) -{ - hashMapInfos rloc = createConcordMots ( hyp, ref ); - terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans ); - vector cur = hyp; - cur_align.hyp = hyp; - cur_align.ref = ref; - cur_align.aftershift = hyp; - double edits = 0; + + terAlignment terCalc::TER ( vector& hyp, vector& ref ) + { + hashMapInfos rloc = createConcordMots ( hyp, ref ); + terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans ); + vector cur = hyp; + cur_align.hyp = hyp; + cur_align.ref = ref; + cur_align.aftershift = hyp; + double edits = 0; // int numshifts = 0; - vector allshifts; + vector * allshifts=new vector(0); + bestShiftStruct * returns=new bestShiftStruct(); // cerr << "Initial Alignment:" << endl << cur_align.toString() < cur, vector hyp, vector ref, hashMapInfos rloc, terAlignment med_align ) -{ - bestShiftStruct to_return; - bool anygain = false; - bool herr[ ( int ) hyp.size() ]; - bool rerr[ ( int ) ref.size() ]; - int ralign[ ( int ) ref.size() ]; - calculateTerAlignment ( med_align, herr, rerr, ralign ); - vector poss_shifts; - - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl; - cerr << "indices: "; - for (int l_i=0; l_i < ( int ) ref.size() ; l_i++) { - cerr << l_i << "\t"; - } - cerr << endl; - cerr << "hyp : \t"<= 0; i-- ) { - for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ ) { - cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl; - } + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl; + } + while ( true ) + { + + returns=findBestShift ( cur, hyp, ref, rloc, cur_align ); +// cerr << "****************************************************************** " << returns->getEmpty() << endl; + if ( returns->getEmpty()) + { + break; + } + terShift bestShift = 
(*(returns->m_best_shift)); + cur_align = (*(returns->m_best_align)); + edits += bestShift.cost; + bestShift.alignment = cur_align.alignment; + bestShift.aftershift = cur_align.aftershift; + allshifts->push_back ( bestShift ); + cur = cur_align.aftershift; + delete(returns); + } + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::TER : Final to return :" << endl << cur_align.toString() << endl << "END DEBUG" << endl; + } + terAlignment to_return; + to_return = cur_align; + to_return.allshifts = (*(allshifts)); + to_return.numEdits += edits; + NBR_SEGS_EVALUATED++; + return to_return; } - cerr << endl; - cerr << "END DEBUG " << endl; - } + bestShiftStruct * terCalc::findBestShift ( vector& cur, vector& hyp, vector& ref, hashMapInfos& rloc, terAlignment& med_align ) + { + CALL_FIND_BSHIFT++; +// cerr << "CALL_FIND_BSHIFT " << CALL_FIND_BSHIFT <m_empty = new bool(false); + bool anygain = false; + vector * herr = new vector(( int ) hyp.size() + 1 ); + vector * rerr = new vector( ( int ) ref.size() + 1 ); + vector * ralign = new vector( ( int ) ref.size() + 1 ); + int l_i,i,j,s; + for (i = 0 ; i< ( int ) hyp.size() + 1 ; i++) + { + herr->at(i)=false; + } + for (i = 0 ; i< ( int ) ref.size() + 1 ; i++) + { + rerr->at(i)=false; + ralign->at(i)=-1; + } + calculateTerAlignment ( med_align, herr, rerr, ralign ); + vector * poss_shifts = new vector< vector >(0) ; + terAlignment * cur_best_align = new terAlignment(); + terShift * cur_best_shift = new terShift(); + double cur_best_shift_cost = 0.0; + vector shiftarr; + vector curHypSpans; + terShift * curshift = new terShift(); + alignmentStruct shiftReturns; + terAlignment * curalign = new terAlignment() ; + + + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl; + cerr << "indices: "; + for (l_i=0; l_i < ( int ) ref.size() ; l_i++) + { + cerr << l_i << "\t"; + } + cerr << endl; + cerr << "hyp : \t"<size() - 1; i >= 0; i-- ) + { + for ( j = 0; j < ( 
int ) ( poss_shifts->at ( i ) ).size(); j++ ) + { + cerr << " [" << i << "] " << ( ( poss_shifts->at ( i ) ).at ( j ) ).toString() << endl; + } + } + cerr << endl; + cerr << "END DEBUG " << endl; + } // exit(0); - double cur_best_shift_cost = 0.0; - terAlignment cur_best_align = med_align; - terShift cur_best_shift; - + cur_best_align->set(med_align); + for ( i = ( int ) poss_shifts->size() - 1; i >= 0; i-- ) + { + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "Considering shift of length " << i << " (" << ( poss_shifts->at ( i ) ).size() << ")" << endl; + cerr << "END DEBUG " << endl; + } + /* Consider shifts of length i+1 */ + double curfix = curerr - ( cur_best_shift_cost + cur_best_align->numEdits ); + double maxfix = ( 2 * ( 1 + i ) ); + if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) + { + break; + } + else + { + for ( s = 0; s < ( int ) ( poss_shifts->at ( i ) ).size(); s++ ) + { + curfix = curerr - ( cur_best_shift_cost + cur_best_align->numEdits ); + if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) + { + break; + } + else + { + curshift->set(( poss_shifts->at ( i ) ).at ( s )); + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "cur : "<< join(" ",cur) << endl; + cerr << "shift size : "<< i << endl; + cerr << "shift number : "<< s << endl; + cerr << "size of shift size : "<< ( int ) ( poss_shifts->at ( i ) ).size() << endl; + cerr << "curshift : "<< curshift->toString() << endl; + + } +// alignmentStruct shiftReturns; + shiftReturns.set(permuter ( cur, curshift )); + shiftarr = shiftReturns.nwords; + curHypSpans = shiftReturns.aftershift; + if ( PRINT_DEBUG ) + { + cerr << "shiftarr : "<< join(" ",shiftarr) << endl; + cerr << "curHypSpans size : "<< (int)curHypSpans.size() << endl; + cerr << "END DEBUG " << endl; + } +// terAlignment tmp=minimizeDistanceEdition ( shiftarr, ref, curHypSpans 
); + minimizeDistanceEdition ( shiftarr, ref, curHypSpans, curalign ); +// curalign->set(tmp); - for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) { - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; - cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl; - cerr << "END DEBUG " << endl; - } - /* Consider shifts of length i+1 */ - double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits ); - double maxfix = ( 2 * ( 1 + i ) ); - if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) { - break; - } + curalign->hyp = hyp; + curalign->ref = ref; + curalign->aftershift = shiftarr; - for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ ) { - curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits ); - if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) { - break; - } - terShift curshift = ( poss_shifts.at ( i ) ).at ( s ); - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; - cerr << "cur : "<< join(" ",cur) << endl; - cerr << "curshift : "<< curshift.toString() << endl; - - } - alignmentStruct shiftReturns = permuter ( cur, curshift ); - vector shiftarr = shiftReturns.nwords; - vector curHypSpans = shiftReturns.aftershift; - - if ( PRINT_DEBUG ) { - cerr << "shiftarr : "<< join(" ",shiftarr) << endl; -// cerr << "curHypSpans : "<< curHypSpans.toString() << endl; - cerr << "END DEBUG " << endl; - } - terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans ); - - curalign.hyp = hyp; - curalign.ref = ref; - curalign.aftershift = shiftarr; - - - double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost ); - - // if (DEBUG) { - // string testeuh=terAlignment join(" ", shiftarr); - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; - cerr << "Gain for " << 
curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl; - cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"< 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) { - anygain = true; - cur_best_shift = curshift; - cur_best_shift_cost = curshift.cost; - cur_best_align = curalign; - // if (DEBUG) - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; - cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl; - cerr << "END DEBUG " << endl; - } - } - } - } - if ( anygain ) { - to_return.m_best_shift = cur_best_shift; - to_return.m_best_align = cur_best_align; - to_return.m_empty = false; - } else { - to_return.m_empty = true; - } - return to_return; -} -void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign ) -{ - int hpos = -1; - int rpos = -1; - if ( PRINT_DEBUG ) { - - cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl; - cerr << "END DEBUG " << endl; - } - for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) { - herr[i] = false; - rerr[i] = false; - ralign[i] = -1; - } - for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) { - char sym = align.alignment[i]; - if ( sym == 'A' ) { - hpos++; - rpos++; - herr[hpos] = false; - rerr[rpos] = false; - ralign[rpos] = hpos; - } else if ( sym == 'S' ) { - hpos++; - rpos++; - herr[hpos] = true; - rerr[rpos] = true; - ralign[rpos] = hpos; - } else if ( sym == 'I' ) { - hpos++; - herr[hpos] = true; - } else if ( sym == 'D' ) { - rpos++; - rerr[rpos] = true; - ralign[rpos] = hpos+1; - } else { - cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl; - exit ( -1 ); - } - } -} + double gain = ( cur_best_align->numEdits + cur_best_shift_cost ) - ( 
curalign->numEdits + curshift->cost ); -vector terCalc::calculerPermutations ( vector hyp, vector ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign ) -{ - vector to_return; - if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) ) { - return to_return; - } - - vector allshifts ( TAILLE_PERMUT_MAX + 1 ); - for ( int start = 0; start < ( int ) hyp.size(); start++ ) { - string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) ); - if ( ! rloc.trouve ( subVectorHypString ) ) { - continue; - } + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "Gain for " << curshift->toString() << " is " << gain << ". (result: [" << curalign->join ( " ", shiftarr ) << "]" << endl; + cerr << "Details of gains : gain = ( cur_best_align->numEdits + cur_best_shift_cost ) - ( curalign->numEdits + curshift->cost )"<numEdits << "+" << cur_best_shift_cost << ") - (" << curalign->numEdits << "+" << curshift->cost << ")"<toString() << "\n" << endl; + cerr << "END DEBUG " << endl; + } - bool ok = false; - vector mtiVec = rloc.getValue ( subVectorHypString ); - vector::iterator mti = mtiVec.begin(); - while ( mti != mtiVec.end() && ( ! ok ) ) { - int moveto = ( *mti ); - mti++; - if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) ) { - ok = true; - } - } - if ( ! ok ) { - continue; - } - ok = true; - for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ ) { - /* check if cand is good if so, add it */ - vector cand = subVector ( hyp, start, end + 1 ); - ok = false; - if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) { - continue; - } - - bool any_herr = false; - - for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! 
any_herr ) ); i++ ) { - if ( herr[start+i] ) { - any_herr = true; + if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) + { + anygain = true; + cur_best_shift->set(curshift); + cur_best_shift_cost = curshift->cost; + cur_best_align->set(curalign); + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "Tmp Choosing shift: " << cur_best_shift->toString() << " gives:\n" << cur_best_align->toString() << "\n" << endl; + cerr << "END DEBUG " << endl; + } + } + } + } + } } - } - if ( any_herr == false ) { - ok = true; - continue; - } - - vector movetoitVec; - movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) ); -// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl; - vector::iterator movetoit = movetoitVec.begin(); - while ( movetoit != movetoitVec.end() ) { - int moveto = ( *movetoit ); - movetoit++; - if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) ) { - continue; + bestShiftStruct * to_return=new bestShiftStruct(); + if ( anygain ) + { + to_return->setEmpty(false); + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "Final shift chosen : " << cur_best_shift->toString() << " gives:\n" << cur_best_align->toString() << "\n" << endl; + cerr << "END DEBUG " << endl; + } + to_return->m_best_shift->set(cur_best_shift); +// terAlignment tmp=cur_best_align; +// cur_best_align->toString(); +// to_return.m_best_align.toString(); +// if ((int)cur_best_align->alignment.size() == 0) +// { +// to_return.m_best_align = cur_best_align; +// } +// else +// { +// cerr << "Warning: cur_best_align->alignment.size() = 0 !!!"<m_best_align->set(cur_best_align); +// to_return.m_best_align.toString(); } - ok = true; + else + { + 
to_return->setEmpty(true); + } +// // cerr << to_return->toString() << endl; + delete(poss_shifts); + delete(cur_best_align); + delete(cur_best_shift); + delete(curshift); + delete(curalign) ; + return to_return; + } - /* check to see if there are any errors in either string - (only move if this is the case!) - */ + void terCalc::calculateTerAlignment ( terAlignment& align, vector* herr, vector* rerr, vector* ralign ) + { + int hpos = -1; + int rpos = -1; + CALL_TER_ALIGN++; +// cerr << "CALL_TER_ALIGN " << CALL_TER_ALIGN << endl; + if ( PRINT_DEBUG ) + { - bool any_rerr = false; - for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) { - if ( rerr[moveto+i] ) { - any_rerr = true; - } + cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl; + cerr << "END DEBUG " << endl; } - if ( ! any_rerr ) { - continue; +// cerr << (int)herr->size() <size() <at(i) = false; +// rerr->at(i) = false; +// ralign->at(i) = -1; +// } + for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) + { + char sym = align.alignment.at(i); + if ( sym == 'A' ) + { + hpos++; + rpos++; + herr->at(hpos) = false; + rerr->at(rpos) = false; + ralign->at(rpos) = hpos; + } + else + if ( sym == 'S' ) + { + hpos++; + rpos++; + herr->at(hpos) = true; + rerr->at(rpos) = true; + ralign->at(rpos) = hpos; + } + else + if ( sym == 'I' ) + { + hpos++; + herr->at(hpos) = true; + } + else + if ( sym == 'D' ) + { + rpos++; + rerr->at(rpos) = true; + ralign->at(rpos) = hpos+1; + } + else + { + cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl; + exit ( -1 ); + } } - for ( int roff = -1; roff <= ( end - start ); roff++ ) { - terShift topush; - bool topushNull = true; - if ( ( roff == -1 ) && ( moveto == 0 ) ) { - if ( PRINT_DEBUG ) { + } - cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." 
<< end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl; + vector * terCalc::calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& align, vector* herr, vector* rerr, vector* ralign ) + { + vector * allshifts = new vector(0); +// to_return.clear(); + CALL_CALC_PERMUT++; +// cerr << "CALL_CALC_PERMUT " << CALL_CALC_PERMUT << endl; + if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) ) + { + return allshifts; + } + allshifts = new vector( TAILLE_PERMUT_MAX + 1 ); + int start=0; + int end=0; + bool ok = false; + vector mtiVec(0); + vector::iterator mti; + int moveto=0; + vector cand(0); + bool any_herr = false; + bool any_rerr = false; + int i=0; + int l_nbr_permuts=0; +// for (i=0; i< (int)ref.size() +1 ; i++) {cerr << " " << ralign[i] ;} cerr < movetoitVec(0); + string subVectorHypString=""; + terShift * topush; + for ( start = 0; start < ( int ) hyp.size(); start++ ) + { + subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) ); + if ( ! rloc.trouve ( subVectorHypString ) ) + { + continue; } - terShift t01 ( start, end, -1, -1 ); - topush = t01; - topushNull = false; - } else if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) ) { - int newloc = ralign[moveto+roff]; - if ( PRINT_DEBUG ) { - - cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl; + + ok = false; + mtiVec = rloc.getValue ( subVectorHypString ); + mti = mtiVec.begin(); + while ( mti != mtiVec.end() && ( ! 
ok ) ) + { + moveto = ( *mti ); + mti++; + if ( ( start != ralign->at(moveto) ) && ( ( ralign->at(moveto) - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign->at(moveto) - 1 ) <= DIST_MAX_PERMUT ) ) + { + ok = true; + } } - terShift t02 ( start, end, moveto + roff, newloc ); - topush = t02; - topushNull = false; - } - if ( !topushNull ) { - topush.shifted = cand; - topush.cost = shift_cost; - if ( PRINT_DEBUG ) { - - cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl; - cerr << "start : " << start << endl; - cerr << "end : " << end << endl; - cerr << "end - start : " << end - start << endl; - cerr << "END DEBUG " << endl; + if ( ! ok ) + { + continue; } - ( allshifts.at ( end - start ) ).push_back ( topush ); - } - } - } - } - } - to_return.clear(); - for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ ) { - to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) ); - } - return to_return; -} + ok = true; + for ( end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ ) + { + /* check if cand is good if so, add it */ + cand = subVector ( hyp, start, end + 1 ); + ok = false; + if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) + { + continue; + } + any_herr = false; -alignmentStruct terCalc::permuter ( vector words, terShift s ) -{ - return permuter ( words, s.start, s.end, s.newloc ); -} + for ( i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ ) + { + if ( herr->at(start+i) ) + { + any_herr = true; + } + } + if ( any_herr == false ) + { + ok = true; + continue; + } + movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) ); +// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl; + vector::iterator movetoit; + movetoit = movetoitVec.begin(); + while ( movetoit != movetoitVec.end() ) + { + moveto = ( *movetoit ); + movetoit++; + if ( ! 
( ( ralign->at(moveto) != start ) && ( ( ralign->at(moveto) < start ) || ( ralign->at(moveto) > end ) ) && ( ( ralign->at(moveto) - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign->at(moveto) ) <= DIST_MAX_PERMUT ) ) ) + { + continue; + } + ok = true; -alignmentStruct terCalc::permuter ( vector words, int start, int end, int newloc ) -{ - int c = 0; - vector nwords ( words ); - vector spans ( ( int ) hypSpans.size() ); - alignmentStruct to_return; - if ( PRINT_DEBUG ) { - - if ( ( int ) hypSpans.size() > 0 ) { - cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ; - } else { - cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ; - } - cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl; - } - if (newloc >= ( int ) words.size()) { - if ( PRINT_DEBUG ) { - cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<at(moveto+i) ) + { + any_rerr = true; + } + } + if ( ! any_rerr ) + { + continue; + } + for ( int roff = -1; roff <= ( end - start ); roff++ ) + { + topush = new terShift(); + bool topushNull = true; + if ( ( roff == -1 ) && ( moveto == 0 ) ) + { + if ( PRINT_DEBUG ) + { - if ( newloc == -1 ) { - for ( int i = start; i <= end; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." 
<< end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl; + } +// terShift t01 ( start, end, -1, -1 ); +// topush = t01; + topush->start=start; + topush->end=end; + topush->moveto=-1; + topush->newloc=-1; + topushNull = false; + } + else + if ( ( start != ralign->at(moveto+roff) ) && ( ( roff == 0 ) || ( ralign->at(moveto+roff) != ralign->at(moveto) ) ) ) + { + int newloc = ralign->at(moveto+roff); + if ( PRINT_DEBUG ) + { + + cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl; + } +// terShift t02 ( start, end, moveto + roff, newloc ); +// topush = t02; + topush->start=start; + topush->end=end; + topush->moveto=moveto + roff; + topush->newloc=newloc; + topushNull = false; + } + if ( !topushNull ) + { + topush->shifted = cand; + topush->cost = shift_cost; + l_nbr_permuts++; + if ( PRINT_DEBUG ) + { + + cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl; + cerr << "start : " << start << endl; + cerr << "end : " << end << endl; + cerr << "end - start : " << end - start << endl; + cerr << "nbr Permutations added: " << l_nbr_permuts << endl; + cerr << "END DEBUG " << endl; + } + if (l_nbr_permuts < NBR_PERMUT_MAX + 1) + { + ( allshifts->at ( end - start ) ).push_back ( (*(topush)) ); + } +// else +// { +// break; +// } + } + delete(topush); + } + } + } + } +// to_return.clear(); +// for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ ) +// { +// to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) ); +// } + return allshifts; } - for ( int i = 0; i <= start - 1; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + + + alignmentStruct terCalc::permuter ( vector< string >& words, 
TERCpp::terShift& s ) + { + return permuter ( words, s.start, s.end, s.newloc ); } - for ( int i = end + 1; i < ( int ) words.size(); i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + alignmentStruct terCalc::permuter ( vector< string >& words, TERCpp::terShift* s ) + { + return permuter ( words, s->start, s->end, s->newloc ); } - } else { - if ( newloc < start ) { - for ( int i = 0; i < newloc; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = start; i <= end; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = newloc ; i < start ; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = end + 1; i < ( int ) words.size(); i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - } else { - if ( newloc > end ) { - for ( int i = 0; i <= start - 1; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = end + 1; i <= newloc; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = start; i <= end; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = newloc + 1; i < ( int ) words.size(); i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - } else { - // we are moving inside of ourselves - for ( int i = 0; i <= start - 1; i++ ) { - nwords.at ( c++ ) = 
words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + + alignmentStruct terCalc::permuter ( vector< string >& words, int start, int end, int newloc ) + { + int c = 0; + vector nwords ( words ); + vector spans ( ( int ) hypSpans.size() ); + alignmentStruct to_return; + if ( PRINT_DEBUG ) + { + + if ( ( int ) hypSpans.size() > 0 ) + { + cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ; + } + else + { + cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ; + } + cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl; } - for ( int i = start; i <= end; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + if (newloc >= ( int ) words.size()) + { + if ( PRINT_DEBUG ) + { + cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."< 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = 0; i <= start - 1;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = end + 1; i < ( int ) words.size();i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } } - for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size(); i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { 
- spans.at ( c - 1 ) = hypSpans.at ( i ); - } + else + { + if ( newloc < start ) + { + + for ( int i = 0; i < newloc; i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = start; i <= end;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = newloc ; i < start ;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = end + 1; i < ( int ) words.size();i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + } + else + { + if ( newloc > end ) + { + for ( int i = 0; i <= start - 1; i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = end + 1; i <= newloc;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = start; i <= end;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = newloc + 1; i < ( int ) words.size();i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + } + else + { + // we are moving inside of ourselves + for ( int i = 0; i <= start - 1; i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i 
); + } + } + for ( int i = start; i <= end;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size();i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + } + } } - } - } - } - NBR_PERMUTS_CONSID++; - - if ( PRINT_DEBUG ) { - cerr << "nwords" << join(" ",nwords) << endl; + NBR_PERMUTS_CONSID++; + + if ( PRINT_DEBUG ) + { + cerr << "nwords" << join(" ",nwords) << endl; // cerr << "spans" << spans. << endl; - } - - to_return.nwords = nwords; - to_return.aftershift = spans; - return to_return; -} -void terCalc::setDebugMode ( bool b ) -{ - PRINT_DEBUG = b; -} + } + + to_return.nwords = nwords; + to_return.aftershift = spans; + return to_return; + } + void terCalc::setDebugMode ( bool b ) + { + PRINT_DEBUG = b; + } } diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h index 778d833951..22b5e2c9df 100644 --- a/mert/TER/tercalc.h +++ b/mert/TER/tercalc.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef _TERCPPTERCALC_H__ -#define _TERCPPTERCALC_H__ +#ifndef _TERCPPTERCALC_H___ +#define _TERCPPTERCALC_H___ #include #include @@ -41,62 +41,70 @@ namespace TERCpp { // typedef size_t WERelement[2]; // Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del) -typedef vector vecTerShift; -/** - @author -*/ -class terCalc -{ -private : + typedef vector vecTerShift; + /** + @author + */ + class terCalc + { + private : // Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del) - WERalignment l_WERalignment; + WERalignment l_WERalignment; // HashMap contenant les valeurs de hash de chaque mot - hashMap bagOfWords; - int TAILLE_PERMUT_MAX; - // Increments internes - int NBR_SEGS_EVALUATED; - int NBR_PERMUTS_CONSID; - int NBR_BS_APPELS; - int DIST_MAX_PERMUT; - bool PRINT_DEBUG; + hashMap bagOfWords; + int TAILLE_PERMUT_MAX; + int NBR_PERMUT_MAX; + // Increments internes + int NBR_SEGS_EVALUATED; + int NBR_PERMUTS_CONSID; + int NBR_BS_APPELS; + int DIST_MAX_PERMUT; + int CALL_TER_ALIGN; + int CALL_CALC_PERMUT; + int CALL_FIND_BSHIFT; + int MAX_LENGTH_SENTENCE; + bool PRINT_DEBUG; - // Utilisés dans minDistEdit et ils ne sont pas réajustés - double S[1000][1000]; - char P[1000][1000]; - vector refSpans; - vector hypSpans; - int TAILLE_BEAM; + // Utilisés dans minDistEdit et ils ne sont pas réajustés + vector < vector < double > > * S; + vector < vector < char > > * P; + vector refSpans; + vector hypSpans; + int TAILLE_BEAM; -public: - int shift_cost; - int insert_cost; - int delete_cost; - int substitute_cost; - int match_cost; - double infinite; - terCalc(); + public: + int shift_cost; + int insert_cost; + int delete_cost; + int 
substitute_cost; + int match_cost; + double infinite; + terCalc(); -// ~terCalc(); + ~terCalc(); // size_t* hashVec ( vector s ); - void setDebugMode ( bool b ); + void setDebugMode ( bool b ); // int WERCalculation ( size_t * ref, size_t * hyp ); // int WERCalculation ( vector ref, vector hyp ); // int WERCalculation ( vector ref, vector hyp ); - terAlignment WERCalculation ( vector hyp, vector ref ); + terAlignment WERCalculation ( vector< string >& hyp, vector< string >& ref ); // string vectorToString(vector vec); // vector subVector(vector vec, int start, int end); - hashMapInfos createConcordMots ( vector hyp, vector ref ); - terAlignment minimizeDistanceEdition ( vector hyp, vector ref, vector curHypSpans ); - bool trouverIntersection ( vecInt refSpan, vecInt hypSpan ); - terAlignment TER ( vector hyp, vector ref , float avRefLength ); - terAlignment TER ( vector hyp, vector ref ); - terAlignment TER ( vector hyp, vector ref ); - bestShiftStruct findBestShift ( vector cur, vector hyp, vector ref, hashMapInfos rloc, terAlignment cur_align ); - void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign ); - vector calculerPermutations ( vector hyp, vector ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign ); - alignmentStruct permuter ( vector words, terShift s ); - alignmentStruct permuter ( vector words, int start, int end, int newloc ); -}; + hashMapInfos createConcordMots ( vector& hyp, vector& ref ); + terAlignment minimizeDistanceEdition ( vector& hyp, vector& ref, vector& curHypSpans ); + void minimizeDistanceEdition ( vector& hyp, vector& ref, vector& curHypSpans , terAlignment* l_terAlign); +// terAlignment minimizeDistanceEdition ( vector& hyp, vector& ref, vector& curHypSpans ); + bool trouverIntersection ( vecInt& refSpan, vecInt& hypSpan ); + terAlignment TER ( vector& hyp, vector& ref , float avRefLength ); + terAlignment TER ( vector& hyp, vector& ref ); + terAlignment TER ( vector& hyp, 
vector& ref ); + bestShiftStruct * findBestShift ( vector< string >& cur, vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& med_align ); + void calculateTerAlignment ( terAlignment& align, vector* herr, vector* rerr, vector* ralign ); + vector * calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& align, vector* herr, vector* rerr, vector* ralign ); + alignmentStruct permuter ( vector& words, terShift& s ); + alignmentStruct permuter ( vector& words, terShift* s ); + alignmentStruct permuter ( vector& words, int start, int end, int newloc ); + }; } diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp index 8858a71191..22ee091a85 100644 --- a/mert/TER/tools.cpp +++ b/mert/TER/tools.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -25,677 +25,811 @@ using namespace boost::xpressive; namespace Tools { -string vectorToString ( vector vec ) -{ - string retour ( "" ); - for ( vector::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour += ( *vecIter ); - } else { - retour += "\t" + ( *vecIter ); + string vectorToString ( vector vec ) + { + string retour ( "" ); + for ( vector::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour += ( *vecIter ); + } + else + { + retour += "\t" + ( *vecIter ); + } + } + return retour; } - } - return retour; -} -string vectorToString ( vector vec ) -{ - stringstream retour; - retour.str(""); - for ( vector::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour << ( *vecIter ); - } else { - retour << "\t" << ( *vecIter ); + string vectorToString ( vector vec ) + { + stringstream retour; + retour.str(""); + for ( vector::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << "\t" << ( *vecIter ); + } + } + return retour.str(); } - } - return retour.str(); -} -string vectorToString ( vector vec ) -{ - stringstream retour; - retour.str(""); - for ( vector::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour << ( *vecIter ); - } else { - retour << "\t" << ( *vecIter ); + string vectorToString ( vector vec ) + { + stringstream retour; + retour.str(""); + for ( vector::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << "\t" << ( *vecIter ); + } + } + return retour.str(); + } + string vectorToString ( vector * vec ) + { + stringstream retour; + retour.str(""); + for ( vector::iterator vecIter = vec->begin();vecIter != vec->end(); 
vecIter++ ) + { + if ( vecIter == vec->begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << "\t" << ( *vecIter ); + } + } + return retour.str(); } - } - return retour.str(); -} -string vectorToString ( vector< string > vec, string s ) -{ - string retour ( "" ); - for ( vector::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour += ( *vecIter ); - } else { - retour += s + ( *vecIter ); + string vectorToString ( vector< string > vec, string s ) + { + string retour ( "" ); + for ( vector::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour += ( *vecIter ); + } + else + { + retour += s + ( *vecIter ); + } + } + return retour; + } - } - return retour; -} + string vectorToString ( vector< char > vec, string s ) + { + stringstream retour; + retour.str(""); + for ( vector::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << s << ( *vecIter ); + } + } + return retour.str(); -string vectorToString ( vector< char > vec, string s ) -{ - stringstream retour; - retour.str(""); - for ( vector::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour << ( *vecIter ); - } else { - retour << s << ( *vecIter ); } - } - return retour.str(); -} + string vectorToString ( vector< int > vec, string s ) + { + stringstream retour; + retour.str(""); + for ( vector::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << s << ( *vecIter ); + } + } + return retour.str(); -string vectorToString ( vector< int > vec, string s ) -{ - stringstream retour; - retour.str(""); - for ( vector::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour << ( 
*vecIter ); - } else { - retour << s << ( *vecIter ); } - } - return retour.str(); -} + string vectorToString ( vector< bool > vec, string s ) + { + stringstream retour; + retour.str(""); + for ( vector::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << s << ( *vecIter ); + } + } + return retour.str(); -string vectorToString ( vector< bool > vec, string s ) -{ - stringstream retour; - retour.str(""); - for ( vector::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour << ( *vecIter ); - } else { - retour << s << ( *vecIter ); } - } - return retour.str(); + string vectorToString ( char* vec, string s , int taille) + { + stringstream retour; + retour.str(""); + int l_i; + for ( l_i=0; l_i < taille ; l_i++) + { + if ( l_i == 0 ) + { + retour << vec[l_i]; + } + else + { + retour << s << vec[l_i]; + } + } + return retour.str(); -} -string vectorToString ( char* vec, string s , int taille) -{ - stringstream retour; - retour.str(""); - int l_i; - for ( l_i=0; l_i < taille ; l_i++) { - if ( l_i == 0 ) { - retour << vec[l_i]; - } else { - retour << s << vec[l_i]; } - } - return retour.str(); -} + string vectorToString ( int* vec, string s , int taille) + { + stringstream retour; + retour.str(""); + int l_i; + for ( l_i=0; l_i < taille ; l_i++) + { + if ( l_i == 0 ) + { + retour << vec[l_i]; + } + else + { + retour << s << vec[l_i]; + } + } + return retour.str(); -string vectorToString ( int* vec, string s , int taille) -{ - stringstream retour; - retour.str(""); - int l_i; - for ( l_i=0; l_i < taille ; l_i++) { - if ( l_i == 0 ) { - retour << vec[l_i]; - } else { - retour << s << vec[l_i]; } - } - return retour.str(); -} + string vectorToString ( bool* vec, string s , int taille) + { + stringstream retour; + retour.str(""); + int l_i; + for ( l_i=0; l_i < taille ; l_i++) + { + if ( l_i == 0 ) + { + retour << 
vec[l_i]; + } + else + { + retour << s << vec[l_i]; + } + } + return retour.str(); -string vectorToString ( bool* vec, string s , int taille) -{ - stringstream retour; - retour.str(""); - int l_i; - for ( l_i=0; l_i < taille ; l_i++) { - if ( l_i == 0 ) { - retour << vec[l_i]; - } else { - retour << s << vec[l_i]; } - } - return retour.str(); - -} + + string vectorToString ( vector* vec, string s , int taille) + { + stringstream retour; + retour.str(""); + int l_i; + for ( l_i=0; l_i < taille ; l_i++) + { + if ( l_i == 0 ) + { + retour << vec->at(l_i); + } + else + { + retour << s << vec->at(l_i); + } + } + return retour.str(); -vector subVector ( vector vec, int start, int end ) -{ - vector retour; - if ( start > end ) { - cerr << "ERREUR : TERcalc::subVector : end > start" << endl; - exit ( 0 ); - } - for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} + } -vector subVector ( vector vec, int start, int end ) -{ - vector retour; - if ( start > end ) { - cerr << "ERREUR : TERcalc::subVector : end > start" << endl; - exit ( 0 ); - } - for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} + string vectorToString ( vector* vec, string s , int taille) + { + stringstream retour; + retour.str(""); + int l_i; + for ( l_i=0; l_i < taille ; l_i++) + { + if ( l_i == 0 ) + { + retour << vec->at(l_i); + } + else + { + retour << s << vec->at(l_i); + } + } + return retour.str(); -vector subVector ( vector vec, int start, int end ) -{ - vector retour; - if ( start > end ) { - cerr << "ERREUR : TERcalc::subVector : end > start" << endl; - exit ( 0 ); - } - for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} + } -vector copyVector ( vector vec ) -{ - vector retour; - for ( int i = 0; i < ( int ) vec.size(); i++ ) { - retour.push_back 
( vec.at ( i ) ); - } - return retour; -} -vector copyVector ( vector vec ) -{ - vector retour; - for ( int i = 0; i < ( int ) vec.size(); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} -vector copyVector ( vector vec ) -{ - vector retour; - for ( int i = 0; i < ( int ) vec.size(); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} -vector stringToVector ( string s, string tok ) -{ - vector to_return; - string to_push ( "" ); - bool pushed = false; - string::iterator sIt; - for ( sIt = s.begin(); sIt < s.end(); sIt++ ) { - pushed = false; - for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) { - if ( ( *sIt ) == ( *sTok ) ) { - to_return.push_back ( to_push ); - to_push = ""; - pushed = true; - } + + + vector subVector ( vector vec, int start, int end ) + { + vector retour; + if ( start > end ) + { + cerr << "ERREUR : TERcalc::subVector : end > start" << endl; + exit ( 0 ); + } + for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) + { + retour.push_back ( vec.at ( i ) ); + } + return retour; } - if ( !pushed ) { - to_push.push_back ( ( *sIt ) ); + + vector subVector ( vector vec, int start, int end ) + { + vector retour; + if ( start > end ) + { + cerr << "ERREUR : TERcalc::subVector : end > start" << endl; + exit ( 0 ); + } + for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) + { + retour.push_back ( vec.at ( i ) ); + } + return retour; } - } - to_return.push_back ( to_push ); - return to_return; -} -vector stringToVectorInt ( string s, string tok ) -{ - vector to_return; - string to_push ( "" ); - bool pushed = false; - string::iterator sIt; - for ( sIt = s.begin(); sIt < s.end(); sIt++ ) { - pushed = false; - for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) { - if ( ( *sIt ) == ( *sTok ) ) { - if ( ( int ) to_push.length() > 0 ) { - to_return.push_back ( atoi ( to_push.c_str() ) ); + + vector subVector ( vector vec, int start, int end ) + { 
+ vector retour; + if ( start > end ) + { + cerr << "ERREUR : TERcalc::subVector : end > start" << endl; + exit ( 0 ); + } + for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) + { + retour.push_back ( vec.at ( i ) ); } - to_push = ""; - pushed = true; - } + return retour; } - if ( !pushed ) { - to_push.push_back ( ( *sIt ) ); + + vector copyVector ( vector vec ) + { + vector retour; + for ( int i = 0; i < ( int ) vec.size(); i++ ) + { + retour.push_back ( vec.at ( i ) ); + } + return retour; } - } - if ( ( int ) to_push.length() > 0 ) { - to_return.push_back ( atoi ( to_push.c_str() ) ); - } - return to_return; -} -vector stringToVectorFloat ( string s, string tok ) -{ - vector to_return; - string to_push ( "" ); - bool pushed = false; - string::iterator sIt; - for ( sIt = s.begin(); sIt < s.end(); sIt++ ) { - pushed = false; - for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) { - if ( ( *sIt ) == ( *sTok ) ) { - if ( ( int ) to_push.length() > 0 ) { - to_return.push_back ( atof ( to_push.c_str() ) ); + vector copyVector ( vector vec ) + { + vector retour; + for ( int i = 0; i < ( int ) vec.size(); i++ ) + { + retour.push_back ( vec.at ( i ) ); } - to_push = ""; - pushed = true; - } + return retour; } - if ( !pushed ) { - to_push.push_back ( ( *sIt ) ); + vector copyVector ( vector vec ) + { + vector retour; + for ( int i = 0; i < ( int ) vec.size(); i++ ) + { + retour.push_back ( vec.at ( i ) ); + } + return retour; + } + vector stringToVector ( string s, string tok ) + { + vector to_return; + string to_push ( "" ); + bool pushed = false; + string::iterator sIt; + for ( sIt = s.begin(); sIt < s.end(); sIt++ ) + { + pushed = false; + for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) + { + if ( ( *sIt ) == ( *sTok ) ) + { + to_return.push_back ( to_push ); + to_push = ""; + pushed = true; + } + } + if ( !pushed ) + { + to_push.push_back ( ( *sIt ) ); + } + } + to_return.push_back ( to_push ); + return 
to_return; + } + vector stringToVectorInt ( string s, string tok ) + { + vector to_return; + string to_push ( "" ); + bool pushed = false; + string::iterator sIt; + for ( sIt = s.begin(); sIt < s.end(); sIt++ ) + { + pushed = false; + for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) + { + if ( ( *sIt ) == ( *sTok ) ) + { + if ( ( int ) to_push.length() > 0 ) + { + to_return.push_back ( atoi ( to_push.c_str() ) ); + } + to_push = ""; + pushed = true; + } + } + if ( !pushed ) + { + to_push.push_back ( ( *sIt ) ); + } + } + if ( ( int ) to_push.length() > 0 ) + { + to_return.push_back ( atoi ( to_push.c_str() ) ); + } + return to_return; + } + vector stringToVectorFloat ( string s, string tok ) + { + vector to_return; + string to_push ( "" ); + bool pushed = false; + string::iterator sIt; + for ( sIt = s.begin(); sIt < s.end(); sIt++ ) + { + pushed = false; + for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) + { + if ( ( *sIt ) == ( *sTok ) ) + { + if ( ( int ) to_push.length() > 0 ) + { + to_return.push_back ( atof ( to_push.c_str() ) ); + } + to_push = ""; + pushed = true; + } + } + if ( !pushed ) + { + to_push.push_back ( ( *sIt ) ); + } + } + if ( ( int ) to_push.length() > 0 ) + { + to_return.push_back ( atoi ( to_push.c_str() ) ); + } + return to_return; } - } - if ( ( int ) to_push.length() > 0 ) { - to_return.push_back ( atoi ( to_push.c_str() ) ); - } - return to_return; -} -string lowerCase ( string str ) -{ - for ( int i = 0; i < ( int ) str.size(); i++ ) { - if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) ) { - str[i] = str[i] + 0x20; + string lowerCase ( string str ) + { + for ( int i = 0;i < ( int ) str.size();i++ ) + { + if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) ) + { + str[i] = str[i] + 0x20; + } + } + return str; } - } - return str; -} -string removePunctTercom ( string str ) -{ - string str_mod = str; - sregex rex; - string replace; + string removePunctTercom ( string str ) + { + string str_mod = str; + 
sregex rex; + string replace; - rex = sregex::compile ( "^[ ]+" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "^[ ]+" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\"]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\"]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[,]" ); - replace = " "; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[,]" ); + replace = " "; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([\\.]$)" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([\\.]$)" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\?]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\?]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\;]" ); - replace = ( " " ); - str_mod = 
regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\;]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\:]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\:]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\!]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\!]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\(]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\(]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\)]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\)]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+" ); - replace = " "; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[ ]+" ); + replace = " "; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+$" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[ ]+$" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - return str_mod; -} -string removePunct ( string str ) -{ - string str_mod = str; - sregex rex; - string replace; + return str_mod; + } + string removePunct ( string str ) + { + string str_mod = str; + sregex rex; + string replace; - rex = sregex::compile ( "^[ ]+" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "^[ ]+" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\"]" ); - replace = ( " " ); - str_mod = regex_replace ( 
str_mod, rex, replace ); + rex = sregex::compile ( "[\"]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[,]" ); - replace = " "; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[,]" ); + replace = " "; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([\\.]$)" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([\\.]$)" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\?]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\?]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\;]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\;]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\:]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\:]" ); + replace = ( " " ); + str_mod = 
regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\!]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\!]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\(]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\(]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\)]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\)]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+" ); - replace = " "; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[ ]+" ); + replace = " "; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+$" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[ ]+$" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "^[ ]+" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "^[ ]+" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - return str_mod; -} -string tokenizePunct ( string str ) -{ - string str_mod = str; - sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" ); - string replace ( "$2 $3 $4" ); - str_mod = regex_replace ( str_mod, rex, replace ); + return str_mod; + } + string tokenizePunct ( string str ) + { + string str_mod = str; + sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" ); + string replace ( "$2 $3 $4" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" ); - replace = ( "$2 $3 $4" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( 
"(([^0-9])([\\.])([^0-9]))" ); + replace = ( "$2 $3 $4" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" ); - replace = ( " $2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" ); + replace = ( " $2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" ); - replace = ( " $2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" ); + replace = ( " $2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" ); - replace = ( " $2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" ); + replace = ( " $2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" ); - replace = ( "$2.$4. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" ); + replace = ( "$2.$4. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\?]" ); - replace = ( " ? " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\?]" ); + replace = ( " ? 
" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\;]" ); - replace = ( " ; " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\;]" ); + replace = ( " ; " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" ); - replace = ( "$2 $3 $4" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" ); + replace = ( "$2 $3 $4" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\!]" ); - replace = ( " ! " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\!]" ); + replace = ( " ! " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\(]" ); - replace = ( " ( " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\(]" ); + replace = ( " ( " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\)]" ); - replace = ( " ) " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\)]" ); + replace = ( " ) " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\"]" ); - replace = ( " \" " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\"]" ); + replace = ( " \" " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" ); - replace = ( "num_($2)" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" ); + replace = ( "num_($2)" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" ); - replace = ( "ordinal_($2)" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" ); + replace = ( "ordinal_($2)" ); + str_mod = 
regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([Mm]) \\.)" ); - replace = ( "$2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([Mm]) \\.)" ); + replace = ( "$2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([Mm]) \\.)" ); - replace = ( " $2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([Mm]) \\.)" ); + replace = ( " $2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([Dd]r) \\.)" ); - replace = ( "$2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([Dd]r) \\.)" ); + replace = ( "$2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([Dd]r) \\.)" ); - replace = ( " $2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([Dd]r) \\.)" ); + replace = ( " $2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([Mm]r) \\.)" ); - replace = ( "$2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([Mm]r) \\.)" ); + replace = ( "$2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([Mm]r) \\.)" ); - replace = ( " $2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([Mm]r) \\.)" ); + replace = ( " $2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([Mm]rs) \\.)" ); - replace = ( "$2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([Mm]rs) \\.)" ); + replace = ( "$2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([Mm]rs) \\.)" ); - replace = ( " $2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([Mm]rs) \\.)" ); + replace = ( " $2." 
); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([Nn]o) \\.)" ); - replace = ( "$2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([Nn]o) \\.)" ); + replace = ( "$2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([Nn]o) \\.)" ); - replace = ( " $2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([Nn]o) \\.)" ); + replace = ( " $2." ); + str_mod = regex_replace ( str_mod, rex, replace ); // rex = sregex::compile ( "(^(([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" ); // replace = ( "$2." ); // str_mod = regex_replace ( str_mod, rex, replace ); -// +// // rex = sregex::compile ( "( (([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" ); // replace = ( " $2." ); // str_mod = regex_replace ( str_mod, rex, replace ); -// +// // rex = sregex::compile ( "(^(([Gg]en)|([Cc]ol)) \\.)" ); // replace = ( "$2." ); // str_mod = regex_replace ( str_mod, rex, replace ); -// +// // rex = sregex::compile ( "( (([Gg]en)|([Cc]ol)) \\.)" ); // replace = ( " $2." ); // str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" ); - replace = ( "$2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); - - rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" ); - replace = ( " $2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" ); + replace = ( "$2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" ); - replace = ( "$2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" ); + replace = ( " $2. 
" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" ); - replace = ( " $2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" ); + replace = ( "$2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+" ); - replace = " "; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" ); + replace = ( " $2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "^[ ]+" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[ ]+" ); + replace = " "; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+$" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "^[ ]+" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - return str_mod; -} + rex = sregex::compile ( "[ ]+$" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); + + return str_mod; + } -string normalizeStd ( string str ) -{ - string str_mod = str; - sregex rex = sregex::compile ( "()" ); - string replace ( "" ); - str_mod = regex_replace ( str_mod, rex, replace ); + string normalizeStd ( string str ) + { + string str_mod = str; + sregex rex = sregex::compile ( "()" ); + string replace ( "" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "-\n" ); - replace = ( "" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "-\n" ); + replace = ( "" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "\n" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "\n" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( """ ); - replace = ( "\"" 
); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( """ ); + replace = ( "\"" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "&" ); - replace = ( "& " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "&" ); + replace = ( "& " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "<" ); - replace = ( "<" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "<" ); + replace = ( "<" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( ">" ); - replace = ( ">" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( ">" ); + replace = ( ">" ); + str_mod = regex_replace ( str_mod, rex, replace ); - return str_mod; -} + return str_mod; + } -param copyParam ( param p ) -{ - param to_return; - to_return.caseOn = p.caseOn; - to_return.noPunct = p.noPunct; - to_return.debugMode = p.debugMode; - to_return.debugLevel = p.debugLevel; - to_return.hypothesisFile = p.hypothesisFile; - to_return.referenceFile = p.referenceFile; - to_return.normalize = p.normalize; - to_return.noTxtIds = p.noTxtIds; - to_return.outputFileExtension = p.outputFileExtension; - to_return.outputFileName = p.outputFileName; - to_return.sgmlInputs = p.sgmlInputs; - to_return.tercomLike = p.tercomLike; - to_return.printAlignments = p.printAlignments; - to_return.WER=p.WER; - return to_return; -} -string printParams ( param p ) -{ - stringstream s; - s << "caseOn = " << p.caseOn << endl; - s << "noPunct = " << p.noPunct << endl; - s << "debugMode = " << p.debugMode << endl; - s << "debugLevel = " << p.debugLevel << endl; - s << "hypothesisFile = " << p.hypothesisFile << endl; - s << "referenceFile = " << p.referenceFile << endl; - s << "normalize = " << p.normalize << endl; - s << "noTxtIds = " << p.noTxtIds << endl; - s << "outputFileExtension = " << p.outputFileExtension << endl; - s << 
"outputFileName = " << p.outputFileName << endl; - s << "sgmlInputs = " << p.sgmlInputs << endl; - s << "tercomLike = " << p.tercomLike << endl; - return s.str(); + param copyParam ( param p ) + { + param to_return; + to_return.caseOn = p.caseOn; + to_return.noPunct = p.noPunct; + to_return.debugMode = p.debugMode; + to_return.debugLevel = p.debugLevel; + to_return.hypothesisFile = p.hypothesisFile; + to_return.referenceFile = p.referenceFile; + to_return.normalize = p.normalize; + to_return.noTxtIds = p.noTxtIds; + to_return.verbose = p.verbose; + to_return.count_verbose = p.count_verbose; + to_return.outputFileExtension = p.outputFileExtension; + to_return.outputFileName = p.outputFileName; + to_return.sgmlInputs = p.sgmlInputs; + to_return.tercomLike = p.tercomLike; + to_return.printAlignments = p.printAlignments; + to_return.WER=p.WER; + return to_return; + } + string printParams ( param p ) + { + stringstream s; + s << "caseOn = " << p.caseOn << endl; + s << "noPunct = " << p.noPunct << endl; + s << "debugMode = " << p.debugMode << endl; + s << "debugLevel = " << p.debugLevel << endl; + s << "hypothesisFile = " << p.hypothesisFile << endl; + s << "referenceFile = " << p.referenceFile << endl; + s << "normalize = " << p.normalize << endl; + s << "noTxtIds = " << p.noTxtIds << endl; + s << "outputFileExtension = " << p.outputFileExtension << endl; + s << "outputFileName = " << p.outputFileName << endl; + s << "sgmlInputs = " << p.sgmlInputs << endl; + s << "tercomLike = " << p.tercomLike << endl; + s << "verbose = " << p.verbose << endl; + s << "count_verbose = " << p.count_verbose << endl; + return s.str(); -} -string join ( string delim, vector arr ) -{ - if ( ( int ) arr.size() == 0 ) return ""; + } + string join ( string delim, vector arr ) + { + if ( ( int ) arr.size() == 0 ) return ""; // if ((int)delim.compare("") == 0) delim = new String(""); // String s = new String(""); - stringstream s; - s.str ( "" ); - for ( int i = 0; i < ( int ) arr.size(); i++ ) 
{ - if ( i == 0 ) { - s << arr.at ( i ); - } else { - s << delim << arr.at ( i ); - } - } - return s.str(); + stringstream s; + s.str ( "" ); + for ( int i = 0; i < ( int ) arr.size(); i++ ) + { + if ( i == 0 ) + { + s << arr.at ( i ); + } + else + { + s << delim << arr.at ( i ); + } + } + return s.str(); // return ""; -} + } } diff --git a/mert/TER/tools.h b/mert/TER/tools.h index 157b739a5e..4c3b108cd7 100644 --- a/mert/TER/tools.h +++ b/mert/TER/tools.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef MERT_TER_TOOLS_H_ -#define MERT_TER_TOOLS_H_ +#ifndef __TERCPPTOOLS_H__ +#define __TERCPPTOOLS_H__ #include @@ -35,31 +35,34 @@ using namespace std; namespace Tools { -typedef vector vecDouble; -typedef vector vecChar; -typedef vector vecInt; -typedef vector vecFloat; -typedef vector vecSize_t; -typedef vector vecString; -typedef vector alignmentElement; -typedef vector WERalignment; + typedef vector vecDouble; + typedef vector vecChar; + typedef vector vecInt; + typedef vector vecFloat; + typedef vector vecSize_t; + typedef vector vecString; + typedef vector alignmentElement; + typedef vector WERalignment; -struct param { - bool debugMode; - string referenceFile; // path to the resources - string hypothesisFile; // path to the configuration files - string outputFileExtension; - 
string outputFileName; - bool noPunct; - bool caseOn; - bool normalize; - bool tercomLike; - bool sgmlInputs; - bool noTxtIds; - bool printAlignments; - bool WER; - int debugLevel; +struct param +{ + bool debugMode; + string referenceFile; // path to the resources + string hypothesisFile; // path to the configuration files + string outputFileExtension; + string outputFileName; + bool noPunct; + bool caseOn; + bool normalize; + bool tercomLike; + bool sgmlInputs; + bool verbose; + bool count_verbose; + bool noTxtIds; + bool printAlignments; + bool WER; + int debugLevel; }; // param = { false, "","","","" }; @@ -67,35 +70,38 @@ struct param { // private: // public: -string vectorToString ( vector vec ); -string vectorToString ( vector vec ); -string vectorToString ( vector vec ); -string vectorToString ( vector vec, string s ); -string vectorToString ( vector vec, string s ); -string vectorToString ( vector vec, string s ); -string vectorToString ( vector vec, string s ); -string vectorToString ( char* vec, string s, int taille ); -string vectorToString ( int* vec, string s , int taille ); -string vectorToString ( bool* vec, string s , int taille ); -vector subVector ( vector vec, int start, int end ); -vector subVector ( vector vec, int start, int end ); -vector subVector ( vector vec, int start, int end ); -vector copyVector ( vector vec ); -vector copyVector ( vector vec ); -vector copyVector ( vector vec ); -vector stringToVector ( string s, string tok ); -vector stringToVector ( char s, string tok ); -vector stringToVector ( int s, string tok ); -vector stringToVectorInt ( string s, string tok ); -vector stringToVectorFloat ( string s, string tok ); -string lowerCase(string str); -string removePunct(string str); -string tokenizePunct(string str); -string removePunctTercom(string str); -string normalizeStd(string str); -string printParams(param p); -string join ( string delim, vector arr ); + string vectorToString ( vector vec ); + string vectorToString ( vector 
vec ); + string vectorToString ( vector vec ); + string vectorToString ( vector vec, string s ); + string vectorToString ( vector vec, string s ); + string vectorToString ( vector vec, string s ); + string vectorToString ( vector vec, string s ); + string vectorToString ( char* vec, string s, int taille ); + string vectorToString ( int* vec, string s , int taille ); + string vectorToString ( bool* vec, string s , int taille ); + string vectorToString ( vector* vec, string s, int taille ); + string vectorToString ( vector* vec, string s , int taille ); + string vectorToString ( vector* vec, string s , int taille ); + vector subVector ( vector vec, int start, int end ); + vector subVector ( vector vec, int start, int end ); + vector subVector ( vector vec, int start, int end ); + vector copyVector ( vector vec ); + vector copyVector ( vector vec ); + vector copyVector ( vector vec ); + vector stringToVector ( string s, string tok ); + vector stringToVector ( char s, string tok ); + vector stringToVector ( int s, string tok ); + vector stringToVectorInt ( string s, string tok ); + vector stringToVectorFloat ( string s, string tok ); + string lowerCase(string str); + string removePunct(string str); + string tokenizePunct(string str); + string removePunctTercom(string str); + string normalizeStd(string str); + string printParams(param p); + string join ( string delim, vector arr ); // }; -param copyParam(param p); + param copyParam(param p); } #endif From d0ff70decc98827d3adf9943b2a88d1cad0945be Mon Sep 17 00:00:00 2001 From: Christophe SERVAN Date: Mon, 16 Feb 2015 19:34:41 +0100 Subject: [PATCH 2/5] Change Namespace in TER library --- mert/TER/alignmentStruct.cpp | 2 +- mert/TER/alignmentStruct.h | 4 ++-- mert/TER/bestShiftStruct.cpp | 2 +- mert/TER/bestShiftStruct.h | 4 ++-- mert/TER/hashMap.cpp | 2 +- mert/TER/hashMap.h | 2 +- mert/TER/hashMapInfos.cpp | 2 +- mert/TER/hashMapInfos.h | 2 +- mert/TER/hashMapStringInfos.cpp | 2 +- mert/TER/hashMapStringInfos.h | 2 +- 
mert/TER/infosHasher.cpp | 4 ++-- mert/TER/infosHasher.h | 2 +- mert/TER/stringHasher.cpp | 2 +- mert/TER/stringHasher.h | 2 +- mert/TER/stringInfosHasher.cpp | 4 ++-- mert/TER/stringInfosHasher.h | 2 +- mert/TER/terAlignment.cpp | 2 +- mert/TER/terAlignment.h | 4 ++-- mert/TER/terShift.cpp | 2 +- mert/TER/terShift.h | 4 ++-- mert/TER/tercalc.cpp | 4 ++-- mert/TER/tercalc.h | 6 +++--- mert/TER/tools.cpp | 4 ++-- mert/TER/tools.h | 2 +- mert/TerScorer.cpp | 2 +- 25 files changed, 35 insertions(+), 35 deletions(-) diff --git a/mert/TER/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp index e2a880396d..74e8119027 100644 --- a/mert/TER/alignmentStruct.cpp +++ b/mert/TER/alignmentStruct.cpp @@ -21,7 +21,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "alignmentStruct.h" using namespace std; -namespace TERCpp +namespace TERCPPNS_TERCpp { string alignmentStruct::toString() { diff --git a/mert/TER/alignmentStruct.h b/mert/TER/alignmentStruct.h index 0963fbe94e..87d4ba9fab 100644 --- a/mert/TER/alignmentStruct.h +++ b/mert/TER/alignmentStruct.h @@ -30,9 +30,9 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -using namespace Tools; +using namespace TERCPPNS_Tools; -namespace TERCpp +namespace TERCPPNS_TERCpp { class alignmentStruct { diff --git a/mert/TER/bestShiftStruct.cpp b/mert/TER/bestShiftStruct.cpp index 8c27f1ff81..1e3ea0b522 100644 --- a/mert/TER/bestShiftStruct.cpp +++ b/mert/TER/bestShiftStruct.cpp @@ -22,7 +22,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -namespace TERCpp +namespace TERCPPNS_TERCpp { bestShiftStruct::bestShiftStruct() { diff --git a/mert/TER/bestShiftStruct.h b/mert/TER/bestShiftStruct.h index 144787faae..eb8465ffbc 100644 --- a/mert/TER/bestShiftStruct.h +++ b/mert/TER/bestShiftStruct.h @@ -32,9 +32,9 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -using namespace Tools; +using namespace TERCPPNS_Tools; 
-namespace TERCpp +namespace TERCPPNS_TERCpp { class bestShiftStruct { diff --git a/mert/TER/hashMap.cpp b/mert/TER/hashMap.cpp index de84ff796d..3d17ae42ba 100644 --- a/mert/TER/hashMap.cpp +++ b/mert/TER/hashMap.cpp @@ -25,7 +25,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { // hashMap::hashMap(); /* hashMap::~hashMap() diff --git a/mert/TER/hashMap.h b/mert/TER/hashMap.h index 017e6b831b..40cd9e187c 100644 --- a/mert/TER/hashMap.h +++ b/mert/TER/hashMap.h @@ -33,7 +33,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { class hashMap { diff --git a/mert/TER/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp index 23f57d8088..1ca39240c2 100644 --- a/mert/TER/hashMapInfos.cpp +++ b/mert/TER/hashMapInfos.cpp @@ -25,7 +25,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { // hashMapInfos::hashMap(); /* hashMapInfos::~hashMap() diff --git a/mert/TER/hashMapInfos.h b/mert/TER/hashMapInfos.h index 58cd50aefe..5e7f9c9ecf 100644 --- a/mert/TER/hashMapInfos.h +++ b/mert/TER/hashMapInfos.h @@ -32,7 +32,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { class hashMapInfos { diff --git a/mert/TER/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp index 773c148d4d..895a037ce5 100644 --- a/mert/TER/hashMapStringInfos.cpp +++ b/mert/TER/hashMapStringInfos.cpp @@ -25,7 +25,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { // hashMapStringInfos::hashMap(); /* hashMapStringInfos::~hashMap() diff --git a/mert/TER/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h index 3ea3794e53..4ed986096c 100644 --- 
a/mert/TER/hashMapStringInfos.h +++ b/mert/TER/hashMapStringInfos.h @@ -32,7 +32,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { class hashMapStringInfos { diff --git a/mert/TER/infosHasher.cpp b/mert/TER/infosHasher.cpp index 8ce23ae443..1fe35b8ebf 100644 --- a/mert/TER/infosHasher.cpp +++ b/mert/TER/infosHasher.cpp @@ -23,9 +23,9 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -using namespace Tools; +using namespace TERCPPNS_Tools; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { infosHasher::infosHasher (long cle,string cleTxt, vector valueVecInt ) { diff --git a/mert/TER/infosHasher.h b/mert/TER/infosHasher.h index 692bde49d3..0c5d44c3d6 100644 --- a/mert/TER/infosHasher.h +++ b/mert/TER/infosHasher.h @@ -29,7 +29,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "tools.h" using namespace std; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { class infosHasher { diff --git a/mert/TER/stringHasher.cpp b/mert/TER/stringHasher.cpp index f4d1526e8b..9b69bd15c4 100644 --- a/mert/TER/stringHasher.cpp +++ b/mert/TER/stringHasher.cpp @@ -24,7 +24,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt ) { diff --git a/mert/TER/stringHasher.h b/mert/TER/stringHasher.h index e2a79834c0..5e380c96e0 100644 --- a/mert/TER/stringHasher.h +++ b/mert/TER/stringHasher.h @@ -25,7 +25,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include using namespace std; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { class stringHasher diff --git a/mert/TER/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp index 007fd720f2..4d0312a52a 100644 --- a/mert/TER/stringInfosHasher.cpp +++ b/mert/TER/stringInfosHasher.cpp @@ -23,9 
+23,9 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -using namespace Tools; +using namespace TERCPPNS_Tools; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector valueVecInt ) { diff --git a/mert/TER/stringInfosHasher.h b/mert/TER/stringInfosHasher.h index f35e4596ba..596f9788e9 100644 --- a/mert/TER/stringInfosHasher.h +++ b/mert/TER/stringInfosHasher.h @@ -27,7 +27,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "tools.h" using namespace std; -namespace HashMapSpace +namespace TERCPPNS_HashMapSpace { class stringInfosHasher { diff --git a/mert/TER/terAlignment.cpp b/mert/TER/terAlignment.cpp index dda4a42392..555c3adc69 100644 --- a/mert/TER/terAlignment.cpp +++ b/mert/TER/terAlignment.cpp @@ -21,7 +21,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "terAlignment.h" using namespace std; -namespace TERCpp +namespace TERCPPNS_TERCpp { terAlignment::terAlignment() diff --git a/mert/TER/terAlignment.h b/mert/TER/terAlignment.h index e9524df7c6..6e2c78e3ec 100644 --- a/mert/TER/terAlignment.h +++ b/mert/TER/terAlignment.h @@ -30,8 +30,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -// using namespace HashMapSpace; -namespace TERCpp +// using namespace TERCPPNS_HashMapSpace; +namespace TERCPPNS_TERCpp { class terAlignment diff --git a/mert/TER/terShift.cpp b/mert/TER/terShift.cpp index e271ad6a7d..870deea03d 100644 --- a/mert/TER/terShift.cpp +++ b/mert/TER/terShift.cpp @@ -21,7 +21,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "terShift.h" using namespace std; -namespace TERCpp +namespace TERCPPNS_TERCpp { // terShift::terShift() diff --git a/mert/TER/terShift.h b/mert/TER/terShift.h index 65a812d155..10a3e0b0d7 100644 --- a/mert/TER/terShift.h +++ b/mert/TER/terShift.h @@ -30,9 +30,9 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 
02111-1307 USA using namespace std; -using namespace Tools; +using namespace TERCPPNS_Tools; -namespace TERCpp +namespace TERCPPNS_TERCpp { class terShift { diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp index 8a84b49b3c..ad7a5a15a6 100644 --- a/mert/TER/tercalc.cpp +++ b/mert/TER/tercalc.cpp @@ -31,8 +31,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // #include "tercalc.h" using namespace std; -using namespace Tools; -namespace TERCpp +using namespace TERCPPNS_Tools; +namespace TERCPPNS_TERCpp { terCalc::terCalc() diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h index 22b5e2c9df..bb1c68f15e 100644 --- a/mert/TER/tercalc.h +++ b/mert/TER/tercalc.h @@ -35,9 +35,9 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "bestShiftStruct.h" using namespace std; -using namespace Tools; -using namespace HashMapSpace; -namespace TERCpp +using namespace TERCPPNS_Tools; +using namespace TERCPPNS_HashMapSpace; +namespace TERCPPNS_TERCpp { // typedef size_t WERelement[2]; // Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del) diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp index 22ee091a85..3c14884dfb 100644 --- a/mert/TER/tools.cpp +++ b/mert/TER/tools.cpp @@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "tools.h" using namespace std; -using namespace boost::xpressive; -namespace Tools +using namespace TERCPPNS_boost::xpressive; +namespace TERCPPNS_Tools { string vectorToString ( vector vec ) diff --git a/mert/TER/tools.h b/mert/TER/tools.h index 4c3b108cd7..19ceaa9677 100644 --- a/mert/TER/tools.h +++ b/mert/TER/tools.h @@ -33,7 +33,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; -namespace Tools +namespace TERCPPNS_Tools { typedef vector vecDouble; typedef vector vecChar; diff --git a/mert/TerScorer.cpp b/mert/TerScorer.cpp index fc40fdc828..2e60f5da2d 100644 --- a/mert/TerScorer.cpp +++ 
b/mert/TerScorer.cpp @@ -10,7 +10,7 @@ #include "Util.h" using namespace std; -using namespace TERCpp; +using namespace TERCPPNS_TERCpp; namespace MosesTuning { From 6028c7cf9c256e5df80f71b52d912c47dab31abd Mon Sep 17 00:00:00 2001 From: Matthias Huck Date: Mon, 16 Feb 2015 18:39:25 +0000 Subject: [PATCH 3/5] tuneable-components config parameter for feature functions --- moses/FF/FeatureFunction.cpp | 31 +++++- moses/FF/FeatureFunction.h | 14 +++ moses/Parameter.cpp | 26 ++++- moses/ScoreComponentCollection.cpp | 6 +- moses/Util.cpp | 6 +- scripts/training/mert-moses.pl | 151 +++++++++++++++++++++-------- 6 files changed, 182 insertions(+), 52 deletions(-) diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp index 71f4ff5682..fa898857da 100644 --- a/moses/FF/FeatureFunction.cpp +++ b/moses/FF/FeatureFunction.cpp @@ -50,6 +50,7 @@ FeatureFunction(const std::string& line) , m_verbosity(std::numeric_limits::max()) , m_numScoreComponents(1) { + m_numTuneableComponents = m_numScoreComponents; Initialize(line); } @@ -61,6 +62,7 @@ FeatureFunction(size_t numScoreComponents, , m_verbosity(std::numeric_limits::max()) , m_numScoreComponents(numScoreComponents) { + m_numTuneableComponents = m_numScoreComponents; Initialize(line); } @@ -95,6 +97,7 @@ void FeatureFunction::ParseLine(const std::string &line) if (args[0] == "num-features") { m_numScoreComponents = Scan(args[1]); + m_numTuneableComponents = m_numScoreComponents; } else if (args[0] == "name") { m_description = args[1]; } else { @@ -120,13 +123,17 @@ void FeatureFunction::SetParameter(const std::string& key, const std::string& va { if (key == "tuneable") { m_tuneable = Scan(value); + } else if (key == "tuneable-components") { + UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription() + << ": tuneable-components cannot be set if tuneable=false"); + SetTuneableComponents(value); } else if (key == "require-sorting-after-source-context") { m_requireSortingAfterSourceContext = Scan(value); } else if 
(key == "verbosity") { m_verbosity = Scan(value); } else if (key == "filterable") { //ignore } else { - UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value); + UTIL_THROW2(GetScoreProducerDescription() << ": Unknown argument " << key << "=" << value); } } @@ -142,7 +149,27 @@ void FeatureFunction::ReadParameters() std::vector FeatureFunction::DefaultWeights() const { - UTIL_THROW(util::Exception, "No default weights"); + UTIL_THROW2(GetScoreProducerDescription() << ": No default weights"); +} + +void FeatureFunction::SetTuneableComponents(const std::string& value) +{ + std::vector toks = Tokenize(value,","); + UTIL_THROW_IF2(toks.empty(), GetScoreProducerDescription() + << ": Empty tuneable-components"); + UTIL_THROW_IF2(toks.size()!=m_numScoreComponents, GetScoreProducerDescription() + << ": tuneable-components value has to be a comma-separated list of " + << m_numScoreComponents << " boolean values"); + + m_tuneableComponents.resize(m_numScoreComponents); + m_numTuneableComponents = m_numScoreComponents; + + for (size_t i = 0; i < toks.size(); ++i) { + m_tuneableComponents[i] = Scan(toks[i]); + if (!m_tuneableComponents[i]) { + --m_numTuneableComponents; + } + } } } diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h index 7672701409..b59998d9d6 100644 --- a/moses/FF/FeatureFunction.h +++ b/moses/FF/FeatureFunction.h @@ -39,6 +39,8 @@ class FeatureFunction bool m_requireSortingAfterSourceContext; size_t m_verbosity; size_t m_numScoreComponents; + std::vector m_tuneableComponents; + size_t m_numTuneableComponents; //In case there's multiple producers with the same description static std::multiset description_counts; @@ -90,6 +92,17 @@ class FeatureFunction return m_tuneable; } + virtual bool HasTuneableComponents() const { + return m_numTuneableComponents; + } + + virtual bool IsTuneableComponent(size_t i) const { + if (m_numTuneableComponents == m_numScoreComponents) { + return true; + } + return m_tuneableComponents[i]; + } + 
virtual bool RequireSortingAfterSourceContext() const { return m_requireSortingAfterSourceContext; } @@ -151,6 +164,7 @@ class FeatureFunction virtual void SetParameter(const std::string& key, const std::string& value); virtual void ReadParameters(); + virtual void SetTuneableComponents(const std::string& value); }; } diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index 6052624cce..0080568303 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -970,11 +970,13 @@ void Parameter::WeightOverwrite() // should only be on 1 line UTIL_THROW_IF2(vec.size() != 1, - "Weight override should only be on 1 line"); + "weight-overwrite should only be on 1 line"); string name(""); vector weights; vector toks = Tokenize(vec[0]); + size_t cnt = 0; + const std::vector* oldWeights = NULL; for (size_t i = 0; i < toks.size(); ++i) { const string &tok = toks[i]; @@ -988,14 +990,30 @@ void Parameter::WeightOverwrite() } name = tok.substr(0, tok.size() - 1); + std::map >::const_iterator found = m_weights.find(name); + if (found!=m_weights.end()) { + oldWeights = &(found->second); + } else { + oldWeights = NULL; + } + cnt = 0; } else { // a weight for curr ff - float weight = Scan(toks[i]); - weights.push_back(weight); + if (toks[i] == "x") { + UTIL_THROW_IF2(!oldWeights || cnt>=oldWeights->size(), + "Keeping previous weight failed in weight-overwrite"); + weights.push_back(oldWeights->at(cnt)); + } else { + float weight = Scan(toks[i]); + weights.push_back(weight); + } + ++cnt; } } - m_weights[name] = weights; + if (name != "") { + m_weights[name] = weights; + } } diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp index a1c8646923..e656743ec3 100644 --- a/moses/ScoreComponentCollection.cpp +++ b/moses/ScoreComponentCollection.cpp @@ -332,14 +332,16 @@ void ScoreComponentCollection::OutputFeatureScores( std::ostream& out bool labeledOutput = staticData.IsLabeledNBestList(); // regular features (not sparse) - if (ff->GetNumScoreComponents() 
!= 0) { + if (ff->HasTuneableComponents()) { if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) { lastName = ff->GetScoreProducerDescription(); out << " " << lastName << "="; } vector scores = GetScoresForProducer( ff ); for (size_t j = 0; jIsTuneableComponent(j)) { + out << " " << scores[j]; + } } } diff --git a/moses/Util.cpp b/moses/Util.cpp index 5b6f16e2b3..1d1df7d58a 100644 --- a/moses/Util.cpp +++ b/moses/Util.cpp @@ -348,7 +348,11 @@ void PrintFeatureWeight(const FeatureFunction* ff) size_t numScoreComps = ff->GetNumScoreComponents(); vector values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff); for (size_t i = 0; i < numScoreComps; ++i) { - cout << " " << values[i]; + if (ff->IsTuneableComponent(i)) { + cout << " " << values[i]; + } else { + cout << " UNTUNEABLECOMPONENT"; + } } cout << endl; diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl index 25d12a8ab9..5a2160a67e 100755 --- a/scripts/training/mert-moses.pl +++ b/scripts/training/mert-moses.pl @@ -576,8 +576,9 @@ my $featlist = get_featlist_from_moses($___CONFIG); $featlist = insert_ranges_to_featlist($featlist, $___RANGES); -# Mark which features are disabled: +# Mark which features are disabled if (defined $___ACTIVATE_FEATURES) { + $featlist->{"enabled"} = undef; my %enabled = map { ($_, 1) } split /[, ]+/, $___ACTIVATE_FEATURES; my %cnt; for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) { @@ -1145,6 +1146,8 @@ } my $best_featlist = get_featlist_from_file("run$bestit.dense"); $best_featlist->{"untuneables"} = $featlist->{"untuneables"}; + $best_featlist->{"allcomponentsuntuneable"} = $featlist->{"allcomponentsuntuneable"}; + $best_featlist->{"skippeduntuneablecomponents"} = $featlist->{"skippeduntuneablecomponents"}; create_config($___CONFIG_ORIG, "./moses.ini", $best_featlist, $bestit, $bestbleu, $best_sparse_file); } @@ -1235,10 +1238,26 @@ sub run_decoder { } # moses now does not seem accept "-tm X -tm Y" but needs "-tm X 
Y" my %model_weights; + my $valcnt = 0; + my $offset = 0; for(my $i=0; $i{"names"}}); $i++) { my $name = $featlist->{"names"}->[$i]; - $model_weights{$name} = "$name=" if !defined $model_weights{$name}; + if (!defined $model_weights{$name}) { + $model_weights{$name} = "$name="; + $valcnt = 0; + while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) { + #$model_weights{$name} .= sprintf " %.6f", $oldvalues{$name}{$valcnt+$offset}; + $model_weights{$name} .= sprintf " x"; + $offset++; + } + } $model_weights{$name} .= sprintf " %.6f", $vals[$i]; + $valcnt++; + while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) { + #$model_weights{$name} .= sprintf " %.6f", $oldvalues{$name}{$valcnt+$offset}; + $model_weights{$name} .= sprintf " x"; + $offset++; + } } my $decoder_config = ""; $decoder_config = "-weight-overwrite '" . join(" ", values %model_weights) ."'" unless $___USE_CONFIG_WEIGHTS_FIRST && $run==1; @@ -1362,8 +1381,11 @@ sub get_featlist_from_file { my @names = (); my @startvalues = (); my @untuneables = (); + my @allcomponentsuntuneable = (); + my %skippeduntuneablecomponents = (); open my $fh, '<', $featlistfn or die "Can't read $featlistfn : $!"; my $nr = 0; + my $i = 0; my @errs = (); while (<$fh>) { $nr++; @@ -1373,11 +1395,25 @@ sub get_featlist_from_file { next if (!defined($valuesStr)); my @values = split(/ /, $valuesStr); - foreach my $value (@values) { - push @errs, "$featlistfn:$nr:Bad initial value of $longname: $value\n" - if $value !~ /^[+-]?[0-9.\-e]+$/; - push @names, $longname; - push @startvalues, $value; + my $valcnt = 0; + my $hastuneablecomponent = 0; + foreach my $value (@values) { + if ($value =~ /^UNTUNEABLECOMPONENT$/) { + $skippeduntuneablecomponents{$longname}{$valcnt} = 1; + $i++; + $valcnt++; + } elsif ($value =~ /^[+-]?[0-9.\-e]+$/) { + push @names, $longname; + push @startvalues, $value; + $i++; + $valcnt++; + $hastuneablecomponent = 1; + } else { + push @errs, 
"$featlistfn:$nr:Bad initial value of $longname: $value\n" + } + } + if (!$hastuneablecomponent) { + push @allcomponentsuntuneable, $longname; } } elsif (/^(\S+) UNTUNEABLE$/) { @@ -1391,7 +1427,7 @@ sub get_featlist_from_file { warn join("", @errs); exit 1; } - return {"names"=>\@names, "values"=>\@startvalues, "untuneables"=>\@untuneables}; + return {"names"=>\@names, "values"=>\@startvalues, "untuneables"=>\@untuneables, "allcomponentsuntuneable"=>\@allcomponentsuntuneable, "skippeduntuneablecomponents"=>\%skippeduntuneablecomponents}; } @@ -1487,6 +1523,8 @@ sub create_config { print $out "# We were before running iteration $iteration\n"; print $out "# finished ".`date`; + my %oldvalues = (); + my $line = <$ini_fh>; while(1) { last unless $line; @@ -1501,34 +1539,51 @@ sub create_config { # parameter name my $parameter = $1; - if ($parameter eq "weight") { - # leave weights 'til last. We're changing it - while ($line = <$ini_fh>) { - last if $line =~ /^\[/; - if ($line =~ /^([^_=\s]+)/) { - for( @{$featlist->{"untuneables"}} ){ - if ($1 eq $_ ) {# if weight is untuneable, copy it into new config - push @keep_weights, $line; - } - } - } - } - } - elsif (defined($P{$parameter})) { - # found a param (thread, verbose etc) that we're overriding. Leave to the end - while ($line = <$ini_fh>) { - last if $line =~ /^\[/; - } - } - else { - # unchanged parameter, write old - print $out "[$parameter]\n"; - while ($line = <$ini_fh>) { - last if $line =~ /^\[/; - print $out $line; - } - } - } + if ($parameter eq "weight") { + # leave weights 'til last. 
We're changing it + while ($line = <$ini_fh>) { + last if $line =~ /^\[/; + if ($line =~ /^(\S+)= (.+)$/) { + for( @{$featlist->{"untuneables"}} ){ + if ($1 eq $_ ) {# if weight is untuneable, copy it into new config + push @keep_weights, $line; + } + } + for( @{$featlist->{"allcomponentsuntuneable"}} ){ + if ($1 eq $_ ) {# if all dense weights are untuneable, copy it into new config + push @keep_weights, $line; + } + } + + my ($longname, $valuesStr) = ($1, $2); + next if (!defined($valuesStr)); + print $valuesStr; + my @values = split(/ /, $valuesStr); + my $valcnt = 0; + foreach my $value (@values) { + if ($value =~ /^[+-]?[0-9.\-e]+$/) { + $oldvalues{$longname}{$valcnt} = $value; + } + $valcnt++; + } + } + } + } + elsif (defined($P{$parameter})) { + # found a param (thread, verbose etc) that we're overriding. Leave to the end + while ($line = <$ini_fh>) { + last if $line =~ /^\[/; + } + } + else { + # unchanged parameter, write old + print $out "[$parameter]\n"; + while ($line = <$ini_fh>) { + last if $line =~ /^\[/; + print $out $line; + } + } + } # write all additional parameters foreach my $parameter (keys %P) { @@ -1543,20 +1598,30 @@ sub create_config { my $prevName = ""; my $outStr = ""; + my $valcnt = 0; + my $offset = 0; for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) { my $name = $featlist->{"names"}->[$i]; my $val = $featlist->{"values"}->[$i]; - if ($prevName eq $name) { - $outStr .= " $val"; + if ($prevName ne $name) { + print $out "$outStr\n"; + $valcnt = 0; + $outStr = "$name="; + $prevName = $name; + while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) { + $outStr .= " $oldvalues{$name}{$valcnt+$offset}"; + $offset++; + } } - else { - print $out "$outStr\n"; - $outStr = "$name= $val"; - $prevName = $name; + $outStr .= " $val"; + $valcnt++; + while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) { + $outStr .= " $oldvalues{$name}{$valcnt+$offset}"; + $offset++; } } - print 
$out "$outStr\n"; + print $out "$outStr\n"; for (@keep_weights) { print $out $_; From 90471e22a8513af2ae8f202345bfc896a517d063 Mon Sep 17 00:00:00 2001 From: Christophe SERVAN Date: Mon, 16 Feb 2015 19:46:25 +0100 Subject: [PATCH 4/5] Change Namespace in TER library --- mert/TER/tercalc.cpp | 6 +++--- mert/TER/tercalc.h | 4 ++-- mert/TER/tools.cpp | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp index ad7a5a15a6..fac17c82bd 100644 --- a/mert/TER/tercalc.cpp +++ b/mert/TER/tercalc.cpp @@ -892,7 +892,7 @@ namespace TERCPPNS_TERCpp } } - vector * terCalc::calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& align, vector* herr, vector* rerr, vector* ralign ) + vector * terCalc::calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCPPNS_TERCpp::terAlignment& align, vector* herr, vector* rerr, vector* ralign ) { vector * allshifts = new vector(0); // to_return.clear(); @@ -1072,11 +1072,11 @@ namespace TERCPPNS_TERCpp } - alignmentStruct terCalc::permuter ( vector< string >& words, TERCpp::terShift& s ) + alignmentStruct terCalc::permuter ( vector< string >& words, TERCPPNS_TERCpp::terShift& s ) { return permuter ( words, s.start, s.end, s.newloc ); } - alignmentStruct terCalc::permuter ( vector< string >& words, TERCpp::terShift* s ) + alignmentStruct terCalc::permuter ( vector< string >& words, TERCPPNS_TERCpp::terShift* s ) { return permuter ( words, s->start, s->end, s->newloc ); } diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h index bb1c68f15e..7fac9d4e3b 100644 --- a/mert/TER/tercalc.h +++ b/mert/TER/tercalc.h @@ -98,9 +98,9 @@ namespace TERCPPNS_TERCpp terAlignment TER ( vector& hyp, vector& ref , float avRefLength ); terAlignment TER ( vector& hyp, vector& ref ); terAlignment TER ( vector& hyp, vector& ref ); - bestShiftStruct * findBestShift ( vector< string >& cur, vector< string >& hyp, vector< 
string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& med_align ); + bestShiftStruct * findBestShift ( vector< string >& cur, vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCPPNS_TERCpp::terAlignment& med_align ); void calculateTerAlignment ( terAlignment& align, vector* herr, vector* rerr, vector* ralign ); - vector * calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& align, vector* herr, vector* rerr, vector* ralign ); + vector * calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCPPNS_TERCpp::terAlignment& align, vector* herr, vector* rerr, vector* ralign ); alignmentStruct permuter ( vector& words, terShift& s ); alignmentStruct permuter ( vector& words, terShift* s ); alignmentStruct permuter ( vector& words, int start, int end, int newloc ); diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp index 3c14884dfb..d03deb1f97 100644 --- a/mert/TER/tools.cpp +++ b/mert/TER/tools.cpp @@ -21,7 +21,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "tools.h" using namespace std; -using namespace TERCPPNS_boost::xpressive; +using namespace boost::xpressive; namespace TERCPPNS_Tools { From 93ab057eda69a7915efbc9fa92d4ce6341e6ca02 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 16 Feb 2015 13:27:26 -0500 Subject: [PATCH 5/5] Try to fix Boost static/dynamic linkage --- jam-files/sanity.jam | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/jam-files/sanity.jam b/jam-files/sanity.jam index 23ed94e401..5bc7904625 100644 --- a/jam-files/sanity.jam +++ b/jam-files/sanity.jam @@ -137,24 +137,15 @@ rule boost-lib ( name macro : deps * ) { main = "BOOST_AUTO_TEST_CASE(foo) {}" ; flags += " -DBOOST_TEST_MODULE=CompileTest $(I-boost-include) -include boost/test/unit_test.hpp" ; } - if $(boost-auto-shared) = "shared" { - flags += " -DBOOST_$(macro)" ; + local default-linkage ; + if 
[ test_flags " -Wl,-Bstatic $(flags) -Wl,-Bdynamic " : $(main) ] { + default-linkage = "static" ; } else { - flags = " -Wl,-Bstatic $(flags) -Wl,-Bdynamic " ; - } - if [ test_flags $(flags) : $(main) ] { - lib inner_boost_$(name) : : single $(boost-search) boost_$(name)$(boost-lib-version) : static : $(deps) ; - lib inner_boost_$(name) : : multi $(boost-search) boost_$(name)$(boost-lib-version) : static : $(deps) ; - } else { - lib inner_boost_$(name) : : $(boost-search) boost_$(name)$(boost-lib-version) : : $(deps) ; - } - - if $(boost-auto-shared) = "shared" { - alias boost_$(name) : inner_boost_$(name) : shared ; + default-linkage = "shared" ; requirements += BOOST_$(macro) ; - } else { - alias boost_$(name) : inner_boost_$(name) : static ; } + lib boost_$(name) : $(deps) : static $(boost-search) boost_$(name)$(boost-lib-version) : $(default-linkage) ; + lib boost_$(name) : $(deps) : shared $(boost-search) boost_$(name)$(boost-lib-version) BOOST_$(macro) : $(default-linkage) : BOOST_$(macro) ; } #Argument is e.g. 103600 @@ -177,14 +168,11 @@ rule boost ( min-version ) { boost-lib-version = "" ; } - #Are we linking static binaries against shared boost? - boost-auto-shared = [ auto-shared "boost_program_options"$(boost-lib-version) : $(L-boost-search) ] ; - #See tools/build/v2/contrib/boost.jam in a boost distribution for a table of macros to define. boost-lib system SYSTEM_DYN_LINK ; boost-lib thread THREAD_DYN_DLL : boost_system ; boost-lib program_options PROGRAM_OPTIONS_DYN_LINK ; - boost-lib unit_test_framework DELETE_ME_TEST_DYN_LINK ; + boost-lib unit_test_framework TEST_DYN_LINK ; boost-lib iostreams IOSTREAMS_DYN_LINK ; boost-lib filesystem FILE_SYSTEM_DYN_LINK ; # if $(BOOST-VERSION) >= 104800 {