Skip to content

Commit

Permalink
added search for worst cases
Browse files Browse the repository at this point in the history
  • Loading branch information
perevalovds committed Oct 8, 2019
1 parent 0b1210a commit e5b54d3
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 4 deletions.
2 changes: 1 addition & 1 deletion example_analogy/src/ofApp.cpp
Expand Up @@ -111,7 +111,7 @@ void ofApp::setup(){

int count = 5;

vector<ofxWord2VecEmbeddingMatch> match = embed.match_cos(Vec, 5, used_indices);
vector<ofxWord2VecEmbeddingMatch> match = embed.match_cos(Vec, count, used_indices);
cout << "Result:" << endl;
for (int i = 0; i < match.size(); i++) {
cout << " " << match[i].word << ": " << match[i].conf << endl;
Expand Down
39 changes: 38 additions & 1 deletion src/ofxWord2VecEmbedding.cpp
Expand Up @@ -165,7 +165,7 @@ vector<ofxWord2VecEmbeddingMatch> ofxWord2VecEmbedding::match_cos(const ofxWord2
for (int d = count - 1; d > a; d--) {
match[d] = match[d - 1];
}
match[a] = ofxWord2VecEmbeddingMatch(vocab[i], dist);
match[a] = ofxWord2VecEmbeddingMatch(vocab[i], dist, i);
break;
}
}
Expand All @@ -175,3 +175,40 @@ vector<ofxWord2VecEmbeddingMatch> ofxWord2VecEmbedding::match_cos(const ofxWord2
}

//--------------------------------------------------------------
// Finds the words that match the vector 'v' worst, i.e. the words whose
// vectors give the smallest values of dist_cosine_optimized() against 'v'.
//   v            - query vector
//   count        - number of results requested; a value <= 0 yields an empty result
//   except_words - indices of words that must not appear in the result
// Returns 'count' entries ordered by ascending 'conf'. If fewer than 'count'
// words are allowed, the trailing entries stay as placeholders
// (empty word, index = -1, conf = 2).
vector<ofxWord2VecEmbeddingMatch> ofxWord2VecEmbedding::match_worst_cos(const ofxWord2VecVector &v, int count,
    const vector<int> &except_words) {

    // A negative count would be converted to a huge unsigned size by the
    // vector constructor below, so reject it up front.
    if (count <= 0) {
        return vector<ofxWord2VecEmbeddingMatch>();
    }

    vector<ofxWord2VecEmbeddingMatch> worst(count);
    for (auto &w : worst) {
        // placeholder above the [-1..1] range stated for 'conf',
        // so any computed value replaces it
        w.conf = 2;
    }

    for (int i = 0; i < words; i++) {
        //check if this word is allowable (not in 'except_words' array)
        bool allow = true;
        for (int excluded : except_words) {
            if (i == excluded) {
                allow = false;
                break;
            }
        }
        if (!allow) continue;

        //compute distance and compare with the current worst list
        float dist = vec[i].dist_cosine_optimized(v);

        //'worst' is kept sorted ascending: insert before the first entry
        //with a larger value, shifting the tail down and dropping the last one
        for (int a = 0; a < count; a++) {
            if (dist < worst[a].conf) {
                for (int d = count - 1; d > a; d--) {
                    worst[d] = worst[d - 1];
                }
                worst[a] = ofxWord2VecEmbeddingMatch(vocab[i], dist, i);
                break;
            }
        }
    }

    return worst;
}

//--------------------------------------------------------------
10 changes: 8 additions & 2 deletions src/ofxWord2VecEmbedding.h
Expand Up @@ -8,11 +8,13 @@
//result for matching
struct ofxWord2VecEmbeddingMatch {
    std::string word;   //word name
    int index = -1;     //word index; -1 means "not set"
    float conf = -2;    //confidence [-1..1]; the default -2 lies below that range

    //creates an empty match (no word, index = -1, conf = -2)
    ofxWord2VecEmbeddingMatch() {}

    //creates a match for 'word' with confidence 'conf';
    //'index' may be omitted, so that two-argument callers keep compiling
    ofxWord2VecEmbeddingMatch(const std::string &word, float conf, int index = -1)
        : word(word), index(index), conf(conf) {
    }

};
Expand Down Expand Up @@ -41,10 +43,14 @@ struct ofxWord2VecEmbedding {
int find_case_sensitive(const string &word);
int find_case_insensitive(const string &word);

//find best mathing words to a given vector in cosine distance
//find best matching words to a given vector in cosine distance
vector<ofxWord2VecEmbeddingMatch> match_cos(const ofxWord2VecVector &v, int count,
const vector<int> &except_words = vector<int>());

//find worst matching words to a given vector in cosine distance;
//matches are returned ordered from the lowest 'conf' upwards,
//words whose indices are listed in 'except_words' are skipped
vector<ofxWord2VecEmbeddingMatch> match_worst_cos(const ofxWord2VecVector &v, int count,
const vector<int> &except_words = vector<int>());

protected:
bool speedup_word_search = false;
map<string, int> map_case_sens; //mapping word->index for fast word search
Expand Down

0 comments on commit e5b54d3

Please sign in to comment.