-
-
Notifications
You must be signed in to change notification settings - Fork 1k
/
hasheddoc_benchmarks.cpp
51 lines (42 loc) · 1.4 KB
/
hasheddoc_benchmarks.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Evangelos Anagnostopoulos
*/
#include <shogun/base/init.h>
#include <shogun/classifier/svm/SVMOcas.h>
#include <shogun/features/HashedDocDotFeatures.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/lib/NGramTokenizer.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argv, char** argc)
{
init_shogun_with_defaults();
int32_t bits[] = {8, 10, 12, 16, 20};
int32_t bits_length = 5;
int32_t num_strings = 5000;
int32_t max_str_length = 10000;
SGStringList<char> string_list(num_strings, max_str_length);
SG_SPRINT("Creating features...\n");
for (index_t i=0; i<num_strings; i++)
{
string_list.strings[i] = SGString<char>(max_str_length);
for (index_t j=0; j<max_str_length; j++)
string_list.strings[i].string[j] = (char) CMath::random('A', 'Z');
}
SG_SPRINT("Features were created.\n");
CStringFeatures<char>* string_feats = new CStringFeatures<char>(string_list, RAWBYTE);
CNGramTokenizer* tzer = new CNGramTokenizer(3);
for (index_t i=0; i<bits_length; i++)
{
int32_t b = bits[i];
SG_SPRINT("Starting training for num_bits = %d\n", b);
SG_REF(string_feats);
SG_REF(tzer);
CHashedDocDotFeatures* feats = new CHashedDocDotFeatures(b, string_feats, tzer);
feats->benchmark_dense_dot_range();
feats->benchmark_add_to_dense_vector();
}
exit_shogun();
}