Skip to content

Commit

Permalink
Added HashedDocDotFeatures in modular interfaces
Browse files Browse the repository at this point in the history
  • Loading branch information
van51 committed Aug 5, 2013
1 parent d5f6740 commit 6d395aa
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 0 deletions.
@@ -0,0 +1,37 @@
#!/usr/bin/env python
# Sample documents fed to the example when it is run as a script.
strings = [
    'hey',
    'guys',
    'i',
    'am',
    'a',
    'string',
]

# One entry per example run; each entry is the argument list for the example function.
parameter_list = [[strings]]

def features_hasheddocdot_modular(strings):
    """Build HashedDocDotFeatures over a collection of raw strings.

    The documents are wrapped in StringCharFeatures (RAWBYTE alphabet),
    tokenized with an n-gram tokenizer of size 8 and hashed into a space
    of 2**5 = 32 dimensions, with normalization enabled.

    Args:
        strings: list of documents (Python strings) to turn into features.

    Returns:
        The constructed HashedDocDotFeatures object.
    """
    from shogun.Features import StringCharFeatures, RAWBYTE
    from shogun.Features import HashedDocDotFeatures
    from shogun.Library import NGramTokenizer

    # create string features
    f = StringCharFeatures(strings, RAWBYTE)

    # set the number of bits of the target dimension
    # means a dim of size 2^5=32
    num_bits = 5

    # create the ngram tokenizer of size 8 to parse the strings
    tokenizer = NGramTokenizer(8)

    # normalize results
    normalize = True

    # create HashedDocDot features
    hddf = HashedDocDotFeatures(num_bits, f, tokenizer, normalize)

    # should expect 32
    #print('Feature space dimensionality is', hddf.get_dim_feature_space())

    #print('Self dot product of string 0', hddf.dot(0, hddf, 0))

    return hddf

# Script entry point: announce the example, then run it on the first parameter set.
if __name__ == '__main__':
    print('HashedDocDotFeatures')
    example_args = parameter_list[0]
    features_hasheddocdot_modular(*example_args)
2 changes: 2 additions & 0 deletions src/interfaces/modular/Features.i
Expand Up @@ -50,6 +50,7 @@
%rename(AttributeFeatures) CAttributeFeatures;
%rename(CombinedFeatures) CCombinedFeatures;
%rename(CombinedDotFeatures) CCombinedDotFeatures;
%rename(HashedDocDotFeatures) CHashedDocDotFeatures;
%rename(Labels) CLabels;
%rename(LabelsFactory) CLabelsFactory;

Expand Down Expand Up @@ -477,6 +478,7 @@ namespace shogun
%include <shogun/features/AttributeFeatures.h>
%include <shogun/features/CombinedFeatures.h>
%include <shogun/features/CombinedDotFeatures.h>
%include <shogun/features/HashedDocDotFeatures.h>

%include <shogun/labels/Labels.h>
%include <shogun/labels/LabelsFactory.h>
Expand Down
1 change: 1 addition & 0 deletions src/interfaces/modular/Features_includes.i
Expand Up @@ -20,6 +20,7 @@
#include <shogun/features/Alphabet.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/features/CombinedDotFeatures.h>
#include <shogun/features/HashedDocDotFeatures.h>
#include <shogun/labels/Labels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/labels/DenseLabels.h>
Expand Down
6 changes: 6 additions & 0 deletions src/interfaces/modular/Library.i
Expand Up @@ -21,6 +21,9 @@
%rename(Hash) CHash;
%rename(StructuredData) CStructuredData;
%rename(DynamicObjectArray) CDynamicObjectArray;
%rename(Tokenizer) CTokenizer;
%rename(DelimiterTokenizer) CDelimiterTokenizer;
%rename(NGramTokenizer) CNGramTokenizer;

%rename(IndexBlock) CIndexBlock;
%rename(IndexBlockRelation) CIndexBlockRelation;
Expand Down Expand Up @@ -458,6 +461,9 @@ namespace shogun
/* Hash */
%include <shogun/lib/Hash.h>

%include <shogun/lib/Tokenizer.h>
%include <shogun/lib/DelimiterTokenizer.h>
%include <shogun/lib/NGramTokenizer.h>
%include <shogun/lib/Cache.h>
%include <shogun/lib/List.h>
%include <shogun/lib/Signal.h>
Expand Down
3 changes: 3 additions & 0 deletions src/interfaces/modular/Library_includes.i
Expand Up @@ -26,4 +26,7 @@
#include <shogun/lib/IndexBlockGroup.h>
#include <shogun/lib/IndexBlockTree.h>
#include <shogun/lib/Data.h>
#include <shogun/lib/Tokenizer.h>
#include <shogun/lib/DelimiterTokenizer.h>
#include <shogun/lib/NGramTokenizer.h>
%}
5 changes: 5 additions & 0 deletions src/shogun/features/HashedDocDotFeatures.cpp
Expand Up @@ -27,6 +27,11 @@ CHashedDocDotFeatures::CHashedDocDotFeatures(const CHashedDocDotFeatures& orig)
init(orig.num_bits, orig.doc_collection, orig.tokenizer, orig.should_normalize);
}

/** Constructor taking a file loader.
 *
 * Loading from a file is not implemented: the body only invokes
 * SG_NOTIMPLEMENTED and never reads from the loader.
 *
 * @param loader file handle; not used by this constructor
 */
CHashedDocDotFeatures::CHashedDocDotFeatures(CFile* loader)
{
// NOTE(review): presumably SG_NOTIMPLEMENTED raises an error and aborts construction;
// init() is never called on this path - confirm no member needs initializing first.
SG_NOTIMPLEMENTED;
}

void CHashedDocDotFeatures::init(int32_t hash_bits, CStringFeatures<char>* docs,
CTokenizer* tzer, bool normalize)
{
Expand Down

0 comments on commit 6d395aa

Please sign in to comment.