Permalink
Browse files

Merge pull request #1365 from van51/feature/modular

Added HashedDocConverter to modular interfaces
  • Loading branch information...
Soeren Sonnenburg
Soeren Sonnenburg committed Aug 5, 2013
2 parents 00bbcf7 + d21ef9a commit 74d2c7d468aaf218a35ff7b86f10774a53ca6688
@@ -0,0 +1,48 @@
#!/usr/bin/env python
# Example corpus: four short documents to be hashed.
strings = [
    'example document 1',
    'example document 2',
    'example document 3',
    'example document 4',
]

# One entry per example run; each entry is the positional argument list
# that gets unpacked into converter_hasheddoc_modular().
parameter_list = [
    [strings],
]
def converter_hasheddoc_modular(strings):
    """Hash a list of documents with HashedDocConverter and print a comparison.

    The documents are wrapped as raw-byte string features, tokenized with an
    n-gram tokenizer and converted by HashedDocConverter.  The same settings
    are then used to build HashedDocDotFeatures, and for both objects the
    feature-space dimensionality and the self dot product of document 0 are
    printed so the two code paths can be compared by eye.

    Args:
        strings: list of documents (Python strings) to convert.

    Returns:
        The object returned by ``HashedDocConverter.apply`` for the input
        documents.
    """
    # Imports stay function-local, as in the other modular examples, so the
    # module can be loaded even when shogun is not installed.
    from shogun.Features import RAWBYTE, StringCharFeatures, HashedDocDotFeatures
    from shogun.Library import NGramTokenizer
    from shogun.Converter import HashedDocConverter

    # create string features
    string_feats = StringCharFeatures(strings, RAWBYTE)

    # number of bits of the target dimension: 5 bits means a dim of 2^5=32
    num_bits = 5

    # create the ngram tokenizer of size 8 to parse the strings
    tokenizer = NGramTokenizer(8)

    # normalize results
    normalize = True

    # create the converter and apply it to the documents
    converter = HashedDocConverter(tokenizer, num_bits, normalize)
    converted_feats = converter.apply(string_feats)

    # should expect 32
    print("Converted features' space dimensionality is", converted_feats.get_dim_feature_space())
    print('Self dot product of string 0 with converted feats:', converted_feats.dot(0, converted_feats, 0))

    # on-the-fly hashed features built from the same inputs, for comparison
    hashed_feats = HashedDocDotFeatures(num_bits, string_feats, tokenizer, normalize)
    print("Hashed features' space dimensionality is", hashed_feats.get_dim_feature_space())
    print('Self dot product of string 0 with hashed feats:', hashed_feats.dot(0, hashed_feats, 0))

    return converted_feats
# Script entry point: announce the example, then run it on the first
# parameter set.
if __name__ == '__main__':
    print('HashedDocConverter')
    example_args = parameter_list[0]
    converter_hasheddoc_modular(*example_args)
@@ -22,10 +22,11 @@
/* Expose each C-prefixed converter class to the target language under the
 * same name without the leading "C". */
%rename(Isomap) CIsomap;
%rename(StochasticProximityEmbedding) CStochasticProximityEmbedding;
%rename(FactorAnalysis) CFactorAnalysis;
/* NOTE(review): the next four "%rename (" lines and the four "%rename("
 * lines after them name the same classes; this looks like the before/after
 * sides of a whitespace-only diff hunk -- confirm only one set belongs in
 * the file. */
%rename (TDistributedStochasticNeighborEmbedding) CTDistributedStochasticNeighborEmbedding;
%rename (ManifoldSculpting) CManifoldSculpting;
%rename (Jade) CJade;
%rename (SOBI) CSOBI;
%rename(TDistributedStochasticNeighborEmbedding) CTDistributedStochasticNeighborEmbedding;
%rename(ManifoldSculpting) CManifoldSculpting;
%rename(Jade) CJade;
%rename(SOBI) CSOBI;
/* Rename for the converter class CHashedDocConverter. */
%rename(HashedDocConverter) CHashedDocConverter;
/* %newobject marks these methods as returning newly created objects
 * (see the SWIG documentation on object ownership). */
%newobject shogun::CEmbeddingConverter::apply;
%newobject shogun::*::embed_kernel;
@@ -50,3 +51,4 @@
%include <shogun/converter/ManifoldSculpting.h>
%include <shogun/converter/ica/Jade.h>
%include <shogun/converter/ica/SOBI.h>
%include <shogun/converter/HashedDocConverter.h>
@@ -18,4 +18,5 @@
#include <shogun/converter/ManifoldSculpting.h>
#include <shogun/converter/ica/Jade.h>
#include <shogun/converter/ica/SOBI.h>
#include <shogun/converter/HashedDocConverter.h>
%}

0 comments on commit 74d2c7d

Please sign in to comment.