[wip] 2nd round
vigsterkr committed Aug 11, 2017
1 parent bd12425 commit 16a3602
Showing 122 changed files with 1,566 additions and 1,393 deletions.
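
A minimal sketch of the pattern this commit applies across these files,
assuming shogun's index_t typedef (int32_t in src/shogun/lib/common.h at the
time); the zero_counts helper below is hypothetical, not code from the diff:

	#include <cstdint>

	typedef int32_t index_t; // shogun-style index typedef (assumption)

	// Counters and lengths are declared against the typedef instead of a
	// hard-coded int32_t, so the index width can later be changed in one
	// place (e.g. to int64_t for datasets with more than 2^31 examples).
	void zero_counts(int64_t* num_per_class, index_t num_classes)
	{
		for (index_t i = 0; i < num_classes; i++)
			num_per_class[i] = 0;
	}
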
18 changes: 9 additions & 9 deletions src/shogun/classifier/NearestCentroid.cpp
@@ -62,26 +62,26 @@ namespace shogun{
 	{
 		data = distance->get_lhs();
 	}
-	int32_t num_vectors = data->get_num_vectors();
-	int32_t num_classes = ((CMulticlassLabels*) m_labels)->get_num_classes();
-	int32_t num_feats = ((CDenseFeatures<float64_t>*) data)->get_num_features();
+	index_t num_vectors = data->get_num_vectors();
+	index_t num_classes = ((CMulticlassLabels*) m_labels)->get_num_classes();
+	index_t num_feats = ((CDenseFeatures<float64_t>*) data)->get_num_features();
 	SGMatrix<float64_t> centroids(num_feats,num_classes);
 	centroids.zero();
 
 	m_centroids->set_num_features(num_feats);
 	m_centroids->set_num_vectors(num_classes);
 
 	int64_t* num_per_class = new int64_t[num_classes];
-	for (int32_t i=0 ; i<num_classes ; i++)
+	for (index_t i=0 ; i<num_classes ; i++)
 	{
 		num_per_class[i]=0;
 	}
 
-	for (int32_t idx=0 ; idx<num_vectors ; idx++)
+	for (index_t idx=0 ; idx<num_vectors ; idx++)
 	{
-		int32_t current_len;
+		index_t current_len;
 		bool current_free;
-		int32_t current_class = ((CMulticlassLabels*) m_labels)->get_label(idx);
+		index_t current_class = ((CMulticlassLabels*) m_labels)->get_label(idx);
 		float64_t* target = centroids.matrix + num_feats*current_class;
 		float64_t* current = ((CDenseFeatures<float64_t>*)data)->get_feature_vector(idx,current_len,current_free);
 		SGVector<float64_t>::add(target,1.0,target,1.0,current,current_len);
@@ -90,10 +90,10 @@ namespace shogun{
 	}
 
 
-	for (int32_t i=0 ; i<num_classes ; i++)
+	for (index_t i=0 ; i<num_classes ; i++)
 	{
 		float64_t* target = centroids.matrix + num_feats*i;
-		int32_t total = num_per_class[i];
+		index_t total = num_per_class[i];
 		float64_t scale = 0;
 		if(total>1)
 			scale = 1.0/((float64_t)(total-1));
16 changes: 8 additions & 8 deletions src/shogun/classifier/PluginEstimate.cpp
@@ -70,15 +70,15 @@ bool CPluginEstimate::train_machine(CFeatures* data)
 	SG_REF(pos_model);
 	SG_REF(neg_model);
 
-	int32_t* pos_indizes=SG_MALLOC(int32_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors());
-	int32_t* neg_indizes=SG_MALLOC(int32_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors());
+	index_t* pos_indizes=SG_MALLOC(index_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors());
+	index_t* neg_indizes=SG_MALLOC(index_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors());
 
 	ASSERT(m_labels->get_num_labels()==features->get_num_vectors())
 
-	int32_t pos_idx=0;
-	int32_t neg_idx=0;
+	index_t pos_idx=0;
+	index_t neg_idx=0;
 
-	for (int32_t i=0; i<m_labels->get_num_labels(); i++)
+	for (index_t i=0; i<m_labels->get_num_labels(); i++)
 	{
 		if (((CBinaryLabels*) m_labels)->get_label(i) > 0)
 			pos_indizes[pos_idx++]=i;
@@ -112,17 +112,17 @@ CBinaryLabels* CPluginEstimate::apply_binary(CFeatures* data)
 	ASSERT(features)
 	SGVector<float64_t> result(features->get_num_vectors());
 
-	for (int32_t vec=0; vec<features->get_num_vectors(); vec++)
+	for (index_t vec=0; vec<features->get_num_vectors(); vec++)
 		result[vec] = apply_one(vec);
 
 	return new CBinaryLabels(result);
 }
 
-float64_t CPluginEstimate::apply_one(int32_t vec_idx)
+float64_t CPluginEstimate::apply_one(index_t vec_idx)
 {
 	ASSERT(features)
 
-	int32_t len;
+	index_t len;
 	bool free_vec;
 	uint16_t* vector=features->get_feature_vector(vec_idx, len, free_vec);
 
18 changes: 9 additions & 9 deletions src/shogun/classifier/PluginEstimate.h
@@ -72,7 +72,7 @@ class CPluginEstimate: public CMachine
 		virtual CStringFeatures<uint16_t>* get_features() { SG_REF(features); return features; }
 
 		/// classify the test feature vector indexed by vec_idx
-		float64_t apply_one(int32_t vec_idx);
+		float64_t apply_one(index_t vec_idx);
 
 		/** obsolete posterior log odds
 		 *
@@ -81,7 +81,7 @@ class CPluginEstimate: public CMachine
 		 * @return something floaty
 		 */
 		inline float64_t posterior_log_odds_obsolete(
-			uint16_t* vector, int32_t len)
+			uint16_t* vector, index_t len)
 		{
 			return pos_model->get_log_likelihood_example(vector, len) - neg_model->get_log_likelihood_example(vector, len);
 		}
@@ -93,7 +93,7 @@ class CPluginEstimate: public CMachine
 		 * @return log odd at position
 		 */
 		inline float64_t get_parameterwise_log_odds(
-			uint16_t obs, int32_t position)
+			uint16_t obs, index_t position)
 		{
 			return pos_model->get_positional_log_parameter(obs, position) - neg_model->get_positional_log_parameter(obs, position);
 		}
@@ -104,7 +104,7 @@ class CPluginEstimate: public CMachine
 		 * @param pos position
 		 * @return positive log derivative
 		 */
-		inline float64_t log_derivative_pos_obsolete(uint16_t obs, int32_t pos)
+		inline float64_t log_derivative_pos_obsolete(uint16_t obs, index_t pos)
 		{
 			return pos_model->get_log_derivative_obsolete(obs, pos);
 		}
@@ -115,7 +115,7 @@ class CPluginEstimate: public CMachine
 		 * @param pos position
 		 * @return negative log derivative
 		 */
-		inline float64_t log_derivative_neg_obsolete(uint16_t obs, int32_t pos)
+		inline float64_t log_derivative_neg_obsolete(uint16_t obs, index_t pos)
 		{
 			return neg_model->get_log_derivative_obsolete(obs, pos);
 		}
@@ -130,7 +130,7 @@ class CPluginEstimate: public CMachine
 		 */
 		inline bool get_model_params(
 			float64_t*& pos_params, float64_t*& neg_params,
-			int32_t &seq_length, int32_t &num_symbols)
+			index_t &seq_length, index_t &num_symbols)
 		{
 			if ((!pos_model) || (!neg_model))
 			{
@@ -158,9 +158,9 @@ class CPluginEstimate: public CMachine
 		 */
 		inline void set_model_params(
 			float64_t* pos_params, float64_t* neg_params,
-			int32_t seq_length, int32_t num_symbols)
+			index_t seq_length, index_t num_symbols)
 		{
-			int32_t num_params;
+			index_t num_params;
 
 			SG_UNREF(pos_model);
 			pos_model=new CLinearHMM(seq_length, num_symbols);
@@ -183,7 +183,7 @@ class CPluginEstimate: public CMachine
 		 *
 		 * @return number of parameters
 		 */
-		inline int32_t get_num_params()
+		inline index_t get_num_params()
 		{
 			return pos_model->get_num_model_parameters()+neg_model->get_num_model_parameters();
 		}
22 changes: 11 additions & 11 deletions src/shogun/classifier/svm/LibLinear.cpp
@@ -92,9 +92,9 @@ bool CLibLinear::train_machine(CFeatures* data)
 	ASSERT(features)
 
 
-	int32_t num_train_labels=m_labels->get_num_labels();
-	int32_t num_feat=features->get_dim_feature_space();
-	int32_t num_vec=features->get_num_vectors();
+	index_t num_train_labels=m_labels->get_num_labels();
+	index_t num_feat=features->get_dim_feature_space();
+	index_t num_vec=features->get_num_vectors();
 
 	if (liblinear_solver_type == L1R_L2LOSS_SVC ||
 			(liblinear_solver_type == L1R_LR) )
@@ -146,7 +146,7 @@ bool CLibLinear::train_machine(CFeatures* data)
 	double Cp=C1;
 	double Cn=C2;
 
-	for (int32_t i=0; i<prob.l; i++)
+	for (index_t i=0; i<prob.l; i++)
 	{
 		prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
 		if (prob.y[i] == +1)
@@ -814,10 +814,10 @@ void CLibLinear::solve_l1r_lr(
 	double Cp, double Cn)
 {
 	int l = prob_col->l;
-	int w_size = prob_col->n;
-	int j, s, iter = 0;
-	int active_size = w_size;
-	int max_num_linesearch = 20;
+	index_t w_size = prob_col->n;
+	index_t j, s, iter = 0;
+	index_t active_size = w_size;
+	index_t max_num_linesearch = 20;
 
 	double x_min = 0;
 	double sigma = 0.01;
@@ -829,7 +829,7 @@ void CLibLinear::solve_l1r_lr(
 	double sum2, appxcond2;
 	double cond;
 
-	int *index = SG_MALLOC(int, w_size);
+	index_t *index = SG_MALLOC(index_t, w_size);
 	int32_t *y = SG_MALLOC(int32_t, l);
 	double *exp_wTx = SG_MALLOC(double, l);
 	double *exp_wTx_new = SG_MALLOC(double, l);
@@ -840,7 +840,7 @@ void CLibLinear::solve_l1r_lr(
 
 	CDotFeatures* x = prob_col->x;
 	void* iterator;
-	int ind;
+	index_t ind;
 	double val;
 
 	double C[3] = {Cn,0,Cp};
@@ -1351,7 +1351,7 @@ void CLibLinear::set_linear_term(const SGVector<float64_t> linear_term)
 	if (!m_labels)
 		SG_ERROR("Please assign labels first!\n")
 
-	int32_t num_labels=m_labels->get_num_labels();
+	index_t num_labels=m_labels->get_num_labels();
 
 	if (num_labels!=linear_term.vlen)
 	{
12 changes: 6 additions & 6 deletions src/shogun/classifier/svm/OnlineSVMSGD.cpp
@@ -105,8 +105,8 @@ bool COnlineSVMSGD::train(CFeatures* data)
 	if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
 		is_log_loss = true;
 
-	int32_t vec_count;
-	for (int32_t e = 0; e < epochs && (!cancel_computation()); e++)
+	index_t vec_count;
+	for (index_t e = 0; e < epochs && (!cancel_computation()); e++)
 	{
 		vec_count=0;
 		count = skip;
@@ -162,13 +162,13 @@ bool COnlineSVMSGD::train(CFeatures* data)
 	return true;
 }
 
-void COnlineSVMSGD::calibrate(int32_t max_vec_num)
+void COnlineSVMSGD::calibrate(index_t max_vec_num)
 {
-	int32_t c_dim=1;
+	index_t c_dim=1;
 	float32_t* c=SG_CALLOC(float32_t, c_dim);
 
 	// compute average gradient size
-	int32_t n = 0;
+	index_t n = 0;
 	float64_t m = 0;
 	float64_t r = 0;
 
@@ -196,7 +196,7 @@ void COnlineSVMSGD::calibrate(int32_t max_vec_num)
 	bscale = 0.5*m/n;
 
 	// compute weight decay skip
-	skip = (int32_t) ((16 * n * c_dim) / r);
+	skip = (index_t) ((16 * n * c_dim) / r);
 
 	SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale)
 
6 changes: 3 additions & 3 deletions src/shogun/classifier/svm/OnlineSVMSGD.h
@@ -161,7 +161,7 @@ class COnlineSVMSGD : public COnlineLinearMachine
 		 * @param max_vec_num Maximum number of vectors to calibrate using
 		 *        (optional) if set to -1, tries to calibrate using all vectors
 		 * */
-		void calibrate(int32_t max_vec_num=1000);
+		void calibrate(index_t max_vec_num=1000);
 
 	private:
 		void init();
@@ -174,8 +174,8 @@ class COnlineSVMSGD : public COnlineLinearMachine
 		float64_t wscale;
 		float64_t bscale;
 		int32_t epochs;
-		int32_t skip;
-		int32_t count;
+		index_t skip;
+		index_t count;
 
 		bool use_bias;
 		bool use_regularized_bias;
14 changes: 7 additions & 7 deletions src/shogun/clustering/GMM.cpp
@@ -131,7 +131,7 @@ float64_t CGMM::train_em(float64_t min_cov, int32_t max_iter, float64_t min_chan
 		SG_ERROR("No features to train on.\n")
 
 	CDotFeatures* dotdata=(CDotFeatures *) features;
-	int32_t num_vectors=dotdata->get_num_vectors();
+	index_t num_vectors=dotdata->get_num_vectors();
 
 	SGMatrix<float64_t> alpha;
 
@@ -163,7 +163,7 @@ float64_t CGMM::train_em(float64_t min_cov, int32_t max_iter, float64_t min_chan
 		log_likelihood_prev=log_likelihood_cur;
 		log_likelihood_cur=0;
 
-		for (int32_t i=0; i<num_vectors; i++)
+		for (index_t i=0; i<num_vectors; i++)
 		{
 			logPx[i]=0;
 			SGVector<float64_t> v=dotdata->get_computed_dot_feature_vector(i);
@@ -206,7 +206,7 @@ float64_t CGMM::train_smem(int32_t max_iter, int32_t max_cand, float64_t min_cov
 		SG_ERROR("Can't run SMEM with less than 3 component mixture model.\n")
 
 	CDotFeatures* dotdata=(CDotFeatures *) features;
-	int32_t num_vectors=dotdata->get_num_vectors();
+	index_t num_vectors=dotdata->get_num_vectors();
 
 	float64_t cur_likelihood=train_em(min_cov, max_em_iter, min_change);
 
@@ -219,15 +219,15 @@ float64_t CGMM::train_smem(int32_t max_iter, int32_t max_cand, float64_t min_cov
 	float64_t* logPostSumSum=SG_MALLOC(float64_t, m_components.size()*(m_components.size()-1)/2);
 	float64_t* split_crit=SG_MALLOC(float64_t, m_components.size());
 	float64_t* merge_crit=SG_MALLOC(float64_t, m_components.size()*(m_components.size()-1)/2);
-	int32_t* split_ind=SG_MALLOC(int32_t, m_components.size());
-	int32_t* merge_ind=SG_MALLOC(int32_t, m_components.size()*(m_components.size()-1)/2);
+	index_t* split_ind=SG_MALLOC(index_t, m_components.size());
+	index_t* merge_ind=SG_MALLOC(index_t, m_components.size()*(m_components.size()-1)/2);
 
 	while (iter<max_iter)
 	{
 		memset(logPostSum, 0, m_components.size()*sizeof(float64_t));
 		memset(logPostSum2, 0, m_components.size()*sizeof(float64_t));
 		memset(logPostSumSum, 0, (m_components.size()*(m_components.size()-1)/2)*sizeof(float64_t));
-		for (int32_t i=0; i<num_vectors; i++)
+		for (index_t i=0; i<num_vectors; i++)
 		{
 			logPx[i]=0;
 			SGVector<float64_t> v=dotdata->get_computed_dot_feature_vector(i);
@@ -263,7 +263,7 @@ float64_t CGMM::train_smem(int32_t max_iter, int32_t max_cand, float64_t min_cov
 			logPostSum[i]=CMath::log(logPostSum[i]);
 			split_crit[i]=0;
 			split_ind[i]=i;
-			for (int32_t j=0; j<num_vectors; j++)
+			for (index_t j=0; j<num_vectors; j++)
 			{
 				split_crit[i]+=(logPost[j*m_components.size()+i]-logPostSum[i]-logPxy[j*m_components.size()+i]+CMath::log(m_coefficients[i]))*
 					(CMath::exp(logPost[j*m_components.size()+i])/CMath::exp(logPostSum[i]));
