[wip] 2nd round
vigsterkr committed Aug 11, 2017
1 parent bd12425 commit 16a3602
Showing 122 changed files with 1,566 additions and 1,393 deletions.
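
A minimal sketch of the pattern this commit applies across these files,
assuming shogun's index_t typedef (int32_t in src/shogun/lib/common.h at the
time); the zero_counts helper below is hypothetical, not code from the diff:

	#include <cstdint>

	typedef int32_t index_t; // shogun-style index typedef (assumption)

	// Counters and lengths are declared against the typedef instead of a
	// hard-coded int32_t, so the index width can later be changed in one
	// place (e.g. to int64_t for datasets with more than 2^31 examples).
	void zero_counts(int64_t* num_per_class, index_t num_classes)
	{
		for (index_t i = 0; i < num_classes; i++)
			num_per_class[i] = 0;
	}
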
18 changes: 9 additions & 9 deletions src/shogun/classifier/NearestCentroid.cpp
@@ -62,26 +62,26 @@ namespace shogun{
 	{
 		data = distance->get_lhs();
 	}
-	int32_t num_vectors = data->get_num_vectors();
-	int32_t num_classes = ((CMulticlassLabels*) m_labels)->get_num_classes();
-	int32_t num_feats = ((CDenseFeatures<float64_t>*) data)->get_num_features();
+	index_t num_vectors = data->get_num_vectors();
+	index_t num_classes = ((CMulticlassLabels*) m_labels)->get_num_classes();
+	index_t num_feats = ((CDenseFeatures<float64_t>*) data)->get_num_features();
 	SGMatrix<float64_t> centroids(num_feats,num_classes);
 	centroids.zero();
 
 	m_centroids->set_num_features(num_feats);
 	m_centroids->set_num_vectors(num_classes);
 
 	int64_t* num_per_class = new int64_t[num_classes];
-	for (int32_t i=0 ; i<num_classes ; i++)
+	for (index_t i=0 ; i<num_classes ; i++)
 	{
 		num_per_class[i]=0;
 	}
 
-	for (int32_t idx=0 ; idx<num_vectors ; idx++)
+	for (index_t idx=0 ; idx<num_vectors ; idx++)
 	{
-		int32_t current_len;
+		index_t current_len;
 		bool current_free;
-		int32_t current_class = ((CMulticlassLabels*) m_labels)->get_label(idx);
+		index_t current_class = ((CMulticlassLabels*) m_labels)->get_label(idx);
 		float64_t* target = centroids.matrix + num_feats*current_class;
 		float64_t* current = ((CDenseFeatures<float64_t>*)data)->get_feature_vector(idx,current_len,current_free);
 		SGVector<float64_t>::add(target,1.0,target,1.0,current,current_len);
@@ -90,10 +90,10 @@ namespace shogun{
 	}
 
 
-	for (int32_t i=0 ; i<num_classes ; i++)
+	for (index_t i=0 ; i<num_classes ; i++)
 	{
 		float64_t* target = centroids.matrix + num_feats*i;
-		int32_t total = num_per_class[i];
+		index_t total = num_per_class[i];
 		float64_t scale = 0;
 		if(total>1)
 			scale = 1.0/((float64_t)(total-1));
16 changes: 8 additions & 8 deletions src/shogun/classifier/PluginEstimate.cpp
@@ -70,15 +70,15 @@ bool CPluginEstimate::train_machine(CFeatures* data)
 	SG_REF(pos_model);
 	SG_REF(neg_model);
 
-	int32_t* pos_indizes=SG_MALLOC(int32_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors());
-	int32_t* neg_indizes=SG_MALLOC(int32_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors());
+	index_t* pos_indizes=SG_MALLOC(index_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors());
+	index_t* neg_indizes=SG_MALLOC(index_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors());
 
 	ASSERT(m_labels->get_num_labels()==features->get_num_vectors())
 
-	int32_t pos_idx=0;
-	int32_t neg_idx=0;
+	index_t pos_idx=0;
+	index_t neg_idx=0;
 
-	for (int32_t i=0; i<m_labels->get_num_labels(); i++)
+	for (index_t i=0; i<m_labels->get_num_labels(); i++)
 	{
 		if (((CBinaryLabels*) m_labels)->get_label(i) > 0)
 			pos_indizes[pos_idx++]=i;
@@ -112,17 +112,17 @@ CBinaryLabels* CPluginEstimate::apply_binary(CFeatures* data)
 	ASSERT(features)
 	SGVector<float64_t> result(features->get_num_vectors());
 
-	for (int32_t vec=0; vec<features->get_num_vectors(); vec++)
+	for (index_t vec=0; vec<features->get_num_vectors(); vec++)
 		result[vec] = apply_one(vec);
 
 	return new CBinaryLabels(result);
 }
 
-float64_t CPluginEstimate::apply_one(int32_t vec_idx)
+float64_t CPluginEstimate::apply_one(index_t vec_idx)
 {
 	ASSERT(features)
 
-	int32_t len;
+	index_t len;
 	bool free_vec;
 	uint16_t* vector=features->get_feature_vector(vec_idx, len, free_vec);
 
18 changes: 9 additions & 9 deletions src/shogun/classifier/PluginEstimate.h
@@ -72,7 +72,7 @@ class CPluginEstimate: public CMachine
 		virtual CStringFeatures<uint16_t>* get_features() { SG_REF(features); return features; }
 
 		/// classify the test feature vector indexed by vec_idx
-		float64_t apply_one(int32_t vec_idx);
+		float64_t apply_one(index_t vec_idx);
 
 		/** obsolete posterior log odds
 		 *
@@ -81,7 +81,7 @@ class CPluginEstimate: public CMachine
 		 * @return something floaty
 		 */
 		inline float64_t posterior_log_odds_obsolete(
-			uint16_t* vector, int32_t len)
+			uint16_t* vector, index_t len)
 		{
 			return pos_model->get_log_likelihood_example(vector, len) - neg_model->get_log_likelihood_example(vector, len);
 		}
@@ -93,7 +93,7 @@ class CPluginEstimate: public CMachine
 		 * @return log odd at position
 		 */
 		inline float64_t get_parameterwise_log_odds(
-			uint16_t obs, int32_t position)
+			uint16_t obs, index_t position)
 		{
 			return pos_model->get_positional_log_parameter(obs, position) - neg_model->get_positional_log_parameter(obs, position);
 		}
@@ -104,7 +104,7 @@ class CPluginEstimate: public CMachine
 		 * @param pos position
 		 * @return positive log derivative
 		 */
-		inline float64_t log_derivative_pos_obsolete(uint16_t obs, int32_t pos)
+		inline float64_t log_derivative_pos_obsolete(uint16_t obs, index_t pos)
 		{
 			return pos_model->get_log_derivative_obsolete(obs, pos);
 		}
@@ -115,7 +115,7 @@ class CPluginEstimate: public CMachine
 		 * @param pos position
 		 * @return negative log derivative
 		 */
-		inline float64_t log_derivative_neg_obsolete(uint16_t obs, int32_t pos)
+		inline float64_t log_derivative_neg_obsolete(uint16_t obs, index_t pos)
 		{
 			return neg_model->get_log_derivative_obsolete(obs, pos);
 		}
@@ -130,7 +130,7 @@ class CPluginEstimate: public CMachine
 		 */
 		inline bool get_model_params(
 			float64_t*& pos_params, float64_t*& neg_params,
-			int32_t &seq_length, int32_t &num_symbols)
+			index_t &seq_length, index_t &num_symbols)
 		{
 			if ((!pos_model) || (!neg_model))
 			{
@@ -158,9 +158,9 @@ class CPluginEstimate: public CMachine
 		 */
 		inline void set_model_params(
 			float64_t* pos_params, float64_t* neg_params,
-			int32_t seq_length, int32_t num_symbols)
+			index_t seq_length, index_t num_symbols)
 		{
-			int32_t num_params;
+			index_t num_params;
 
 			SG_UNREF(pos_model);
 			pos_model=new CLinearHMM(seq_length, num_symbols);
@@ -183,7 +183,7 @@ class CPluginEstimate: public CMachine
 		 *
 		 * @return number of parameters
 		 */
-		inline int32_t get_num_params()
+		inline index_t get_num_params()
 		{
 			return pos_model->get_num_model_parameters()+neg_model->get_num_model_parameters();
 		}
22 changes: 11 additions & 11 deletions src/shogun/classifier/svm/LibLinear.cpp
@@ -92,9 +92,9 @@ bool CLibLinear::train_machine(CFeatures* data)
 	ASSERT(features)
 
 
-	int32_t num_train_labels=m_labels->get_num_labels();
-	int32_t num_feat=features->get_dim_feature_space();
-	int32_t num_vec=features->get_num_vectors();
+	index_t num_train_labels=m_labels->get_num_labels();
+	index_t num_feat=features->get_dim_feature_space();
+	index_t num_vec=features->get_num_vectors();
 
 	if (liblinear_solver_type == L1R_L2LOSS_SVC ||
 			(liblinear_solver_type == L1R_LR) )
@@ -146,7 +146,7 @@ bool CLibLinear::train_machine(CFeatures* data)
 	double Cp=C1;
 	double Cn=C2;
 
-	for (int32_t i=0; i<prob.l; i++)
+	for (index_t i=0; i<prob.l; i++)
 	{
 		prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
 		if (prob.y[i] == +1)
@@ -814,10 +814,10 @@ void CLibLinear::solve_l1r_lr(
 	double Cp, double Cn)
 {
 	int l = prob_col->l;
-	int w_size = prob_col->n;
-	int j, s, iter = 0;
-	int active_size = w_size;
-	int max_num_linesearch = 20;
+	index_t w_size = prob_col->n;
+	index_t j, s, iter = 0;
+	index_t active_size = w_size;
+	index_t max_num_linesearch = 20;
 
 	double x_min = 0;
 	double sigma = 0.01;
@@ -829,7 +829,7 @@ void CLibLinear::solve_l1r_lr(
 	double sum2, appxcond2;
 	double cond;
 
-	int *index = SG_MALLOC(int, w_size);
+	index_t *index = SG_MALLOC(index_t, w_size);
 	int32_t *y = SG_MALLOC(int32_t, l);
 	double *exp_wTx = SG_MALLOC(double, l);
 	double *exp_wTx_new = SG_MALLOC(double, l);
@@ -840,7 +840,7 @@ void CLibLinear::solve_l1r_lr(
 
 	CDotFeatures* x = prob_col->x;
 	void* iterator;
-	int ind;
+	index_t ind;
 	double val;
 
 	double C[3] = {Cn,0,Cp};
@@ -1351,7 +1351,7 @@ void CLibLinear::set_linear_term(const SGVector<float64_t> linear_term)
 	if (!m_labels)
 		SG_ERROR("Please assign labels first!\n")
 
-	int32_t num_labels=m_labels->get_num_labels();
+	index_t num_labels=m_labels->get_num_labels();
 
 	if (num_labels!=linear_term.vlen)
 	{
12 changes: 6 additions & 6 deletions src/shogun/classifier/svm/OnlineSVMSGD.cpp
@@ -105,8 +105,8 @@ bool COnlineSVMSGD::train(CFeatures* data)
 	if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
 		is_log_loss = true;
 
-	int32_t vec_count;
-	for (int32_t e = 0; e < epochs && (!cancel_computation()); e++)
+	index_t vec_count;
+	for (index_t e = 0; e < epochs && (!cancel_computation()); e++)
 	{
 		vec_count=0;
 		count = skip;
@@ -162,13 +162,13 @@ bool COnlineSVMSGD::train(CFeatures* data)
 	return true;
 }
 
-void COnlineSVMSGD::calibrate(int32_t max_vec_num)
+void COnlineSVMSGD::calibrate(index_t max_vec_num)
 {
-	int32_t c_dim=1;
+	index_t c_dim=1;
 	float32_t* c=SG_CALLOC(float32_t, c_dim);
 
 	// compute average gradient size
-	int32_t n = 0;
+	index_t n = 0;
 	float64_t m = 0;
 	float64_t r = 0;
 
@@ -196,7 +196,7 @@ void COnlineSVMSGD::calibrate(int32_t max_vec_num)
 	bscale = 0.5*m/n;
 
 	// compute weight decay skip
-	skip = (int32_t) ((16 * n * c_dim) / r);
+	skip = (index_t) ((16 * n * c_dim) / r);
 
 	SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale)
 
6 changes: 3 additions & 3 deletions src/shogun/classifier/svm/OnlineSVMSGD.h
@@ -161,7 +161,7 @@ class COnlineSVMSGD : public COnlineLinearMachine
 		 * @param max_vec_num Maximum number of vectors to calibrate using
 		 *        (optional) if set to -1, tries to calibrate using all vectors
 		 * */
-		void calibrate(int32_t max_vec_num=1000);
+		void calibrate(index_t max_vec_num=1000);
 
 	private:
 		void init();
@@ -174,8 +174,8 @@ class COnlineSVMSGD : public COnlineLinearMachine
 		float64_t wscale;
 		float64_t bscale;
 		int32_t epochs;
-		int32_t skip;
-		int32_t count;
+		index_t skip;
+		index_t count;
 
 		bool use_bias;
 		bool use_regularized_bias;
14 changes: 7 additions & 7 deletions src/shogun/clustering/GMM.cpp
@@ -131,7 +131,7 @@ float64_t CGMM::train_em(float64_t min_cov, int32_t max_iter, float64_t min_chan
 		SG_ERROR("No features to train on.\n")
 
 	CDotFeatures* dotdata=(CDotFeatures *) features;
-	int32_t num_vectors=dotdata->get_num_vectors();
+	index_t num_vectors=dotdata->get_num_vectors();
 
 	SGMatrix<float64_t> alpha;
 
@@ -163,7 +163,7 @@ float64_t CGMM::train_em(float64_t min_cov, int32_t max_iter, float64_t min_chan
 		log_likelihood_prev=log_likelihood_cur;
 		log_likelihood_cur=0;
 
-		for (int32_t i=0; i<num_vectors; i++)
+		for (index_t i=0; i<num_vectors; i++)
 		{
 			logPx[i]=0;
 			SGVector<float64_t> v=dotdata->get_computed_dot_feature_vector(i);
@@ -206,7 +206,7 @@ float64_t CGMM::train_smem(int32_t max_iter, int32_t max_cand, float64_t min_cov
 		SG_ERROR("Can't run SMEM with less than 3 component mixture model.\n")
 
 	CDotFeatures* dotdata=(CDotFeatures *) features;
-	int32_t num_vectors=dotdata->get_num_vectors();
+	index_t num_vectors=dotdata->get_num_vectors();
 
 	float64_t cur_likelihood=train_em(min_cov, max_em_iter, min_change);
 
@@ -219,15 +219,15 @@ float64_t CGMM::train_smem(int32_t max_iter, int32_t max_cand, float64_t min_cov
 	float64_t* logPostSumSum=SG_MALLOC(float64_t, m_components.size()*(m_components.size()-1)/2);
 	float64_t* split_crit=SG_MALLOC(float64_t, m_components.size());
 	float64_t* merge_crit=SG_MALLOC(float64_t, m_components.size()*(m_components.size()-1)/2);
-	int32_t* split_ind=SG_MALLOC(int32_t, m_components.size());
-	int32_t* merge_ind=SG_MALLOC(int32_t, m_components.size()*(m_components.size()-1)/2);
+	index_t* split_ind=SG_MALLOC(index_t, m_components.size());
+	index_t* merge_ind=SG_MALLOC(index_t, m_components.size()*(m_components.size()-1)/2);
 
 	while (iter<max_iter)
 	{
 		memset(logPostSum, 0, m_components.size()*sizeof(float64_t));
 		memset(logPostSum2, 0, m_components.size()*sizeof(float64_t));
 		memset(logPostSumSum, 0, (m_components.size()*(m_components.size()-1)/2)*sizeof(float64_t));
-		for (int32_t i=0; i<num_vectors; i++)
+		for (index_t i=0; i<num_vectors; i++)
 		{
 			logPx[i]=0;
 			SGVector<float64_t> v=dotdata->get_computed_dot_feature_vector(i);
@@ -263,7 +263,7 @@ float64_t CGMM::train_smem(int32_t max_iter, int32_t max_cand, float64_t min_cov
 			logPostSum[i]=CMath::log(logPostSum[i]);
 			split_crit[i]=0;
 			split_ind[i]=i;
-			for (int32_t j=0; j<num_vectors; j++)
+			for (index_t j=0; j<num_vectors; j++)
 			{
 				split_crit[i]+=(logPost[j*m_components.size()+i]-logPostSum[i]-logPxy[j*m_components.size()+i]+CMath::log(m_coefficients[i]))*
 					(CMath::exp(logPost[j*m_components.size()+i])/CMath::exp(logPostSum[i]));
