ipython notebook of factor graph model

shogun-toolbox · Sep 19, 2013 · 9f3af69 · 9f3af69
1 parent dd41296
commit 9f3af69
Show file tree

Hide file tree

Showing 10 changed files with 837 additions and 40 deletions.
diff --git a/doc/ipython-notebooks/structure/FGM.ipynb b/doc/ipython-notebooks/structure/FGM.ipynb
diff --git a/examples/undocumented/python_modular/structure_factor_graph_model.py b/examples/undocumented/python_modular/structure_factor_graph_model.py
@@ -9,6 +9,16 @@
 w_gt = np.array([0.3,0.5,1.0,0.2,0.05,0.6,-0.2,0.75])
 fac_type = TableFactorType(tid, cards, w_gt)
 
+tid_u = 1
+cards_u = np.array([2], np.int32)
+w_gt_u = np.array([0.5,0.8,1.0,-0.3])
+fac_type_u = TableFactorType(tid_u, cards_u, w_gt_u)
+
+tid_b = 2
+cards_b = np.array([2], np.int32)
+w_gt_b = np.array([0.8, -0.8])
+fac_type_b = TableFactorType(tid_b, cards_b, w_gt_b)
+
 def gen_data(ftype, num_samples, show_data = False):
 	from modshogun import Math
 	from modshogun import FactorType, Factor, TableFactorType, FactorGraph
@@ -26,14 +36,39 @@ def gen_data(ftype, num_samples, show_data = False):
 
 		data1 = np.array([2.0*Math.random(0.0,1.0)-1.0 for i in xrange(2)])
 		vind1 = np.array([0,1], np.int32)
-		fac1 = Factor(ftype, vind1, data1)
+		fac1 = Factor(ftype[0], vind1, data1)
 		fg.add_factor(fac1)
 
 		data2 = np.array([2.0*Math.random(0.0,1.0)-1.0 for i in xrange(2)])
 		vind2 = np.array([1,2], np.int32)
-		fac2 = Factor(ftype, vind2, data2)
+		fac2 = Factor(ftype[0], vind2, data2)
 		fg.add_factor(fac2)
 
+		data3 = np.array([2.0*Math.random(0.0,1.0)-1.0 for i in xrange(2)])
+		vind3 = np.array([0], np.int32)
+		fac3 = Factor(ftype[1], vind3, data3)
+		fg.add_factor(fac3)
+
+		data4 = np.array([2.0*Math.random(0.0,1.0)-1.0 for i in xrange(2)])
+		vind4 = np.array([1], np.int32)
+		fac4 = Factor(ftype[1], vind4, data4)
+		fg.add_factor(fac4)
+
+		data5 = np.array([2.0*Math.random(0.0,1.0)-1.0 for i in xrange(2)])
+		vind5 = np.array([2], np.int32)
+		fac5 = Factor(ftype[1], vind5, data5)
+		fg.add_factor(fac5)
+
+		data6 = np.array([1.0])
+		vind6 = np.array([0], np.int32)
+		fac6 = Factor(ftype[2], vind6, data6)
+		fg.add_factor(fac6)
+
+		data7 = np.array([1.0])
+		vind7 = np.array([2], np.int32)
+		fac7 = Factor(ftype[2], vind7, data7)
+		fg.add_factor(fac7)
+
 		samples.add_sample(fg)
 		fg.connect_components()
 		fg.compute_energies()
@@ -50,26 +85,38 @@ def gen_data(ftype, num_samples, show_data = False):
 
 	return samples, labels
 
-num_samples = 100
-samples, labels = gen_data(fac_type, num_samples)
 
-parameter_list = [[samples,labels,w_gt,fac_type]]
+w_all = [w_gt,w_gt_u,w_gt_b]
+ftype_all = [fac_type,fac_type_u,fac_type_b]
+
+num_samples = 10
+samples, labels = gen_data(ftype_all, num_samples)
 
-def structure_factor_graph_model (tr_samples = samples, tr_labels = labels, w = w_gt, ftype = fac_type):
+parameter_list = [[samples,labels,w_all,ftype_all]]
+
+def structure_factor_graph_model(tr_samples = samples, tr_labels = labels, w = w_all, ftype = ftype_all):
 	from modshogun import FactorGraphModel, MAPInference, TREE_MAX_PROD
 	from modshogun import DualLibQPBMSOSVM, LabelsFactory
 
 	# create model
-	model = FactorGraphModel(tr_samples, tr_labels, TREE_MAX_PROD, False)
-	w_truth = w
-	w = np.zeros(8)
-	ftype.set_w(w)
-	model.add_factor_type(ftype)
+	model = FactorGraphModel(tr_samples, tr_labels, TREE_MAX_PROD, True)
+	w_truth = [w[0].copy(), w[1].copy(), w[2].copy()]
+	w[0] = np.zeros(8)
+	w[1] = np.zeros(4)
+	w[2] = np.zeros(2)
+	ftype[0].set_w(w[0])
+	ftype[1].set_w(w[1])
+	ftype[2].set_w(w[2])
+	model.add_factor_type(ftype[0])
+	model.add_factor_type(ftype[1])
+	model.add_factor_type(ftype[2])
 
 	# training
-	bmrm = DualLibQPBMSOSVM(model, tr_labels, 1.0)
+	bmrm = DualLibQPBMSOSVM(model, tr_labels, 0.01)
 	bmrm.train()
+	#print 'learned weights:'
 	#print bmrm.get_w()
+	#print 'ground truth weights:'
 	#print w_truth
 
 	# evaluation
@@ -85,6 +132,15 @@ def structure_factor_graph_model (tr_samples = samples, tr_labels = labels, w =
 
 	#print('Average training error is %.4f' % ave_loss)
 
+	# fetch the recorded primal (Fp) and dual (Fd) objective histories; uncomment the prints below to show them
+	from modshogun import BmrmStatistics
+	stats = bmrm.get_result()
+	Fps = stats.get_hist_Fp_vector()
+	Fds = stats.get_hist_Fd_vector()
+	#print Fps
+	#print Fds
+	#print Fps - Fds
+
 if __name__ == '__main__':
 	print("Factor Graph Model")
 	structure_factor_graph_model(*parameter_list[0])
diff --git a/src/shogun/labels/FactorGraphLabels.cpp b/src/shogun/labels/FactorGraphLabels.cpp
@@ -9,13 +9,10 @@ CFactorGraphObservation::CFactorGraphObservation(SGVector<int32_t> observed_stat
 	if (loss_weights.size() == 0)
 	{
 		loss_weights.resize_vector(observed_state.size());
-		SGVector<float64_t>::fill_vector(loss_weights.vector, loss_weights.vlen, 1.0);	
+		SGVector<float64_t>::fill_vector(loss_weights.vector, loss_weights.vlen, 1.0 / observed_state.size());	
 	}
 
-	REQUIRE(loss_weights.size() == observed_state.size(), "%s::CFactorGraphObservation(): \
-		loss_weights should be the same length as observed_states", get_name());
-
-	m_loss_weights = loss_weights;
+	set_loss_weights(loss_weights);
 }
 
 SGVector<int32_t> CFactorGraphObservation::get_data() const 
@@ -28,6 +25,14 @@ SGVector<float64_t> CFactorGraphObservation::get_loss_weights() const
 	return m_loss_weights; 
 }
 
+void CFactorGraphObservation::set_loss_weights(SGVector<float64_t> loss_weights)
+{
+	// exactly one weight per observed variable is required
+	REQUIRE(loss_weights.size() == m_observed_state.size(), "%s::set_loss_weights(): "
+		"loss_weights should be the same length as observed_states", get_name());
+	m_loss_weights = loss_weights;
+}
+
 //-------------------------------------------------------------------
 
 CFactorGraphLabels::CFactorGraphLabels()

diff --git a/src/shogun/labels/FactorGraphLabels.h b/src/shogun/labels/FactorGraphLabels.h
@@ -38,7 +38,7 @@ class CFactorGraphObservation : public CStructuredData
 	 * @param loss_weights weighted loss for each variable
 	 */
 	CFactorGraphObservation(SGVector<int32_t> observed_state, 
-		SGVector<float64_t> loss_weights = SGVector<float64_t>());
+		SGVector<float64_t> loss_weights);
 
 	~CFactorGraphObservation() { }
 
@@ -65,6 +65,12 @@ class CFactorGraphObservation : public CStructuredData
 	/** @return loss weights */
 	SGVector<float64_t> get_loss_weights() const;
 
+	/** set loss weights 
+	 *
+	 * @param loss_weights per-variable weights for weighted hamming loss; must have the same length as the observed state
+	 */
+	void set_loss_weights(SGVector<float64_t> loss_weights);
+
 protected:
 	/** loss weights, usually for weighted hamming loss */
 	SGVector<float64_t> m_loss_weights;

diff --git a/src/shogun/structure/FactorGraph.cpp b/src/shogun/structure/FactorGraph.cpp
@@ -269,7 +269,7 @@ void CFactorGraph::loss_augmentation(SGVector<int32_t> states_gt, SGVector<float
 	if (loss.size() == 0)
 	{
 		loss.resize_vector(states_gt.size());
-		SGVector<float64_t>::fill_vector(loss.vector, loss.vlen, 1.0);	
+		SGVector<float64_t>::fill_vector(loss.vector, loss.vlen, 1.0 / states_gt.size());	
 	}
 
 	int32_t num_vars = states_gt.size();

diff --git a/src/shogun/structure/FactorGraphModel.cpp b/src/shogun/structure/FactorGraphModel.cpp
@@ -245,15 +245,6 @@ SGVector< float64_t > CFactorGraphModel::get_joint_feature_vector(int32_t feat_i
 	SGVector<float64_t> psi(get_dim());
 	psi.zero();
 
-	// counts of different types of factor
-	factor_counts_type fcounts;
-	for (int32_t fi = 0; fi < m_factor_types->get_num_elements(); fi++)
-	{
-		CFactorType* ft = dynamic_cast<CFactorType*>(m_factor_types->get_element(fi));
-		fcounts[ft->get_type_id()] = 0;
-		SG_UNREF(ft);
-	}
-
 	// construct unnormalized psi
 	CDynamicObjectArray* facs = fg->get_factors();
 	for (int32_t fi = 0; fi < facs->get_num_elements(); ++fi)
@@ -274,16 +265,12 @@ SGVector< float64_t > CFactorGraphModel::get_joint_feature_vector(int32_t feat_i
 		for (int32_t di = 0; di < dat_size; di++)
 			psi[w_map[ei*dat_size + di]] += dat[di];
 
-		++fcounts[id];
-
 		SG_UNREF(ftype);
 		SG_UNREF(fac);
 	}
 
 	// negation (-E(x,y) = <w,phi(x,y)>)
-	// TODO: is normalization necessary? i.e. divided by fcounts
-	for (int32_t di = 0; di < psi.vlen; di++)
-		psi[di] *= -1.0;
+	psi.scale(-1.0);
 
 	SG_UNREF(facs);
 	SG_UNREF(fg);

diff --git a/src/shogun/structure/MAPInference.cpp b/src/shogun/structure/MAPInference.cpp
@@ -10,8 +10,7 @@
 
 #include <shogun/structure/MAPInference.h>
 #include <shogun/structure/BeliefPropagation.h>
-
-#include <string>
+#include <shogun/labels/FactorGraphLabels.h>
 
 using namespace shogun;
 
@@ -95,8 +94,11 @@ void CMAPInference::inference()
 	assignment.zero();
 	m_energy = m_infer_impl->inference(assignment);
 
+	// create structured output with uniform loss weights 1/num_vars, i.e. normalized hamming loss
 	SG_UNREF(m_outputs);
-	m_outputs = new CFactorGraphObservation(assignment); // already ref() in constructor
+	SGVector<float64_t> loss_weights(m_fg->get_num_vars());
+	SGVector<float64_t>::fill_vector(loss_weights.vector, loss_weights.vlen, 1.0 / loss_weights.vlen);
+	m_outputs = new CFactorGraphObservation(assignment, loss_weights); // already ref() in constructor
 	SG_REF(m_outputs);
 }
 

diff --git a/src/shogun/structure/PrimalMosekSOSVM.cpp b/src/shogun/structure/PrimalMosekSOSVM.cpp
@@ -36,11 +36,13 @@ CPrimalMosekSOSVM::CPrimalMosekSOSVM(
 
 void CPrimalMosekSOSVM::init()
 {
-	SG_ADD(&m_slacks, "m_slacks", "Slacks vector", MS_NOT_AVAILABLE);
+	SG_ADD(&m_slacks, "slacks", "Slacks vector", MS_NOT_AVAILABLE);
 	//FIXME model selection available for SO machines
-	SG_ADD(&m_regularization, "m_regularization", "Regularization constant", MS_NOT_AVAILABLE);
+	SG_ADD(&m_regularization, "regularization", "Regularization constant", MS_NOT_AVAILABLE);
+	SG_ADD(&m_epsilon, "epsilon", "Violation tolerance", MS_NOT_AVAILABLE);
 
 	m_regularization = 1.0;
+	m_epsilon = 0.0;
 }
 
 CPrimalMosekSOSVM::~CPrimalMosekSOSVM()
@@ -163,7 +165,7 @@ bool CPrimalMosekSOSVM::train_machine(CFeatures* data)
 					cur_res = (CResultSet*) cur_list->get_next_element();
 				}
 
-				if ( slack > max_slack )
+				if ( slack > max_slack + m_epsilon )
 				{
 					// The current training example is a
 					// violated constraint
@@ -209,6 +211,7 @@ bool CPrimalMosekSOSVM::train_machine(CFeatures* data)
 		}
 
 		SG_DEBUG("QP solved. The primal objective value is %.4f.\n", mosek->get_primal_objective_value());
+
 		++iteration;
 
 	} while ( old_num_con != num_con && ! exception );
@@ -277,4 +280,9 @@ void CPrimalMosekSOSVM::set_regularization(float64_t C)
 	m_regularization = C;
 }
 
+void CPrimalMosekSOSVM::set_epsilon(float64_t epsilon) 
+{ 
+	m_epsilon = epsilon; 
+}
+
 #endif /* USE_MOSEK */
diff --git a/src/shogun/structure/PrimalMosekSOSVM.h b/src/shogun/structure/PrimalMosekSOSVM.h
@@ -72,6 +72,12 @@ class CPrimalMosekSOSVM : public CLinearStructuredOutputMachine
 		 */
 		void set_regularization(float64_t C);
 
+		/** set epsilon 
+		 *
+		 * @param epsilon violation tolerance: example i is added to the cutting plane set only if slack_i > max_slack_i + epsilon
+		 */
+		void set_epsilon(float64_t epsilon);
+
 	protected:
 		/** train primal SO-SVM
 		 *
@@ -132,6 +138,9 @@ class CPrimalMosekSOSVM : public CLinearStructuredOutputMachine
 		/** regularization constant */
 		float64_t m_regularization;
 
+		/** violation tolerance added to max_slack when testing for violated constraints */
+		float64_t m_epsilon;
+
 }; /* class CPrimalMosekSOSVM */
 
 } /* namespace shogun */

diff --git a/tests/unit/structure/BeliefPropagation_unittest.cc b/tests/unit/structure/BeliefPropagation_unittest.cc
@@ -33,7 +33,7 @@ float64_t hamming_loss(SGVector<int32_t> y_truth, SGVector<int32_t> y_pred)
 		if (y_truth[i] != y_pred[i])
 			loss += 1;
 	}
-	return loss;
+	return (loss / y_truth.vlen);
 }
 
 TEST(BeliefPropagation, tree_max_product_string)