Refactor all machine (#5104)
* refactor all machines
* fix unit test
* fix python legacy and jupyter notebook
LiuYuHui authored and gf712 committed Dec 8, 2020
1 parent b05576e commit c4676c9
Showing 71 changed files with 255 additions and 430 deletions.
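
The pattern repeated across all 71 files is a single API refactor: labels are no longer bound to a machine up front (via a labels=... argument to create_machine or a set_labels() call) but are passed to train() together with the features. Below is a minimal sketch of the new calling convention; it assumes shogun's factory-style Python API (create_features, create_labels, create_machine) and uses a hypothetical LibLinear classifier on made-up toy data, so read it as an illustration of the pattern rather than code taken from this commit.

import numpy as np
import shogun as sg

# toy binary problem: two 2-D Gaussian blobs (illustrative data only)
X_train = np.hstack((np.random.randn(2, 20) - 1.0, np.random.randn(2, 20) + 1.0))
y_train = np.hstack((-np.ones(20), np.ones(20)))

features_train = sg.create_features(X_train)  # matrix of shape (num_features, num_vectors)
labels_train = sg.create_labels(y_train)      # +/-1 binary labels

# old style, removed by this commit:
#   svm = sg.create_machine("LibLinear", labels=labels_train)
#   svm.train(features_train)

# new style: labels travel with the data at train() time
svm = sg.create_machine("LibLinear", C1=1.0, C2=1.0)
svm.train(features_train, labels_train)
predictions = svm.apply(features_train)

Every hunk that follows applies the same recipe: drop the labels=... argument (or the set_labels call) and widen train(features) to train(features, labels).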
17 changes: 8 additions & 9 deletions doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb
@@ -197,10 +197,10 @@
"outputs": [],
"source": [
"# create ID3ClassifierTree object\n",
"id3 = sg.create_machine(\"ID3ClassifierTree\", labels=labels)\n",
"id3 = sg.create_machine(\"ID3ClassifierTree\")\n",
"\n",
"# learn the tree from training features\n",
"is_successful = id3.train(train_feats)"
"is_successful = id3.train(train_feats, labels)"
]
},
{
@@ -412,10 +412,10 @@
" train_lab = sg.create_labels(labels)\n",
"\n",
" # create ID3ClassifierTree object\n",
" id3 = sg.create_machine(\"ID3ClassifierTree\", labels=train_lab)\n",
" id3 = sg.create_machine(\"ID3ClassifierTree\")\n",
"\n",
" # learn the tree from training features\n",
" id3.train(train_feats)\n",
" id3.train(train_feats, train_lab)\n",
"\n",
" # apply to test dataset\n",
" output = id3.apply(test_feats)\n",
@@ -610,9 +610,9 @@
"# steps in C4.5 Tree training bundled together in a python method\n",
"def train_tree(feats,types,labels):\n",
" # C4.5 Tree object\n",
" tree = sg.create_machine(\"C45ClassifierTree\", labels=labels, m_nominal=types)\n",
" tree = sg.create_machine(\"C45ClassifierTree\", m_nominal=types)\n",
" # supply training matrix and train\n",
" tree.train(feats)\n",
" tree.train(feats, labels)\n",
" \n",
" return tree\n",
"\n",
@@ -1406,10 +1406,9 @@
" # create CHAID tree object\n",
" c = sg.create_machine(\"CHAIDTree\", dependent_vartype=dependent_var_type,\n",
" feature_types=feature_types,\n",
" num_breakpoints=num_bins,\n",
" labels = labels)\n",
" num_breakpoints=num_bins)\n",
" # train using training features\n",
" c.train(feats)\n",
" c.train(feats, labels)\n",
" \n",
" return c\n",
"\n",
8 changes: 3 additions & 5 deletions doc/ipython-notebooks/neuralnets/autoencoders.ipynb
@@ -276,8 +276,7 @@
"\n",
"nn.put('max_num_epochs', 50)\n",
"\n",
"nn.put('labels', Ytrain)\n",
"_ = nn.train(Xtrain)"
"_ = nn.train(Xtrain, Ytrain)"
]
},
{
@@ -404,10 +404,9 @@
"# train the network\n",
"conv_nn.put('epsilon', 0.0)\n",
"conv_nn.put('max_num_epochs', 50)\n",
"conv_nn.put('labels', Ytrain)\n",
"\n",
"# start training. this might take some time\n",
"_ = conv_nn.train(Xtrain)"
"_ = conv_nn.train(Xtrain, Ytrain)"
]
},
{
@@ -462,7 +460,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.6.9"
}
},
"nbformat": 4,
17 changes: 6 additions & 11 deletions doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb
@@ -236,8 +236,7 @@
"# uncomment this line to allow the training progress to be printed on the console\n",
"#from shogun import MSG_INFO; net_no_reg.io.put('loglevel', MSG_INFO)\n",
"\n",
"net_no_reg.put('labels', Ytrain)\n",
"net_no_reg.train(Xtrain) # this might take a while, depending on your machine\n",
"net_no_reg.train(Xtrain, Ytrain) # this might take a while, depending on your machine\n",
"\n",
"# compute accuracy on the validation set\n",
"print(\"Without regularization, accuracy on the validation set =\", compute_accuracy(net_no_reg, Xval, Yval), \"%\")"
@@ -265,8 +264,7 @@
"net_l2.put('max_num_epochs', 600)\n",
"net_l2.put('seed', 10)\n",
"\n",
"net_l2.put('labels', Ytrain)\n",
"net_l2.train(Xtrain) # this might take a while, depending on your machine\n",
"net_l2.train(Xtrain, Ytrain) # this might take a while, depending on your machine\n",
"\n",
"# compute accuracy on the validation set\n",
"print(\"With L2 regularization, accuracy on the validation set =\", compute_accuracy(net_l2, Xval, Yval), \"%\")"
@@ -294,8 +292,7 @@
"net_l1.put('max_num_epochs', 600)\n",
"net_l1.put('seed', 10)\n",
"\n",
"net_l1.put('labels', Ytrain)\n",
"net_l1.train(Xtrain) # this might take a while, depending on your machine\n",
"net_l1.train(Xtrain, Ytrain) # this might take a while, depending on your machine\n",
"\n",
"# compute accuracy on the validation set\n",
"print(\"With L1 regularization, accuracy on the validation set =\", compute_accuracy(net_l1, Xval, Yval), \"%\")"
@@ -336,8 +333,7 @@
"net_dropout.put('gd_learning_rate', 0.5)\n",
"net_dropout.put('gd_mini_batch_size', 100)\n",
"\n",
"net_dropout.put('labels', Ytrain)\n",
"net_dropout.train(Xtrain) # this might take a while, depending on your machine\n",
"net_dropout.train(Xtrain, Ytrain) # this might take a while, depending on your machine\n",
"\n",
"# compute accuracy on the validation set\n",
"print(\"With dropout, accuracy on the validation set =\", compute_accuracy(net_dropout, Xval, Yval), \"%\")"
@@ -431,8 +427,7 @@
"net_conv.put(\"seed\", 10)\n",
"\n",
"# start training\n",
"net_conv.put('labels', Ytrain)\n",
"net_conv.train(Xtrain)\n",
"net_conv.train(Xtrain, Ytrain)\n",
"\n",
"# compute accuracy on the validation set\n",
"print(\"With a convolutional network, accuracy on the validation set =\", compute_accuracy(net_conv, Xval, Yval), \"%\")"
@@ -511,7 +506,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.6.9"
}
},
"nbformat": 4,
5 changes: 2 additions & 3 deletions doc/ipython-notebooks/neuralnets/rbms_dbns.ipynb
@@ -370,8 +370,7 @@
"nn.put(\"l2_coefficient\", 0.0001)\n",
"\n",
"# start training\n",
"nn.put('labels', sg.create_labels(Ytrain))\n",
"nn.train(sg.create_features(Xtrain))"
"nn.train(sg.create_features(Xtrain), sg.create_labels(Ytrain))"
]
},
{
@@ -426,7 +425,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.6.9"
}
},
"nbformat": 4,
8 changes: 4 additions & 4 deletions examples/meta/src/binary/domainadaptationsvm.sg.in
@@ -14,8 +14,8 @@ svm_kernel.init(feats_train, feats_train)
#![create_kernel]

#![create_svm_and_train]
Machine svm = create_machine("SVMLight", kernel=svm_kernel, labels=labels_train, C1=1.0, C2=1.0)
svm.train()
Machine svm = create_machine("SVMLight", kernel=svm_kernel, C1=1.0, C2=1.0)
svm.train(feats_train, labels_train)
#![create_svm_and_train]

#![create_kernel]
@@ -24,11 +24,11 @@ svm_kernel2.init(feats_train, feats_train)
#![create_kernel]

#![obtain_dasvm_from_the_previous_svm]
Machine dasvm = create_machine("DomainAdaptationSVM", C1=1.0, C2=1.0, kernel=svm_kernel2, labels=labels_train, presvm=as_svm(svm), B=1.0)
Machine dasvm = create_machine("DomainAdaptationSVM", C1=1.0, C2=1.0, kernel=svm_kernel2, presvm=as_svm(svm), B=1.0)
#![obtain_dasvm_from_the_previous_svm]

#![train_and_apply]
-dasvm.train()
+dasvm.train(feats_train, labels_train)
Labels labels_predict = dasvm.apply(feats_test)
RealVector labels_vector = labels_predict.get_real_vector("labels")
RealVector weights = svm.get_real_vector("m_alpha")
2 changes: 1 addition & 1 deletion examples/meta/src/evaluation/cross_validation.sg.in
@@ -51,7 +51,7 @@ Labels reg_labels_test = create_labels(reg_lab_test)

#![create_machine_REGRESSION]
real tau = 0.001
Machine lrr = create_machine("LinearRidgeRegression", tau=tau, labels=reg_labels_train)
Machine lrr = create_machine("LinearRidgeRegression", tau=tau)
#![create_instance_REGRESSION]

#![create_cross_validation_REGRESSION]
3 changes: 1 addition & 2 deletions examples/meta/src/multiclass/chaid_tree.sg.in
@@ -18,11 +18,10 @@ ft[1] = 2

#![create_instance]
CHAIDTree classifier(0, ft, 10)
-classifier.set_labels(labels_train)
#![create_instance]

#![train_and_apply]
-classifier.train(features_train)
+classifier.train(features_train, labels_train)
MulticlassLabels labels_predict = classifier.apply_multiclass(features_test)
#![train_and_apply]

3 changes: 1 addition & 2 deletions examples/meta/src/multiclass/relaxed_tree.sg.in
@@ -17,13 +17,12 @@ Kernel k = create_kernel("GaussianKernel")

#![create_instance]
RelaxedTree machine()
-machine.set_labels(labels_train)
machine.set_machine_for_confusion_matrix(mll)
machine.set_kernel(k)
#![create_instance]

#![train_and_apply]
-machine.train(features_train)
+machine.train(features_train, labels_train)
MulticlassLabels labels_predict = machine.apply_multiclass(features_test)
#![train_and_apply]

@@ -11,7 +11,7 @@ Labels labels_test = create_labels(f_labels_test)
#![create_features]

#![create_instance]
Machine network = create_machine("NeuralNetwork", labels=labels_train, auto_quick_initialize=True, max_num_epochs=4, epsilon=0.0, optimization_method="NNOM_GRADIENT_DESCENT", gd_learning_rate=0.01, gd_mini_batch_size=3, max_norm=1.0, dropout_input=0.5)
Machine network = create_machine("NeuralNetwork", auto_quick_initialize=True, max_num_epochs=4, epsilon=0.0, optimization_method="NNOM_GRADIENT_DESCENT", gd_learning_rate=0.01, gd_mini_batch_size=3, max_norm=1.0, dropout_input=0.5)
#![create_instance]

#![add_layers]
@@ -27,7 +27,7 @@ network.put("seed", 10)
#![add_layers]

#![train_and_apply]
-network.train(features_train)
+network.train(features_train, labels_train)
Labels labels_predict = network.apply(features_test)
#![train_and_apply]

@@ -12,7 +12,7 @@ Labels labels_test = create_labels(f_labels_test)

#![create_instance]
int num_feats = features_train.get_int("num_features")
Machine network = create_machine("NeuralNetwork", labels=labels_train, auto_quick_initialize=True, l2_coefficient=0.01, dropout_hidden=0.5, max_num_epochs=50, gd_mini_batch_size=num_feats, gd_learning_rate=0.1, gd_momentum=0.9)
Machine network = create_machine("NeuralNetwork", auto_quick_initialize=True, l2_coefficient=0.01, dropout_hidden=0.5, max_num_epochs=50, gd_mini_batch_size=num_feats, gd_learning_rate=0.1, gd_momentum=0.9)
#![create_instance]

#![add_layers]
@@ -26,7 +26,7 @@ network.put("seed", 1)
#![add_layers]

#![train_and_apply]
-network.train(features_train)
+network.train(features_train, labels_train)
Labels labels_predict = network.apply(features_test)
#![train_and_apply]

@@ -13,7 +13,7 @@ Labels labels_test = create_labels(f_labels_test)

#![create_instance]
int num_feats = features_train.get_int("num_features")
Machine network = create_machine("NeuralNetwork", labels=labels_train, auto_quick_initialize=True, l2_coefficient=0.1, epsilon=0.0, max_num_epochs=40, gd_learning_rate=0.1, gd_momentum=0.9)
Machine network = create_machine("NeuralNetwork", auto_quick_initialize=True, l2_coefficient=0.1, epsilon=0.0, max_num_epochs=40, gd_learning_rate=0.1, gd_momentum=0.9)
#![create_instance]

#![add_layers]
@@ -27,7 +27,7 @@ network.put("seed", 1)
#![add_layers]

#![train_and_apply]
-network.train(features_train)
+network.train(features_train, labels_train)
Labels labels_predict = network.apply(features_test)
#![train_and_apply]

4 changes: 2 additions & 2 deletions examples/meta/src/regression/chaid_tree.sg.in
@@ -14,11 +14,11 @@ ft[0] = 2
#![set_feature_types]

#![create_machine]
Machine chaidtree = create_machine("CHAIDTree", labels=labels_train, dependent_vartype=2, feature_types=ft, num_breakpoints=50)
Machine chaidtree = create_machine("CHAIDTree", dependent_vartype=2, feature_types=ft, num_breakpoints=50)
#![create_machine]

#![train_and_apply]
-chaidtree.train(feats_train)
+chaidtree.train(feats_train, labels_train)
Labels labels_predict = chaidtree.apply(feats_test)
#![train_and_apply]

4 changes: 2 additions & 2 deletions examples/undocumented/python/kernel_histogram_word_string.py
@@ -17,8 +17,8 @@ def kernel_histogram_word_string (fm_train_dna=traindat,fm_test_dna=testdat,labe
feats_test=sg.create_string_features(charfeat, order-1, order, 0, False)

labels=sg.create_labels(label_train_dna)
pie=sg.create_machine("PluginEstimate", pos_pseudo=ppseudo_count, neg_pseudo=npseudo_count, labels=labels)
pie.train(feats_train)
pie=sg.create_machine("PluginEstimate", pos_pseudo=ppseudo_count, neg_pseudo=npseudo_count)
pie.train(feats_train, labels)

kernel=sg.create_kernel("HistogramWordStringKernel", estimate=pie)
kernel.init(feats_train, feats_train)
4 changes: 2 additions & 2 deletions examples/undocumented/python/kernel_salzberg_word_string.py
@@ -17,8 +17,8 @@ def kernel_salzberg_word_string (fm_train_dna=traindat,fm_test_dna=testdat,label
feats_test=sg.create_string_features(charfeat, order-1, order, gap, reverse)

labels=sg.create_labels(label_train_dna)
pie=sg.create_machine("PluginEstimate", labels=labels)
pie.train(feats_train)
pie=sg.create_machine("PluginEstimate")
pie.train(feats_train, labels)

kernel=sg.create_kernel("SalzbergWordStringKernel", plugin_estimate=pie, labels=labels)
kernel.init(feats_train, feats_train)
3 changes: 1 addition & 2 deletions examples/undocumented/python/multiclass_c45classifiertree.py
@@ -34,9 +34,8 @@ def multiclass_c45classifiertree(train=traindat,test=testdat,labels=label_traind
feats_train.add_subset(trsubset)

c=C45ClassifierTree()
-c.set_labels(train_labels)
c.set_feature_types(ft)
-c.train(feats_train)
+c.train(feats_train, train_labels)

train_labels.remove_subset()
feats_train.remove_subset()
3 changes: 1 addition & 2 deletions examples/undocumented/python/multiclass_id3classifiertree.py
@@ -30,8 +30,7 @@ def multiclass_id3classifiertree(train=train_data,labels=train_labels,test=test_

# ID3 Tree formation
id3=ID3ClassifierTree()
-id3.set_labels(feats_labels)
-id3.train(feats_train)
+id3.train(feats_train, feats_labels)

# Classify test data
output=id3.apply_multiclass(feats_test).get_labels()
3 changes: 1 addition & 2 deletions examples/undocumented/python/stochasticgbmachine.py
@@ -28,8 +28,7 @@ def stochasticgbmachine(train=traindat,train_labels=label_traindat,ft=feat_types
# train
feats.add_subset(np.int32(p[0:int(num)]))
labels.add_subset(np.int32(p[0:int(num)]))
-s.set_labels(labels)
-s.train(feats)
+s.train(feats, labels)
feats.remove_subset()
labels.remove_subset()

4 changes: 2 additions & 2 deletions examples/undocumented/python/structure_discrete_hmsvm_bmrm.py
@@ -29,8 +29,8 @@ def structure_discrete_hmsvm_bmrm (m_data_dict=data_dict):
model = sg.create_structured_model("HMSVMModel", features=features, labels=labels,
state_model_type="SMT_TWO_STATE", num_obs=num_obs)

sosvm = sg.create_machine("DualLibQPBMSOSVM", model=model, labels=labels, m_lambda=5000.0)
sosvm.train()
sosvm = sg.create_machine("DualLibQPBMSOSVM", model=model, m_lambda=5000.0)
sosvm.train(features, labels)
#print sosvm.get_w()

predicted = sosvm.apply(features)
12 changes: 6 additions & 6 deletions examples/undocumented/python/structure_factor_graph_model.py
@@ -112,9 +112,9 @@ def structure_factor_graph_model(tr_samples = samples, tr_labels = labels, w = w
model.add("factor_types", ftype[2])

# --- training with BMRM ---
bmrm = sg.create_machine("DualLibQPBMSOSVM", model=model, labels=tr_labels, m_lambda=0.01)
bmrm = sg.create_machine("DualLibQPBMSOSVM", model=model, m_lambda=0.01)
#bmrm.set_verbose(True)
-bmrm.train()
+bmrm.train(tr_samples, tr_labels)
#print 'learned weights:'
#print bmrm.get_w()
#print 'ground truth weights:'
@@ -142,9 +142,9 @@ def structure_factor_graph_model(tr_samples = samples, tr_labels = labels, w = w
#print hbm.get_train_errors()

# --- training with SGD ---
sgd = sg.create_machine("StochasticSOSVM", model=model, labels=tr_labels, m_lambda=0.01)
sgd = sg.create_machine("StochasticSOSVM", model=model, m_lambda=0.01)
#sgd.set_verbose(True)
-sgd.train()
+sgd.train(tr_samples, tr_labels)

# evaluation
#print('SGD: Average training error is %.4f' % SOSVMHelper.average_loss(sgd.get_w(), model))
@@ -154,9 +154,9 @@ def structure_factor_graph_model(tr_samples = samples, tr_labels = labels, w = w
#print hp.get_train_errors()

# --- training with FW ---
fw = sg.create_machine("FWSOSVM", model=model, labels=tr_labels, m_lambda=0.01,
fw = sg.create_machine("FWSOSVM", model=model, m_lambda=0.01,
gap_threshold=0.01)
fw.train()
fw.train(tr_samples, tr_labels)

# evaluation
#print('FW: Average training error is %.4f' % SOSVMHelper.average_loss(fw.get_w(), model))
