Skip to content

Commit

Permalink
Merge pull request #1954 from PirosB3/feature/1868
Browse files Browse the repository at this point in the history
Looks good.
  • Loading branch information
tklein23 committed Mar 15, 2014
2 parents 9b6dcd2 + 4a6bec1 commit 55912da
Show file tree
Hide file tree
Showing 4 changed files with 296 additions and 0 deletions.
72 changes: 72 additions & 0 deletions applications/classification/evaluate_multiclass_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python

# Copyright (c) The Shogun Machine Learning Toolbox
# Written (w) 2014 Daniel Pyrathon
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the Shogun Development Team.


import argparse
import logging
import numpy as np
from modshogun import (LibSVMFile, MulticlassLabels, MulticlassAccuracy)
from utils import get_features_and_labels

# Module-level logger. It is named after the module rather than the file
# path: logger names are dot-separated hierarchies, so a path ending in
# ".py" would create a spurious parent/child level.
LOGGER = logging.getLogger(__name__)

def parse_arguments():
    """Parse the command-line options of the evaluation script.

    Returns:
        argparse.Namespace with two string attributes, ``actual`` and
        ``predicted``, holding the values of the required ``--actual`` and
        ``--predicted`` options.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate predicted labels against ground truth")
    parser.add_argument('--actual', required=True, type=str,
                        help='Path to LibSVM dataset.')
    parser.add_argument('--predicted', required=True, type=str,
                        help='Path to serialized predicted labels')
    return parser.parse_args()


def main(actual, predicted):
    """Compare predicted labels with the labels stored in a LibSVM dataset.

    The accuracy is written to the log and the confusion matrix is printed
    to standard output.

    Args:
        actual: path of the LibSVM file providing the reference labels.
        predicted: path of a text file with one numeric label per line.
    """
    LOGGER.info("SVM Multiclass evaluator")

    # Load SVMLight dataset; only its labels are used for the evaluation.
    _, labels = get_features_and_labels(LibSVMFile(actual))

    # Load predicted labels, skipping blank lines (e.g. a trailing newline)
    # which would otherwise make float() raise ValueError.
    with open(predicted, 'r') as label_file:
        predicted_labels_arr = np.array(
            [float(line) for line in label_file if line.strip()])
    predicted_labels = MulticlassLabels(predicted_labels_arr)

    # Evaluate accuracy
    # NOTE(review): Shogun's evaluation API appears to expect
    # (predicted, ground_truth); the arguments below are passed in the
    # opposite order. Accuracy is symmetric, but the confusion matrix may be
    # transposed -- confirm against the Shogun documentation.
    multiclass_measures = MulticlassAccuracy()
    LOGGER.info("Accuracy = %s",
                multiclass_measures.evaluate(labels, predicted_labels))
    LOGGER.info("Confusion matrix:")
    res = multiclass_measures.get_confusion_matrix(labels, predicted_labels)
    # Call form of print works on both Python 2 and Python 3.
    print(res)


if __name__ == '__main__':
    # Script entry point: hand the parsed command-line options to main().
    options = parse_arguments()
    main(actual=options.actual, predicted=options.predicted)
75 changes: 75 additions & 0 deletions applications/classification/predict_multiclass_svm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env python

# Copyright (c) The Shogun Machine Learning Toolbox
# Written (w) 2014 Daniel Pyrathon
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the Shogun Development Team.


import argparse
import logging
from contextlib import closing
from modshogun import (LibSVMFile, SparseRealFeatures, MulticlassLabels,
MulticlassLibSVM, SerializableHdf5File,
MulticlassAccuracy)
from utils import get_features_and_labels

# Module-level logger. It is named after the module rather than the file
# path: logger names are dot-separated hierarchies, so a path ending in
# ".py" would create a spurious parent/child level.
LOGGER = logging.getLogger(__name__)

def parse_arguments():
    """Parse the command-line options of the prediction script.

    Returns:
        argparse.Namespace with the string attributes ``classifier``,
        ``testset`` and ``output``; all three options are required.
    """
    parser = argparse.ArgumentParser(
        description="Test a serialized SVM classifier against a SVMLight "
                    "test file")
    # The classifier is read back from a serialized file, not from a
    # training dataset.
    parser.add_argument('--classifier', required=True, type=str,
                        help='Path to the serialized classifier.')
    parser.add_argument('--testset', required=True, type=str,
                        help='Path to the SVMLight test file')
    parser.add_argument('--output', required=True, type=str,
                        help='File path to write predicted labels')
    return parser.parse_args()


def main(classifier, testset, output):
    """Apply a serialized multiclass SVM to a test set and save its labels.

    Args:
        classifier: path of the HDF5 file the classifier is loaded from.
        testset: path of the test data, opened through LibSVMFile.
        output: path of the text file that receives one integer label per
            line.
    """
    LOGGER.info("SVM Multiclass evaluation")

    # Restore the trained machine from its serialized form.
    model = MulticlassLibSVM()
    model_file = SerializableHdf5File(classifier, 'r')
    with closing(model_file):
        model.load_serializable(model_file)

    # Only the features are needed; labels stored in the test file are unused.
    features, _ = get_features_and_labels(LibSVMFile(testset))
    predictions = model.apply(features)

    with open(output, 'w') as out:
        out.writelines(
            "%s\n" % int(label) for label in predictions.get_labels())

    LOGGER.info("Predicted labels saved in: '%s'" % output)


if __name__ == '__main__':
    # Script entry point: hand the parsed command-line options to main().
    options = parse_arguments()
    main(classifier=options.classifier,
         testset=options.testset,
         output=options.output)

98 changes: 98 additions & 0 deletions applications/classification/train_multiclass_svm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env python

# Copyright (c) The Shogun Machine Learning Toolbox
# Written (w) 2014 Daniel Pyrathon
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the Shogun Development Team.


import argparse
import logging
from contextlib import contextmanager, closing
from modshogun import (LibSVMFile, GaussianKernel, MulticlassLibSVM,
SerializableHdf5File, LinearKernel)
from utils import get_features_and_labels, track_execution

# Module-level logger. It is named after the module rather than the file
# path: logger names are dot-separated hierarchies, so a path ending in
# ".py" would create a spurious parent/child level.
LOGGER = logging.getLogger(__name__)

def _linear_kernel(feats, width):
    """Build a LinearKernel over feats; width is accepted but not used."""
    return LinearKernel(feats, feats)


def _gaussian_kernel(feats, width):
    """Build a GaussianKernel over feats with the given width."""
    return GaussianKernel(feats, feats, width)


# Maps a kernel name to a factory that is called as factory(feats, width).
KERNELS = dict(linear=_linear_kernel, gaussian=_gaussian_kernel)

def parse_arguments():
    """Parse the command-line options of the training script.

    Returns:
        argparse.Namespace with the attributes ``dataset`` and ``output``
        (required strings), ``capacity`` (float, default 1.0), ``width``
        (float, default 2.1), ``epsilon`` (float, default 0.01) and
        ``kernel`` ('linear' or 'gaussian', default 'linear').
    """
    parser = argparse.ArgumentParser(
        description="Train a multiclass SVM on a dataset stored in "
                    "LibSVM format")
    parser.add_argument('--dataset', required=True, type=str,
                        help='Path to training dataset in LibSVM format.')
    parser.add_argument('--capacity', default=1.0, type=float,
                        help='SVM capacity parameter')
    parser.add_argument('--width', default=2.1, type=float,
                        help='Width of the Gaussian kernel '
                             '(only used with --kernel gaussian)')
    # The script trains a MulticlassLibSVM, so the help text must not refer
    # to a different solver.
    parser.add_argument('--epsilon', default=0.01, type=float,
                        help='SVM epsilon parameter')
    parser.add_argument('--kernel', type=str, default='linear',
                        choices=['linear', 'gaussian'],
                        help='Optionally specify a kernel type. '
                             'Only linear or gaussian')
    parser.add_argument('--output', required=True, type=str,
                        help='Destination path for the output serialized '
                             'classifier')
    return parser.parse_args()


def main(dataset, output, epsilon, capacity, width, kernel_type):
    """Train a multiclass LibSVM classifier and serialize it to disk.

    Args:
        dataset: path of the training data in LibSVM format.
        output: destination path of the serialized (HDF5) classifier.
        epsilon: value passed to the SVM's set_epsilon().
        capacity: first constructor argument of MulticlassLibSVM.
        width: width handed to the kernel factory.
        kernel_type: key of KERNELS selecting the kernel factory.

    Raises:
        ValueError: if kernel_type is not a key of KERNELS.
    """
    LOGGER.info("SVM Multiclass classifier")
    LOGGER.info("Epsilon: %s", epsilon)
    LOGGER.info("Capacity: %s", capacity)
    LOGGER.info("Gaussian width: %s", width)

    # Resolve the kernel factory before loading any data so an unknown name
    # fails fast. Previously the error was only logged and execution fell
    # through to a NameError on the unbound `kernel` variable.
    try:
        make_kernel = KERNELS[kernel_type]
    except KeyError:
        message = "Kernel %s not available. Choose one of: %s" % (
            kernel_type, ', '.join(sorted(KERNELS)))
        LOGGER.error(message)
        raise ValueError(message)

    # Get features
    feats, labels = get_features_and_labels(LibSVMFile(dataset))

    # Create kernel
    kernel = make_kernel(feats, width)

    # Initialize and train Multiclass SVM
    svm = MulticlassLibSVM(capacity, kernel, labels)
    svm.set_epsilon(epsilon)
    with track_execution():
        svm.train()

    # Serialize to file
    writable_file = SerializableHdf5File(output, 'w')
    with closing(writable_file):
        svm.save_serializable(writable_file)
    LOGGER.info("Serialized classifier saved in: '%s'", output)


if __name__ == '__main__':
    # Script entry point: hand the parsed command-line options to main().
    options = parse_arguments()
    main(dataset=options.dataset,
         output=options.output,
         epsilon=options.epsilon,
         capacity=options.capacity,
         width=options.width,
         kernel_type=options.kernel)
51 changes: 51 additions & 0 deletions applications/classification/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python

# Copyright (c) The Shogun Machine Learning Toolbox
# Written (w) 2014 Daniel Pyrathon
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the Shogun Development Team.

import logging
from contextlib import contextmanager
from modshogun import MulticlassLabels, SparseRealFeatures, Time


# Configure the root logger at import time, so every script importing this
# module logs at INFO level with the format below.
logging.basicConfig(level=logging.INFO, format='[%(asctime)-15s %(module)s] %(message)s')
# Name the logger after the module rather than the file path: logger names
# are dot-separated hierarchies, so a path ending in ".py" would create a
# spurious parent/child level. The format above uses %(module)s, not the
# logger name, so emitted messages are unchanged.
LOGGER = logging.getLogger(__name__)

def get_features_and_labels(input_file):
    """Load sparse features and multiclass labels from input_file.

    Args:
        input_file: file object handed to
            SparseRealFeatures.load_with_labels().

    Returns:
        A (features, labels) tuple: the populated SparseRealFeatures and a
        MulticlassLabels built from what load_with_labels() returned.
    """
    sparse_features = SparseRealFeatures()
    raw_labels = sparse_features.load_with_labels(input_file)
    return sparse_features, MulticlassLabels(raw_labels)

@contextmanager
def track_execution():
    """Context manager logging the start and the duration of a training run.

    A Time object is created on entry. After the managed block finishes
    normally, it is stopped and the value of time_diff_sec() is logged.
    If the managed block raises, the completion message is not logged.
    """
    LOGGER.info('Starting training.')
    stopwatch = Time()
    yield
    stopwatch.stop()
    elapsed = stopwatch.time_diff_sec()
    LOGGER.info('Training completed, took {0:.2f}s.'.format(elapsed))

0 comments on commit 55912da

Please sign in to comment.