Commit 65dbdf9
- changing asserts to raising errors, to better integrate with pytest
- many more tests and much sharper tests
raamana committed Mar 17, 2017
1 parent 1c452ab commit 65dbdf9
Showing 4 changed files with 90 additions and 92 deletions.
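The central change swaps bare asserts for explicit raise statements. An assert with an exception instance as its second argument still surfaces as an AssertionError (the ValueError is only used as its message), and the check vanishes entirely under python -O; raising the intended type directly gives pytest.raises(ValueError) something concrete to match. The before/after pattern, taken from the copy-constructor check in the pyradigm.py diff below:

    # before: failure surfaces as AssertionError (the ValueError is merely its
    # message), and the whole check is stripped when run with python -O
    assert in_dataset.num_samples > 0, ValueError('Dataset to copy is empty.')

    # after: raises the intended exception type, which pytest.raises(ValueError)
    # can assert on directly
    if in_dataset.num_samples <= 0:
        raise ValueError('Dataset to copy is empty.')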
2 changes: 1 addition & 1 deletion .coverage
@@ -1 +1 @@
!coverage.py: This is a private format, don't read it directly!{"lines":{"/home/praamana/pyradigm/setup.py":[],"/home/praamana/pyradigm/pyradigm/pyradigm.py":[512,1,2,3,4,5,6,7,8,172,599,12,13,15,34,529,18,531,533,534,535,24,25,26,27,29,542,31,32,33,546,91,548,549,551,552,41,554,555,557,46,559,520,530,51,513,566,567,56,569,583,585,74,587,588,592,514,596,597,87,602,527,604,93,606,608,612,104,618,107,109,624,616,626,19,629,631,122,21,125,127,45,132,134,135,137,140,141,142,621,144,536,147,150,151,152,153,154,155,156,158,161,164,165,166,625,170,517,175,176,177,178,180,30,193,603,195,196,197,201,202,203,204,205,209,547,213,216,36,219,220,221,222,227,229,232,234,39,237,238,240,40,243,250,252,42,254,257,258,43,262,263,265,266,130,271,274,275,277,280,287,289,291,293,295,561,296,298,300,301,303,304,563,309,312,564,314,319,322,323,325,327,328,333,334,335,336,339,341,342,313,347,354,357,359,362,364,368,371,373,376,377,379,382,384,389,392,394,399,402,403,407,412,416,418,421,422,423,424,426,428,435,438,439,440,444,54,459,466,469,594,499,501,502,503,504,505,506,508,510],"/home/praamana/pyradigm/pyradigm/__init__.py":[2,4],"/home/praamana/pyradigm/pyradigm/test_pyradigm.py":[1,2,3,5,7,8,9,10,11,12,14,15,16,17,18,19,21,22,24,26,28,30,31,32,34,36,37,39,40,43,44,47,48,51,52,55,56,59,60,63,64,67,68,69,70,71,72,75,76,79,80,83,84,85,86,89,90,91,93,94,96,98,99,100,101,103,104,105,106,107,108,109,111,113,114,115,116,117,118]}}
!coverage.py: This is a private format, don't read it directly!{"lines": {"/Volumes/data/doc/opensource/pyradigm/build/lib/pyradigm/test_pyradigm.py": [1, 2, 3, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 24, 26, 28, 30, 31, 32, 34, 36, 37, 39, 40, 43, 44, 47, 48, 51, 52, 55, 56, 59, 60, 63, 64, 67, 68, 69, 70, 71, 72, 75, 76, 79, 80, 83, 84, 85, 86, 89, 90, 91, 93, 94, 96, 98, 99, 100, 101, 103, 104, 105, 106, 107, 108, 109, 111, 113, 114, 115, 116, 117, 118], "/Volumes/data/doc/opensource/pyradigm/setup.py": [], "/Volumes/data/doc/opensource/pyradigm/pyradigm/__init__.py": [], "/Volumes/data/doc/opensource/pyradigm/pyradigm/test_pyradigm.py": [], "/Volumes/data/doc/opensource/pyradigm/build/lib/pyradigm/pyradigm.py": [512, 1, 2, 3, 4, 5, 6, 7, 8, 172, 599, 12, 13, 15, 34, 529, 18, 531, 533, 534, 535, 24, 25, 26, 27, 29, 542, 31, 32, 33, 546, 91, 548, 549, 551, 552, 41, 554, 555, 557, 46, 559, 520, 530, 51, 513, 566, 567, 56, 569, 583, 585, 74, 587, 588, 592, 514, 596, 597, 87, 602, 527, 604, 93, 606, 608, 612, 104, 618, 107, 109, 624, 616, 626, 19, 629, 631, 122, 21, 125, 127, 45, 132, 134, 135, 137, 140, 141, 142, 621, 144, 536, 147, 150, 151, 152, 153, 154, 155, 156, 158, 161, 164, 165, 166, 625, 170, 517, 175, 176, 177, 178, 180, 30, 193, 603, 195, 196, 197, 201, 202, 203, 204, 205, 209, 547, 213, 216, 36, 219, 220, 221, 222, 227, 229, 232, 234, 39, 237, 238, 240, 40, 243, 250, 252, 42, 254, 257, 258, 43, 262, 263, 265, 266, 130, 271, 274, 275, 277, 280, 287, 289, 291, 293, 295, 561, 296, 298, 300, 301, 303, 304, 563, 309, 312, 564, 314, 319, 322, 323, 325, 327, 328, 333, 334, 335, 336, 339, 341, 342, 313, 347, 354, 357, 359, 362, 364, 368, 371, 373, 376, 377, 379, 382, 384, 389, 392, 394, 399, 402, 403, 407, 412, 416, 418, 421, 422, 423, 424, 426, 428, 435, 438, 439, 440, 444, 54, 459, 466, 469, 594, 499, 501, 502, 503, 504, 505, 506, 508, 510], "/Volumes/data/doc/opensource/pyradigm/pyradigm/pyradigm.py": [], "/Volumes/data/doc/opensource/pyradigm/examples_dontAddToRepo.py": [], "/Volumes/data/doc/opensource/pyradigm/build/lib/pyradigm/__init__.py": [2, 4], "/Volumes/data/doc/opensource/pyradigm/test_init.py": [3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21], "/Volumes/data/doc/opensource/pyradigm/nidata_trials.py": []}}
9 changes: 0 additions & 9 deletions .gitignore
@@ -36,15 +36,6 @@ pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
27 changes: 15 additions & 12 deletions pyradigm/pyradigm.py
@@ -7,7 +7,6 @@
import cPickle as pickle
import copy


# TODO profile the class for different scales of samples and features
class MLDataset(object):
"""Class defining a ML dataset that helps maintain integrity and ease of access."""
@@ -23,8 +22,11 @@ def __init__(self, filepath=None, in_dataset=None,
self.__load(filepath)
else:
raise IOError('Specified file could not be read.')
elif in_dataset is not None and isinstance(in_dataset, MLDataset):
assert in_dataset.num_samples > 0, ValueError('Dataset to copy is empty.')
elif in_dataset is not None:
if not isinstance(in_dataset, MLDataset):
raise ValueError('Invalid class input: MLDataset expected!')
if in_dataset.num_samples <= 0:
raise ValueError('Dataset to copy is empty.')
self.__copy(in_dataset)
elif data is None and labels is None and classes is None:
# TODO refactor the code to use only basic dict, as it allows for better equality comparisons
@@ -187,11 +189,11 @@ def add_sample(self, sample_id, features, label, class_id=None, feature_names=No
if feature_names is None:
self.__feature_names = self.__str_names(self.num_features)
else:
assert self.__num_features == len(features), \
ValueError('dimensionality of this sample ({}) does not match existing samples ({})'.format(
if self.__num_features != len(features):
raise ValueError('dimensionality of this sample ({}) does not match existing samples ({})'.format(
len(features), self.__num_features))
assert isinstance(features, self.__dtype), TypeError(
"Mismatched dtype. Provide {}".format(self.__dtype))
if not isinstance(features, self.__dtype):
raise TypeError("Mismatched dtype. Provide {}".format(self.__dtype))

self.__data[sample_id] = features
self.__labels[sample_id] = label
@@ -339,11 +341,11 @@ def random_subset_ids(self, perc_per_class=0.5):
random.shuffle(this_class)
# calculating the requested number of samples
subset_size_this_class = np.int64(np.floor(class_size * perc_per_class))
# clipping the range to [0, n]
subset_size_this_class = max(0, min(class_size, subset_size_this_class))
if subset_size_this_class < 1 or this_class is None:
# clipping the range to [1, n]
subset_size_this_class = max(1, min(class_size, subset_size_this_class))
if subset_size_this_class < 1 or len(this_class) < 1 or this_class is None:
# warning if none were selected
warnings.warn('No subjects from class {} were selected.'.format(class_id))
raise ValueError('No subjects from class {} were selected.'.format(class_id))
else:
subsets_this_class = this_class[0:subset_size_this_class]
subsets.extend(subsets_this_class)
@@ -428,7 +430,7 @@ def description(self, str_val):
@property
def num_features(self):
"""number of features in each sample."""
return self.__num_features
return np.int64(self.__num_features)

@num_features.setter
def num_features(self, int_val):
@@ -553,6 +555,7 @@ def __copy(self, other):
self.__labels = copy.deepcopy(other.labels)
self.__dtype = copy.deepcopy(other.dtype)
self.__description = copy.deepcopy(other.description)
self.__feature_names = copy.deepcopy(other.feature_names)
self.__num_features = copy.deepcopy(other.num_features)

return self
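
Alongside the exception changes, the random_subset_ids hunk above tightens the subset-size arithmetic: the floored per-class sample count is now clipped to [1, n] rather than [0, n], and an empty selection raises a ValueError instead of only emitting a warning. A small worked example of the new clipping, using hypothetical sizes:

    import numpy as np

    class_size, perc_per_class = 5, 0.1
    subset_size = np.int64(np.floor(class_size * perc_per_class))  # floor(0.5) -> 0
    # old clip: max(0, min(5, 0)) -> 0 samples selected silently
    # new clip: max(1, min(5, 0)) -> 1 sample; a genuinely empty class now raises
    subset_size = max(1, min(class_size, subset_size))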
144 changes: 74 additions & 70 deletions pyradigm/test_pyradigm.py
@@ -1,23 +1,24 @@
import tempfile
import os
import numpy as np
from pytest import raises
from pytest import raises, warns, set_trace

from pyradigm import MLDataset

out_dir = '.'
for ii in range(1):
num_classes = np.random.randint(2, 150, 1)
class_set = [ chr(x+65)+str(x) for x in range(num_classes)]
num_classes = np.random.randint(2, 150, 1)[0]
class_set = [ 'C{}'.format(x) for x in range(num_classes)]
class_sizes = np.random.randint(5, 200, num_classes)
num_features = np.random.randint(1, 300, 1).take(0)
feat_names = [ str(x) for x in range(num_features) ]

test_dataset = MLDataset()
for class_index, class_id in enumerate(class_set):
for sub_ix in xrange(class_sizes[class_index]):
subj_id = class_set[class_index] + str(sub_ix)
subj_id = '{}_S{}'.format(class_set[class_index],sub_ix)
feat = np.random.random(num_features)
test_dataset.add_sample(subj_id, feat, class_index, class_id)
test_dataset.add_sample(subj_id, feat, class_index, class_id, feat_names)

out_file = os.path.join(out_dir,'random_example_dataset{}.pkl'.format(ii))
test_dataset.save(out_file)
@@ -49,23 +50,44 @@ def test_num_features():
def test_num_features():
assert test_dataset.num_features == num_features

def test_num_samples():
assert test_dataset.num_samples == sum(class_sizes)


def test_num_features():
assert test_dataset.num_features == num_features


def test_substract():
assert other_classes.num_samples == sum(class_sizes) - class_sizes[rand_index]

def test_add():
a = other_classes + random_class
n = a.num_samples
n1 = other_classes.num_samples
n2 = random_class.num_samples
assert n1 + n2 == n

def test_cant_read_nonexisting_file():
raises(IOError, MLDataset('/nonexistentrandomdir/disofddlsfj/arbitrary.noname.pkl'))
with raises(IOError):
a = MLDataset('/nonexistentrandomdir/disofddlsfj/arbitrary.noname.pkl')

def test_cant_write_to_nonexisting_dir():
raises(IOError, test_dataset.save('/nonexistentrandomdir/jdknvoindvi93/arbitrary.noname.pkl'))
with raises(IOError):
test_dataset.save('/nonexistentrandomdir/jdknvoindvi93/arbitrary.noname.pkl')

def test_invalid_constructor():
raises(ValueError, MLDataset(filepath=None,
in_dataset='/nonexistentrandomdir/disofddlsfj/arbitrary.noname.pkl'))
raises(ValueError, MLDataset(filepath=None,
in_dataset=None,
data=list())) # data simply should not be a dict
raises(ValueError, MLDataset(filepath=None,
in_dataset=None,
data=None,
labels=None,
classes='invalid_value'))
with raises(ValueError):
a = MLDataset(in_dataset='/nonexistentrandomdir/disofddlsfj/arbitrary.noname.pkl')

with raises(ValueError):
# data simply should not be a dict
b = MLDataset(filepath=None, in_dataset=None, data=list())

with raises(ValueError):
c = MLDataset(filepath=None,
in_dataset=None, data=None, labels=None,
classes='invalid_value')

def test_return_data_labels():
matrix, vec_labels, sub_ids = test_dataset.data_and_labels()
@@ -84,7 +106,7 @@ def test_labels_setter():
test_dataset.labels = fewer_labels

same_len_diff_key = fewer_labels
same_len_diff_key['sldiursvdkvjs'] = 1
same_len_diff_key[u'sldiursvdkvjs'] = 1
with raises(ValueError):
test_dataset.labels = same_len_diff_key

@@ -108,58 +130,40 @@ def test_add_existing_id():
def test_add_new_id_diff_dim():
new_id = 'dsfdkfslj38748937439kdshfkjhf38'
sid = test_dataset.sample_ids[0]
data_diff_dim = np.random.rand([test_dataset.num_features+1,1])
data_diff_dim = np.random.rand(test_dataset.num_features+1,1)
with raises(ValueError):
test_dataset.add_sample(new_id, data_diff_dim, None, None)

def test_del_nonexisting_id():
nonexisting_id = 'dsfdkfslj38748937439kdshfkjhf38'
with raises(Warning):
nonexisting_id = u'dsfdkfslj38748937439kdshfkjhf38'
with warns(UserWarning):
test_dataset.del_sample(nonexisting_id)

def test_get_nonexisting_class():
nonexisting_id = 'dsfdkfslj38748937439kdshfkjhf38'
with raises(Warning):
nonexisting_id = u'dsfdkfslj38748937439kdshfkjhf38'
with warns(UserWarning):
test_dataset.get_class(nonexisting_id)

def test_rand_feat_subset():
nf = test_dataset.num_features
nf = copy_dataset.num_features
subset_len = np.random.randint(1, nf, 1).take(0)
subset= np.random.random_integers(1, nf, size=[subset_len,1] )
subds = test_dataset.get_feature_subset(subset)
subset= np.random.random_integers(1, nf, size=subset_len )
subds = copy_dataset.get_feature_subset(subset)
assert subds.num_features == subset_len

def test_eq_self():
assert test_dataset == test_dataset


def test_eq_copy():
assert test_dataset == copy_dataset

new_copy = MLDataset(in_dataset=copy_dataset)
assert new_copy == copy_dataset

def test_unpickling():
assert test_dataset == reloaded_dataset


def test_num_samples():
assert test_dataset.num_samples == sum(class_sizes)


def test_num_features():
assert test_dataset.num_features == num_features


def test_substract():
assert other_classes.num_samples == sum(class_sizes) - class_sizes[rand_index]


def test_add():
a = other_classes + random_class
n = a.num_samples
n1 = other_classes.num_samples
n2 = random_class.num_samples
assert n1 + n2 == n

out_file = os.path.join(out_dir, 'random_pickled_dataset.pkl')
copy_dataset.save(out_file)
reloaded_dataset = MLDataset(filepath=out_file, description='reloaded test_dataset')
assert copy_dataset == reloaded_dataset

def test_subset_class():
assert random_class.num_samples == class_sizes[rand_index]
@@ -168,13 +172,13 @@ def test_subset_class():
def test_get_subset():
assert random_class == reloaded_dataset.get_class(random_class_name)

nonexisting_id = 'dsfdkfslj38748937439kdshfkjhf38'
with raises(Warning):
nonexisting_id = u'dsfdkfslj38748937439kdshfkjhf38'
with warns(UserWarning):
test_dataset.get_subset(nonexisting_id)

def test_membership():
member = test_dataset.sample_ids[0]
not_member = 'sdfdkshfdsk34823058wdkfhd83hifnalwe8fh8t'
not_member = u'sdfdkshfdsk34823058wdkfhd83hifnalwe8fh8t'
assert member in test_dataset
assert not_member not in test_dataset

@@ -186,7 +190,7 @@ def test_glance():

def test_random_subset():
for perc in np.arange(0.1, 1, 0.1):
subset = test_dataset.random_subset(perc_in_class=perc)
subset = copy_dataset.random_subset(perc_in_class=perc)
# separating the calculation by class to mimic the implementation in the class
expected_size = sum([int(np.floor(n_in_class*perc)) for n_in_class in class_sizes])
assert subset.num_samples == expected_size
@@ -195,39 +199,39 @@ def test_random_subset_by_count():

smallest_size = min(class_sizes)
for count in range(1,int(smallest_size)):
subset = test_dataset.random_subset_ids_by_count(count_per_class=count)
subset = copy_dataset.random_subset_ids_by_count(count_per_class=count)
assert len(subset) == num_classes*count

def test_train_test_split_ids_count():
smallest_size = min(class_sizes)
for count in range(1, int(smallest_size)):
subset_train, subset_test = test_dataset.train_test_split_ids(count_per_class=count)
subset_train, subset_test = copy_dataset.train_test_split_ids(count_per_class=count)
assert len(subset_train) == num_classes*count
assert len(subset_test) == test_dataset.num_samples-num_classes*count
assert len(subset_test ) == copy_dataset.num_samples-num_classes*count
assert len(set(subset_train).intersection(subset_test))==0

with raises(Warning):
test_dataset.train_test_split_ids(count_per_class=-1)
with warns(UserWarning):
copy_dataset.train_test_split_ids(count_per_class=-1)

with raises(Warning):
test_dataset.train_test_split_ids(count_per_class=test_dataset.num_samples+1.0)
with warns(UserWarning):
copy_dataset.train_test_split_ids(count_per_class=copy_dataset.num_samples+1.0)

with raises(ValueError):
with warns(UserWarning):
# both cant be specified at the same time
test_dataset.train_test_split_ids(count_per_class=2, train_perc=0.5)
copy_dataset.train_test_split_ids(count_per_class=2, train_perc=0.5)

def test_train_test_split_ids_perc():

for perc in np.arange(0.1, 0.9, 0.1):
subset_train, subset_test = test_dataset.train_test_split_ids(train_perc=perc)
for perc in np.arange(0.2, 0.9, 0.1):
subset_train, subset_test = copy_dataset.train_test_split_ids(train_perc=perc)
expected_train_size = sum(np.floor(class_sizes*perc))
assert len(subset_train) == expected_train_size
assert len(subset_test) == test_dataset.num_samples-expected_train_size
assert len(subset_test) == copy_dataset.num_samples-expected_train_size
assert len(set(subset_train).intersection(subset_test))==0

with raises(Warning):
test_dataset.train_test_split_ids(train_perc=1.1)
with warns(UserWarning):
copy_dataset.train_test_split_ids(train_perc=1.1)

with raises(Warning):
test_dataset.train_test_split_ids(train_perc=-1)
with warns(UserWarning):
copy_dataset.train_test_split_ids(train_perc=-1)
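
Two pytest idioms drive most of the test rewrites above. First, the old form raises(IOError, MLDataset(...)) evaluates the call before pytest is ever involved, so the exception escapes unchecked; the context-manager form defers execution until pytest is watching. Second, warnings issued via warnings.warn are not exceptions, so raises(Warning) cannot catch them; pytest.warns is the matching tool. A condensed sketch of both, using names from the tests above:

    from pytest import raises, warns

    # context-manager form: the failing call runs inside the with-block,
    # where pytest can intercept the exception
    with raises(IOError):
        MLDataset('/nonexistentrandomdir/disofddlsfj/arbitrary.noname.pkl')

    # output of warnings.warn() is captured by warns(), not raises()
    with warns(UserWarning):
        test_dataset.del_sample('dsfdkfslj38748937439kdshfkjhf38')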
