Commit 65dbdf9
- changing asserts to raising errors, to better integrate with pytest
- many more tests and much sharper tests
raamana committed Mar 17, 2017
1 parent 1c452ab commit 65dbdf9
Showing 4 changed files with 90 additions and 92 deletions.
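The central change swaps bare asserts for explicit raise statements. An assert with an exception instance as its second argument still surfaces as an AssertionError (the ValueError is only used as its message), and the check vanishes entirely under python -O; raising the intended type directly gives pytest.raises(ValueError) something concrete to match. The before/after pattern, taken from the copy-constructor check in the pyradigm.py diff below:

    # before: failure surfaces as AssertionError (the ValueError is merely its
    # message), and the whole check is stripped when run with python -O
    assert in_dataset.num_samples > 0, ValueError('Dataset to copy is empty.')

    # after: raises the intended exception type, which pytest.raises(ValueError)
    # can assert on directly
    if in_dataset.num_samples <= 0:
        raise ValueError('Dataset to copy is empty.')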
2 changes: 1 addition & 1 deletion .coverage
@@ -1 +1 @@
!coverage.py: This is a private format, don't read it directly!{"lines":{"/home/praamana/pyradigm/setup.py":[],"/home/praamana/pyradigm/pyradigm/pyradigm.py":[512,1,2,3,4,5,6,7,8,172,599,12,13,15,34,529,18,531,533,534,535,24,25,26,27,29,542,31,32,33,546,91,548,549,551,552,41,554,555,557,46,559,520,530,51,513,566,567,56,569,583,585,74,587,588,592,514,596,597,87,602,527,604,93,606,608,612,104,618,107,109,624,616,626,19,629,631,122,21,125,127,45,132,134,135,137,140,141,142,621,144,536,147,150,151,152,153,154,155,156,158,161,164,165,166,625,170,517,175,176,177,178,180,30,193,603,195,196,197,201,202,203,204,205,209,547,213,216,36,219,220,221,222,227,229,232,234,39,237,238,240,40,243,250,252,42,254,257,258,43,262,263,265,266,130,271,274,275,277,280,287,289,291,293,295,561,296,298,300,301,303,304,563,309,312,564,314,319,322,323,325,327,328,333,334,335,336,339,341,342,313,347,354,357,359,362,364,368,371,373,376,377,379,382,384,389,392,394,399,402,403,407,412,416,418,421,422,423,424,426,428,435,438,439,440,444,54,459,466,469,594,499,501,502,503,504,505,506,508,510],"/home/praamana/pyradigm/pyradigm/__init__.py":[2,4],"/home/praamana/pyradigm/pyradigm/test_pyradigm.py":[1,2,3,5,7,8,9,10,11,12,14,15,16,17,18,19,21,22,24,26,28,30,31,32,34,36,37,39,40,43,44,47,48,51,52,55,56,59,60,63,64,67,68,69,70,71,72,75,76,79,80,83,84,85,86,89,90,91,93,94,96,98,99,100,101,103,104,105,106,107,108,109,111,113,114,115,116,117,118]}}
!coverage.py: This is a private format, don't read it directly!{"lines": {"/Volumes/data/doc/opensource/pyradigm/build/lib/pyradigm/test_pyradigm.py": [1, 2, 3, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 24, 26, 28, 30, 31, 32, 34, 36, 37, 39, 40, 43, 44, 47, 48, 51, 52, 55, 56, 59, 60, 63, 64, 67, 68, 69, 70, 71, 72, 75, 76, 79, 80, 83, 84, 85, 86, 89, 90, 91, 93, 94, 96, 98, 99, 100, 101, 103, 104, 105, 106, 107, 108, 109, 111, 113, 114, 115, 116, 117, 118], "/Volumes/data/doc/opensource/pyradigm/setup.py": [], "/Volumes/data/doc/opensource/pyradigm/pyradigm/__init__.py": [], "/Volumes/data/doc/opensource/pyradigm/pyradigm/test_pyradigm.py": [], "/Volumes/data/doc/opensource/pyradigm/build/lib/pyradigm/pyradigm.py": [512, 1, 2, 3, 4, 5, 6, 7, 8, 172, 599, 12, 13, 15, 34, 529, 18, 531, 533, 534, 535, 24, 25, 26, 27, 29, 542, 31, 32, 33, 546, 91, 548, 549, 551, 552, 41, 554, 555, 557, 46, 559, 520, 530, 51, 513, 566, 567, 56, 569, 583, 585, 74, 587, 588, 592, 514, 596, 597, 87, 602, 527, 604, 93, 606, 608, 612, 104, 618, 107, 109, 624, 616, 626, 19, 629, 631, 122, 21, 125, 127, 45, 132, 134, 135, 137, 140, 141, 142, 621, 144, 536, 147, 150, 151, 152, 153, 154, 155, 156, 158, 161, 164, 165, 166, 625, 170, 517, 175, 176, 177, 178, 180, 30, 193, 603, 195, 196, 197, 201, 202, 203, 204, 205, 209, 547, 213, 216, 36, 219, 220, 221, 222, 227, 229, 232, 234, 39, 237, 238, 240, 40, 243, 250, 252, 42, 254, 257, 258, 43, 262, 263, 265, 266, 130, 271, 274, 275, 277, 280, 287, 289, 291, 293, 295, 561, 296, 298, 300, 301, 303, 304, 563, 309, 312, 564, 314, 319, 322, 323, 325, 327, 328, 333, 334, 335, 336, 339, 341, 342, 313, 347, 354, 357, 359, 362, 364, 368, 371, 373, 376, 377, 379, 382, 384, 389, 392, 394, 399, 402, 403, 407, 412, 416, 418, 421, 422, 423, 424, 426, 428, 435, 438, 439, 440, 444, 54, 459, 466, 469, 594, 499, 501, 502, 503, 504, 505, 506, 508, 510], "/Volumes/data/doc/opensource/pyradigm/pyradigm/pyradigm.py": [], "/Volumes/data/doc/opensource/pyradigm/examples_dontAddToRepo.py": [], "/Volumes/data/doc/opensource/pyradigm/build/lib/pyradigm/__init__.py": [2, 4], "/Volumes/data/doc/opensource/pyradigm/test_init.py": [3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21], "/Volumes/data/doc/opensource/pyradigm/nidata_trials.py": []}}
9 changes: 0 additions & 9 deletions .gitignore
@@ -36,15 +36,6 @@ pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
27 changes: 15 additions & 12 deletions pyradigm/pyradigm.py
@@ -7,7 +7,6 @@
import cPickle as pickle
import copy


# TODO profile the class for different scales of samples and features
class MLDataset(object):
"""Class defining a ML dataset that helps maintain integrity and ease of access."""
@@ -23,8 +22,11 @@ def __init__(self, filepath=None, in_dataset=None,
self.__load(filepath)
else:
raise IOError('Specified file could not be read.')
elif in_dataset is not None and isinstance(in_dataset, MLDataset):
assert in_dataset.num_samples > 0, ValueError('Dataset to copy is empty.')
elif in_dataset is not None:
if not isinstance(in_dataset, MLDataset):
raise ValueError('Invalid class input: MLDataset expected!')
if in_dataset.num_samples <= 0:
raise ValueError('Dataset to copy is empty.')
self.__copy(in_dataset)
elif data is None and labels is None and classes is None:
# TODO refactor the code to use only basic dict, as it allows for better equality comparisons
@@ -187,11 +189,11 @@ def add_sample(self, sample_id, features, label, class_id=None, feature_names=No
if feature_names is None:
self.__feature_names = self.__str_names(self.num_features)
else:
assert self.__num_features == len(features), \
ValueError('dimensionality of this sample ({}) does not match existing samples ({})'.format(
if self.__num_features != len(features):
raise ValueError('dimensionality of this sample ({}) does not match existing samples ({})'.format(
len(features), self.__num_features))
assert isinstance(features, self.__dtype), TypeError(
"Mismatched dtype. Provide {}".format(self.__dtype))
if not isinstance(features, self.__dtype):
raise TypeError("Mismatched dtype. Provide {}".format(self.__dtype))

self.__data[sample_id] = features
self.__labels[sample_id] = label
@@ -339,11 +341,11 @@ def random_subset_ids(self, perc_per_class=0.5):
random.shuffle(this_class)
# calculating the requested number of samples
subset_size_this_class = np.int64(np.floor(class_size * perc_per_class))
# clipping the range to [0, n]
subset_size_this_class = max(0, min(class_size, subset_size_this_class))
if subset_size_this_class < 1 or this_class is None:
# clipping the range to [1, n]
subset_size_this_class = max(1, min(class_size, subset_size_this_class))
if subset_size_this_class < 1 or len(this_class) < 1 or this_class is None:
# warning if none were selected
warnings.warn('No subjects from class {} were selected.'.format(class_id))
raise ValueError('No subjects from class {} were selected.'.format(class_id))
else:
subsets_this_class = this_class[0:subset_size_this_class]
subsets.extend(subsets_this_class)
@@ -428,7 +430,7 @@ def description(self, str_val):
@property
def num_features(self):
"""number of features in each sample."""
return self.__num_features
return np.int64(self.__num_features)

@num_features.setter
def num_features(self, int_val):
@@ -553,6 +555,7 @@ def __copy(self, other):
self.__labels = copy.deepcopy(other.labels)
self.__dtype = copy.deepcopy(other.dtype)
self.__description = copy.deepcopy(other.description)
self.__feature_names = copy.deepcopy(other.feature_names)
self.__num_features = copy.deepcopy(other.num_features)

return self
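
Alongside the exception changes, the random_subset_ids hunk above tightens the subset-size arithmetic: the floored per-class sample count is now clipped to [1, n] rather than [0, n], and an empty selection raises a ValueError instead of only emitting a warning. A small worked example of the new clipping, using hypothetical sizes:

    import numpy as np

    class_size, perc_per_class = 5, 0.1
    subset_size = np.int64(np.floor(class_size * perc_per_class))  # floor(0.5) -> 0
    # old clip: max(0, min(5, 0)) -> 0 samples selected silently
    # new clip: max(1, min(5, 0)) -> 1 sample; a genuinely empty class now raises
    subset_size = max(1, min(class_size, subset_size))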
144 changes: 74 additions & 70 deletions pyradigm/test_pyradigm.py
@@ -1,23 +1,24 @@
import tempfile
import os
import numpy as np
from pytest import raises
from pytest import raises, warns, set_trace

from pyradigm import MLDataset

out_dir = '.'
for ii in range(1):
num_classes = np.random.randint(2, 150, 1)
class_set = [ chr(x+65)+str(x) for x in range(num_classes)]
num_classes = np.random.randint(2, 150, 1)[0]
class_set = [ 'C{}'.format(x) for x in range(num_classes)]
class_sizes = np.random.randint(5, 200, num_classes)
num_features = np.random.randint(1, 300, 1).take(0)
feat_names = [ str(x) for x in range(num_features) ]

test_dataset = MLDataset()
for class_index, class_id in enumerate(class_set):
for sub_ix in xrange(class_sizes[class_index]):
subj_id = class_set[class_index] + str(sub_ix)
subj_id = '{}_S{}'.format(class_set[class_index],sub_ix)
feat = np.random.random(num_features)
test_dataset.add_sample(subj_id, feat, class_index, class_id)
test_dataset.add_sample(subj_id, feat, class_index, class_id, feat_names)

out_file = os.path.join(out_dir,'random_example_dataset{}.pkl'.format(ii))
test_dataset.save(out_file)
@@ -49,23 +50,44 @@ def test_num_features():
def test_num_features():
assert test_dataset.num_features == num_features

def test_num_samples():
assert test_dataset.num_samples == sum(class_sizes)


def test_num_features():
assert test_dataset.num_features == num_features


def test_substract():
assert other_classes.num_samples == sum(class_sizes) - class_sizes[rand_index]

def test_add():
a = other_classes + random_class
n = a.num_samples
n1 = other_classes.num_samples
n2 = random_class.num_samples
assert n1 + n2 == n

def test_cant_read_nonexisting_file():
raises(IOError, MLDataset('/nonexistentrandomdir/disofddlsfj/arbitrary.noname.pkl'))
with raises(IOError):
a = MLDataset('/nonexistentrandomdir/disofddlsfj/arbitrary.noname.pkl')

def test_cant_write_to_nonexisting_dir():
raises(IOError, test_dataset.save('/nonexistentrandomdir/jdknvoindvi93/arbitrary.noname.pkl'))
with raises(IOError):
test_dataset.save('/nonexistentrandomdir/jdknvoindvi93/arbitrary.noname.pkl')

def test_invalid_constructor():
raises(ValueError, MLDataset(filepath=None,
in_dataset='/nonexistentrandomdir/disofddlsfj/arbitrary.noname.pkl'))
raises(ValueError, MLDataset(filepath=None,
in_dataset=None,
data=list())) # data simply should not be a dict
raises(ValueError, MLDataset(filepath=None,
in_dataset=None,
data=None,
labels=None,
classes='invalid_value'))
with raises(ValueError):
a = MLDataset(in_dataset='/nonexistentrandomdir/disofddlsfj/arbitrary.noname.pkl')

with raises(ValueError):
# data simply should not be a dict
b = MLDataset(filepath=None, in_dataset=None, data=list())

with raises(ValueError):
c = MLDataset(filepath=None,
in_dataset=None, data=None, labels=None,
classes='invalid_value')

def test_return_data_labels():
matrix, vec_labels, sub_ids = test_dataset.data_and_labels()
@@ -84,7 +106,7 @@ def test_labels_setter():
test_dataset.labels = fewer_labels

same_len_diff_key = fewer_labels
same_len_diff_key['sldiursvdkvjs'] = 1
same_len_diff_key[u'sldiursvdkvjs'] = 1
with raises(ValueError):
test_dataset.labels = same_len_diff_key

@@ -108,58 +130,40 @@ def test_add_existing_id():
def test_add_new_id_diff_dim():
new_id = 'dsfdkfslj38748937439kdshfkjhf38'
sid = test_dataset.sample_ids[0]
data_diff_dim = np.random.rand([test_dataset.num_features+1,1])
data_diff_dim = np.random.rand(test_dataset.num_features+1,1)
with raises(ValueError):
test_dataset.add_sample(new_id, data_diff_dim, None, None)

def test_del_nonexisting_id():
nonexisting_id = 'dsfdkfslj38748937439kdshfkjhf38'
with raises(Warning):
nonexisting_id = u'dsfdkfslj38748937439kdshfkjhf38'
with warns(UserWarning):
test_dataset.del_sample(nonexisting_id)

def test_get_nonexisting_class():
nonexisting_id = 'dsfdkfslj38748937439kdshfkjhf38'
with raises(Warning):
nonexisting_id = u'dsfdkfslj38748937439kdshfkjhf38'
with warns(UserWarning):
test_dataset.get_class(nonexisting_id)

def test_rand_feat_subset():
nf = test_dataset.num_features
nf = copy_dataset.num_features
subset_len = np.random.randint(1, nf, 1).take(0)
subset= np.random.random_integers(1, nf, size=[subset_len,1] )
subds = test_dataset.get_feature_subset(subset)
subset= np.random.random_integers(1, nf, size=subset_len )
subds = copy_dataset.get_feature_subset(subset)
assert subds.num_features == subset_len

def test_eq_self():
assert test_dataset == test_dataset


def test_eq_copy():
assert test_dataset == copy_dataset

new_copy = MLDataset(in_dataset=copy_dataset)
assert new_copy == copy_dataset

def test_unpickling():
assert test_dataset == reloaded_dataset


def test_num_samples():
assert test_dataset.num_samples == sum(class_sizes)


def test_num_features():
assert test_dataset.num_features == num_features


def test_substract():
assert other_classes.num_samples == sum(class_sizes) - class_sizes[rand_index]


def test_add():
a = other_classes + random_class
n = a.num_samples
n1 = other_classes.num_samples
n2 = random_class.num_samples
assert n1 + n2 == n

out_file = os.path.join(out_dir, 'random_pickled_dataset.pkl')
copy_dataset.save(out_file)
reloaded_dataset = MLDataset(filepath=out_file, description='reloaded test_dataset')
assert copy_dataset == reloaded_dataset

def test_subset_class():
assert random_class.num_samples == class_sizes[rand_index]
@@ -168,13 +172,13 @@ def test_subset_class():
def test_get_subset():
assert random_class == reloaded_dataset.get_class(random_class_name)

nonexisting_id = 'dsfdkfslj38748937439kdshfkjhf38'
with raises(Warning):
nonexisting_id = u'dsfdkfslj38748937439kdshfkjhf38'
with warns(UserWarning):
test_dataset.get_subset(nonexisting_id)

def test_membership():
member = test_dataset.sample_ids[0]
not_member = 'sdfdkshfdsk34823058wdkfhd83hifnalwe8fh8t'
not_member = u'sdfdkshfdsk34823058wdkfhd83hifnalwe8fh8t'
assert member in test_dataset
assert not_member not in test_dataset

@@ -186,7 +190,7 @@ def test_glance():

def test_random_subset():
for perc in np.arange(0.1, 1, 0.1):
subset = test_dataset.random_subset(perc_in_class=perc)
subset = copy_dataset.random_subset(perc_in_class=perc)
# separating the calculation by class to mimic the implementation in the class
expected_size = sum([int(np.floor(n_in_class*perc)) for n_in_class in class_sizes])
assert subset.num_samples == expected_size
@@ -195,39 +199,39 @@ def test_random_subset_by_count():

smallest_size = min(class_sizes)
for count in range(1,int(smallest_size)):
subset = test_dataset.random_subset_ids_by_count(count_per_class=count)
subset = copy_dataset.random_subset_ids_by_count(count_per_class=count)
assert len(subset) == num_classes*count

def test_train_test_split_ids_count():
smallest_size = min(class_sizes)
for count in range(1, int(smallest_size)):
subset_train, subset_test = test_dataset.train_test_split_ids(count_per_class=count)
subset_train, subset_test = copy_dataset.train_test_split_ids(count_per_class=count)
assert len(subset_train) == num_classes*count
assert len(subset_test) == test_dataset.num_samples-num_classes*count
assert len(subset_test ) == copy_dataset.num_samples-num_classes*count
assert len(set(subset_train).intersection(subset_test))==0

with raises(Warning):
test_dataset.train_test_split_ids(count_per_class=-1)
with warns(UserWarning):
copy_dataset.train_test_split_ids(count_per_class=-1)

with raises(Warning):
test_dataset.train_test_split_ids(count_per_class=test_dataset.num_samples+1.0)
with warns(UserWarning):
copy_dataset.train_test_split_ids(count_per_class=copy_dataset.num_samples+1.0)

with raises(ValueError):
with warns(UserWarning):
# both cant be specified at the same time
test_dataset.train_test_split_ids(count_per_class=2, train_perc=0.5)
copy_dataset.train_test_split_ids(count_per_class=2, train_perc=0.5)

def test_train_test_split_ids_perc():

for perc in np.arange(0.1, 0.9, 0.1):
subset_train, subset_test = test_dataset.train_test_split_ids(train_perc=perc)
for perc in np.arange(0.2, 0.9, 0.1):
subset_train, subset_test = copy_dataset.train_test_split_ids(train_perc=perc)
expected_train_size = sum(np.floor(class_sizes*perc))
assert len(subset_train) == expected_train_size
assert len(subset_test) == test_dataset.num_samples-expected_train_size
assert len(subset_test) == copy_dataset.num_samples-expected_train_size
assert len(set(subset_train).intersection(subset_test))==0

with raises(Warning):
test_dataset.train_test_split_ids(train_perc=1.1)
with warns(UserWarning):
copy_dataset.train_test_split_ids(train_perc=1.1)

with raises(Warning):
test_dataset.train_test_split_ids(train_perc=-1)
with warns(UserWarning):
copy_dataset.train_test_split_ids(train_perc=-1)
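
Two pytest idioms drive most of the test rewrites above. First, the old form raises(IOError, MLDataset(...)) evaluates the call before pytest is ever involved, so the exception escapes unchecked; the context-manager form defers execution until pytest is watching. Second, warnings issued via warnings.warn are not exceptions, so raises(Warning) cannot catch them; pytest.warns is the matching tool. A condensed sketch of both, using names from the tests above:

    from pytest import raises, warns

    # context-manager form: the failing call runs inside the with-block,
    # where pytest can intercept the exception
    with raises(IOError):
        MLDataset('/nonexistentrandomdir/disofddlsfj/arbitrary.noname.pkl')

    # output of warnings.warn() is captured by warns(), not raises()
    with warns(UserWarning):
        test_dataset.del_sample('dsfdkfslj38748937439kdshfkjhf38')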
