
Commit

added option to not print confusion
Ragav Venkatesan (Student) authored and committed Mar 17, 2017
1 parent 5f51387 commit 8c55e6a
Showing 2 changed files with 230 additions and 36 deletions.
62 changes: 35 additions & 27 deletions yann/modules/resultor.py
@@ -22,6 +22,7 @@ class resultor(module):
"learning_rate" : "<learning_rate_file_name>.txt"
"momentum" : <momentum_file_name>.txt
"visualize" : <bool>
"print_confusion" : <bool>
"id" : id of the resultor
}
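For context, a minimal sketch of how this new flag might be supplied when the module is wired up. The concrete file names, the `id` value, and the `net.add_module` call are assumptions for illustration, not part of this commit:

    # Hypothetical setup; assumes a yann network object `net` already exists.
    resultor_params = {
        "root"            : ".",                  # directory for result files
        "learning_rate"   : "learning_rate.txt",
        "momentum"        : "momentum.txt",
        "visualize"       : True,
        "print_confusion" : False,                # the new flag: skip confusion output
        "id"              : "main"
    }
    net.add_module(type = 'resultor', params = resultor_params, verbose = 2)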
@@ -69,6 +70,9 @@ def __init__( self, resultor_init_args, verbose = 1):
if not "viualize" in resultor_init_args.keys():
resultor_init_args["visualize"] = True

if not"print_confusion" in resultor_init_args.keys():
resultor_init_args["print_confusion"] = False

for item, value in resultor_init_args.iteritems():
if item == "root":
self.root = value
@@ -84,6 +88,8 @@ def __init__( self, resultor_init_args, verbose = 1):
                self.learning_rate = value
            elif item == "momentum":
                self.momentum = value
            elif item == "print_confusion":
                self.print_confusion = value
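                # caution: this attribute has the same name as the
                # print_confusion method below, so it shadows that method
                # on instances.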


        if not hasattr(self, 'root'): raise Exception('root variable has not been provided. \
@@ -159,35 +165,37 @@ def print_confusion (self, epoch=0, train = None, valid = None, test = None, ver
            test: testing confusion matrix as gained by the test method.
            verbose: As usual.
        """
        if self.print_confusion is True:
            if verbose >= 3:
                print ("... Printing confusion matrix")
            if not os.path.exists(self.root + '/confusion'):
                if verbose >= 3:
                    print ("... Creating a root directory for saving confusions")
                os.makedirs(self.root + '/confusion')

            location = self.root + '/confusion' + '/epoch_' + str(epoch)
            if not os.path.exists( location ):
                if verbose >= 3:
                    print ("... Making the epoch directory")
                os.makedirs (location)

            if verbose >= 3:
                print ("... Saving down the confusion matrix")

            if not train is None:
                self._store_confusion_img (confusion = train,
                                           filename = location + '/train_confusion.eps',
                                           verbose = 2)
            if not valid is None:
                self._store_confusion_img (confusion = valid,
                                           filename = location + '/valid_confusion.eps',
                                           verbose = 2)
            if not test is None:
                self._store_confusion_img (confusion = test,
                                           filename = location + '/test_confusion.eps',
                                           verbose = 2)

    def _store_confusion_img (self, confusion, filename, verbose = 2):
        """
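The body of `_store_confusion_img` is collapsed above. As a rough standalone sketch of what saving a confusion matrix to an EPS file can look like (an assumed matplotlib-based rendering, illustrative only, not the actual yann implementation):

    import numpy
    import matplotlib
    matplotlib.use('Agg')                    # render off-screen, no display needed
    import matplotlib.pyplot as plt

    def store_confusion_img (confusion, filename):
        # draw the matrix as a grayscale image and write it out as EPS
        fig, ax = plt.subplots()
        ax.imshow(confusion, interpolation = 'nearest', cmap = plt.cm.gray)
        ax.set_xlabel('predicted label')
        ax.set_ylabel('true label')
        fig.savefig(filename, format = 'eps')
        plt.close(fig)

    store_confusion_img(numpy.eye(10), 'train_confusion.eps')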
204 changes: 195 additions & 9 deletions yann/special/datasets.py
@@ -971,7 +971,188 @@ def _create_skdata_mnist(self, verbose = 1):
        cPickle.dump(dataset_args, f, protocol=2)
        f.close()

    def _mat2yann (self, verbose = 1):
        """
        Internal function. Use this to create matlab image datasets.
        This is modified for the split dataset from the original ``setup_dataset`` class.
        """
        if verbose >= 2:
            print (".. Creating a split dataset")

        type = 'train'
        if verbose >= 2:
            print ( ".. creating data " + type )

        batches = self.batches2train
        new = True
        for batch in xrange(batches): # for each batch_i file....
            if verbose >= 3:
                print ( "... batch " + str(batch) )

            data_x_batch, data_y_batch = load_data_mat(location = self.location,
                                                       batch = batch,
                                                       type_set = type,
                                                       height = self.height,
                                                       width = self.width,
                                                       channels = self.channels)
            if new is True:
                data_x = data_x_batch
                data_y = data_y_batch
                new = False
            else:
                data_x = numpy.concatenate( (data_x, data_x_batch) , axis = 0)
                data_y = numpy.concatenate( (data_y, data_y_batch) , axis = 0)

        data_x, data_y = self._split_data (( data_x, data_y ), y1 = False )
        data_x = preprocessing ( data = data_x,
                                 height = self.height,
                                 width = self.width,
                                 channels = self.channels,
                                 args = self.preprocessor )

        training_sample_size = data_x.shape[0]
        training_mini_batches_available = int(numpy.floor(training_sample_size / self.mini_batch_size))

        if training_mini_batches_available < self.batches2train * self.mini_batches_per_batch[0]:
            # self.mini_batches_per_batch = ( training_batches_available / self.batches2train,
            #                                 self.mini_batches_per_batch [1],
            #                                 self.mini_batches_per_batch [2] )
            self.batches2train = int(numpy.floor(training_mini_batches_available / self.mini_batches_per_batch[0]))

        loc = self.root + "/train/"
        data_x = check_type(data_x, theano.config.floatX)
        data_y = check_type(data_y, theano.config.floatX)

        for batch in xrange(self.batches2train):
            start_index = batch * self.cache_images[0]
            end_index = start_index + self.cache_images[0]
            data2save = (data_x [start_index:end_index,], data_y[start_index:end_index,] )
            pickle_dataset(loc = loc, data = data2save, batch = batch)


        type = 'valid'
        if verbose >= 2:
            print ( ".. creating data " + type )
        batches = self.batches2validate
        new = True
        del(data_x)
        del(data_y)

        for batch in xrange(batches): # for each batch_i file....
            if verbose >= 3:
                print ( "... batch " + str(batch) )

            data_x_batch, data_y_batch = load_data_mat(location = self.location,
                                                       batch = batch,
                                                       type_set = type,
                                                       height = self.height,
                                                       width = self.width,
                                                       channels = self.channels)

            if new is True:
                data_x = data_x_batch
                data_y = data_y_batch
                new = False
            else:
                data_x = numpy.concatenate( (data_x, data_x_batch) , axis = 0)
                data_y = numpy.concatenate( (data_y, data_y_batch) , axis = 0)

        # data_x, data_y = self._split_data (( data_x, data_y ), y1 = False )
        data_x = preprocessing ( data = data_x,
                                 height = self.height,
                                 width = self.width,
                                 channels = self.channels,
                                 args = self.preprocessor )

        validation_sample_size = data_x.shape[0]
        validation_mini_batches_available = int(numpy.floor(
                                            validation_sample_size / self.mini_batch_size))

        if validation_mini_batches_available < self.batches2validate * self.mini_batches_per_batch[1]:
            self.batches2validate = int(numpy.floor(validation_mini_batches_available \
                                                    / self.mini_batches_per_batch[1]))

        loc = self.root + "/valid/"
        data_x = check_type(data_x, theano.config.floatX)
        data_y = check_type(data_y, theano.config.floatX)

        for batch in xrange(self.batches2validate):
            start_index = batch * self.cache_images[1]
            end_index = start_index + self.cache_images[1]
            data2save = (data_x [start_index:end_index,], data_y[start_index:end_index,] )
            pickle_dataset(loc = loc, data = data2save, batch = batch)

        type = 'test'
        if verbose >= 2:
            print ( ".. creating data " + type )
        batches = self.batches2test
        new = True
        del(data_x)
        del(data_y)

        for batch in xrange(batches): # for each batch_i file....
            if verbose >= 3:
                print ( "... batch " + str(batch) )

            data_x_batch, data_y_batch = load_data_mat(location = self.location,
                                                       batch = batch,
                                                       type_set = type,
                                                       height = self.height,
                                                       width = self.width,
                                                       channels = self.channels)
            if new is True:
                data_x = data_x_batch
                data_y = data_y_batch
                new = False
            else:
                data_x = numpy.concatenate( (data_x, data_x_batch) , axis = 0)
                data_y = numpy.concatenate( (data_y, data_y_batch) , axis = 0)

        # data_x, data_y = self._split_data (( data_x, data_y ), y1 = False )
        data_x = preprocessing ( data = data_x,
                                 height = self.height,
                                 width = self.width,
                                 channels = self.channels,
                                 args = self.preprocessor )

        testing_sample_size = data_x.shape[0]
        testing_mini_batches_available = int(numpy.floor(testing_sample_size / self.mini_batch_size))

        if testing_mini_batches_available < self.batches2test * self.mini_batches_per_batch[2]:
            self.batches2test = int(numpy.floor(testing_mini_batches_available \
                                                / self.mini_batches_per_batch[2]))

        loc = self.root + "/test/"
        data_x = check_type(data_x, theano.config.floatX)
        data_y = check_type(data_y, theano.config.floatX)

        for batch in xrange(self.batches2test):
            start_index = batch * self.cache_images[2]
            end_index = start_index + self.cache_images[2]
            data2save = (data_x [start_index:end_index,], data_y[start_index:end_index,] )
            pickle_dataset(loc = loc, data = data2save, batch = batch)

        dataset_args = {
                "location"           : self.root,
                "mini_batch_size"    : self.mini_batch_size,
                "cache_batches"      : self.mini_batches_per_batch,
                "batches2train"      : self.batches2train,
                "batches2test"       : self.batches2test,
                "batches2validate"   : self.batches2validate,
                "height"             : self.height,
                "width"              : self.width,
                "channels"           : 1 if self.preprocessor ["grayscale"] else self.channels,
                "cache"              : self.cache,
                "splits"             : self.splits
                }

        assert ( self.height * self.width * self.channels == numpy.prod(data_x.shape[1:]) )
        f = open(self.root + '/data_params.pkl', 'wb')
        cPickle.dump(dataset_args, f, protocol=2)
        f.close()

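The three sections above share the same cache-sizing arithmetic: count how many full mini-batches the concatenated data can supply, shrink `batches2train` (or its valid/test counterparts) when the request cannot be met, then pickle fixed-size slices. A standalone sketch of that logic with toy numbers (illustrative only, not yann code):

    import numpy

    mini_batch_size = 100
    mini_batches_per_batch = 5                  # mini-batches held in one cached batch
    cache_images = mini_batch_size * mini_batches_per_batch
    batches2train = 4                           # requested cache batches

    data = numpy.random.rand(1750, 784)         # only 17 full mini-batches fit

    available = int(numpy.floor(data.shape[0] / mini_batch_size))
    if available < batches2train * mini_batches_per_batch:
        # shrink the request so every saved batch is completely full
        batches2train = int(numpy.floor(available / mini_batches_per_batch))

    for batch in range(batches2train):          # mirrors the pickling loops above
        start = batch * cache_images
        chunk = data[start : start + cache_images]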
    def _split_data (self, data, y1 = True):
        """
        This is an internal method that will split the datasets.
@@ -982,25 +1163,30 @@ def _split_data (self, data):
            tuple: split data in the same format as data.
        """
n_shots = self.splits["p"]
data_x, data_y, data_y1 = data
if y1 is True:
data_x, data_y, data_y1 = data
else:
data_x, data_y = data
locs = numpy.zeros(len(data_y), dtype = bool)
for label in xrange(self.n_classes + 1):
temp = numpy.zeros(len(data_y), dtype = bool)
temp[data_y==label] = True
if label in self.splits["shot"]:
count = 0
for element in xrange(len(temp)):
if temp[element] == True: # numpy needs == rather than 'is'
if temp[element] == True: # numpy needs == rather than 'is'
count = count + 1
if count > n_shots:
temp[element] = False
if count > n_shots:
temp[element] = False
locs[temp] = True
data_x = data_x[locs]
data_y = data_y[locs]
data_y1 = data_y1[locs]
return (data_x, data_y, data_y1)

if y1 is True:
data_y1 = data_y1[locs]
return (data_x, data_y, data_y1)
else:
return (data_x, data_y)


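The masking idea in `_split_data` can be shown in isolation: for each label listed in `splits["shot"]`, keep only the first `n_shots` occurrences; every other label passes through untouched. A vectorized toy sketch (illustrative, using `numpy.cumsum` in place of the explicit counting loop above):

    import numpy

    data_y = numpy.array([0, 0, 0, 1, 1, 1, 2, 2, 2])
    shot_classes = [1, 2]                 # classes restricted to n_shots samples
    n_shots = 2

    locs = numpy.zeros(len(data_y), dtype = bool)
    for label in numpy.unique(data_y):
        temp = (data_y == label)
        if label in shot_classes:
            # drop every occurrence after the first n_shots
            temp[numpy.cumsum(temp) > n_shots] = False
        locs |= temp

    print (data_y[locs])                  # -> [0 0 0 1 1 2 2]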
if __name__ == '__main__':
