Commit: Prepping for Tutorials.

Ragav Venkatesan committed Feb 10, 2017
1 parent 76490a9 commit 08d599f

Showing 4 changed files with 83 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -15,4 +15,4 @@
/visualizer
/lenet5
.vscode
-.yann_data
+svhn
55 changes: 55 additions & 0 deletions pantry/tutorials/mat2yann.py
@@ -0,0 +1,55 @@
"""
This is a tutorial to setup any dataset in matlab format to be used by YANN.
Still working on this.
"""
def cook_svhn_normalized( location, verbose = 1, **kwargs):
    """
    This method demonstrates how to cook a dataset for yann from data saved in MATLAB.

    Args:
        location: provide the location where the dataset was created and stored.
                  Refer to the prepare_svhn.m file to understand how to prepare a dataset.
        save_directory: directory in which to save the cooked dataset.
        data_params: default is the dictionary below. Refer to :mod:`setup_dataset`.
        preprocess_params: default is the dictionary below. Refer to :mod:`setup_dataset`.

    Notes:
        By default, this will create a dataset that is not mean-subtracted.
    """
    if 'save_directory' not in kwargs:
        save_directory = '_datasets'
    else:
        save_directory = kwargs['save_directory']

    if 'data_params' not in kwargs:
        data_params = {
            "source"   : 'mat',
            "name"     : 'yann_svhn',
            "location" : location,
            "height"   : 32,
            "width"    : 32,
            "channels" : 3 }
    else:
        data_params = kwargs['data_params']

    if 'preprocess_params' not in kwargs:
        # parameters relating to preprocessing.
        preprocess_params = {
            "normalize" : True,
            "ZCA"       : False,
            "grayscale" : False,
            "zero_mean" : False,
        }
    else:
        preprocess_params = kwargs['preprocess_params']

    dataset = setup_dataset(dataset_init_args = data_params,
                            save_directory = save_directory,
                            preprocess_init_args = preprocess_params,
                            verbose = 3)
    return dataset
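A minimal usage sketch follows; ``'svhn/'`` is a hypothetical location where prepare_svhn.m has already written its ``.mat`` batches:

if __name__ == '__main__':
    # 'svhn/' is an assumed path holding the batches made by prepare_svhn.m.
    dataset = cook_svhn_normalized(location = 'svhn/',
                                   save_directory = '_datasets',
                                   verbose = 1)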
3 changes: 2 additions & 1 deletion yann/special/datasets.py
@@ -163,7 +163,7 @@ def cook_mnist_multi_load( verbose = 1, **kwargs):
                            verbose = 3)
    return dataset

-def cook_cifar10(verbose = 1, **kwargs):
+def cook_cifar10_normalized(verbose = 1, **kwargs):
"""
Wrapper to cook cifar10 dataset. Will take as input,
@@ -321,6 +321,7 @@ def cook_caltech256(verbose = 1, **kwargs):

# Just some wrappers
cook_mnist = cook_mnist_normalized
+cook_cifar10 = cook_cifar10_normalized
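A quick usage sketch for these aliases (assuming yann and its skdata dependency are installed; the data is downloaded on first call):

from yann.special.datasets import cook_mnist, cook_cifar10

# Both names resolve to the *_normalized variants and return the
# dataset object built by setup_dataset.
data = cook_mnist(verbose = 1)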

if __name__ == '__main__':
    pass
63 changes: 25 additions & 38 deletions yann/utils/dataset.py
@@ -71,36 +71,6 @@ def download_data (url, location):
        print status,
    f.close()

-def load_svhn():
-    """
-    Function that downloads the dataset from a url and returns the dataset in full
-
-    Returns:
-        list: ``[(train_x, train_y, train_y),(valid_x, valid_y, valid_y), (test_x, test_y, test_y)]``
-    """
-    train_mat = "https://www.dropbox.com/s/uyssbz9ar7879az/batch_0.mat?dl=1"
-    if not os.path.exists('.yann_data'):
-        os.mkdir('.yann_data')
-    if not os.path.exists('.yann_data/svhn/'):
-        os.mkdir('.yann_data/svhn/')
-    if not os.path.exists('.yann_data/svhn/train'):
-        os.mkdir('.yann_data/svhn/train')
-    if not os.path.exists('.yann_data/svhn/train/batch_0.mat'):
-        download_data (url = train_mat,
-                       location = '.yann_data/svhn/train/')
-        os.rename('.yann_data/svhn/train/batch_0.mat?dl=1', '.yann_data/svhn/train/batch_0.mat')
-    dataset_location = '.yann_data/svhn/'
-    train_x, train_y, train_y1 = load_data_mat(classes = 10,
-                                               height = 32,
-                                               width = 32,
-                                               channels = 3,
-                                               location = dataset_location,
-                                               type_set = 'train',
-                                               batch = 0)
-    import pdb
-    pdb.set_trace()

def load_cifar100 ():
"""
Function that downloads the cifar 100 dataset and returns the dataset in full
@@ -765,12 +735,25 @@ class setup_dataset (object):
    'skdata' : Download and set up from skdata
    'mat'    : Data is created in and loaded from MATLAB
"name" : necessary only for skdata
-supports 'mnist','mnist_noise1', 'mnist_noise2', 'mnist_noise3',
-'mnist_noise4', 'mnist_noise5', 'mnist_noise6', 'mnist_bg_images',
-'mnist_bg_rand', 'mnist_rotated', 'mnist_rotated_bg'. Refer to
-original paper by Hugo Larochelle [1] for these dataset details.
+supports
+
+* ``'mnist'``
+* ``'mnist_noise1'``
+* ``'mnist_noise2'``
+* ``'mnist_noise3'``
+* ``'mnist_noise4'``
+* ``'mnist_noise5'``
+* ``'mnist_noise6'``
+* ``'mnist_bg_images'``
+* ``'mnist_bg_rand'``
+* ``'mnist_rotated'``
+* ``'mnist_rotated_bg'``
+* ``'cifar10'``
+* ``'caltech101'``
+* ``'caltech256'``
+
+Refer to the original paper by Hugo Larochelle [1] for details of the mnist variants.
"location" : #necessary for 'pkl' and 'matlab'
"mini_batch_size" : 500,
"mini_batch_size" : 500,
"mini_batches_per_batch" : (100, 20, 20), # trianing, testing, validation
"batches2train" : 1,
"batches2test" : 1,
@@ -863,7 +846,8 @@ def __init__(self,
        self.source = dataset_init_args [ "source" ]
        if self.source == 'skdata':
            self.name = dataset_init_args ["name"]
-       else:
+
+       elif self.source == 'mat':
            self.location = dataset_init_args [ "location" ]

        if "height" in dataset_init_args.keys():
@@ -937,6 +921,10 @@ def __init__(self,
        start_time = time.clock()
        if self.source == 'skdata':
            self._create_skdata(verbose = verbose)
+
+       if self.source == 'mat':
+           self._mat2yann(verbose = verbose)  # Still to be completed.
+
        end_time = time.clock()
        if verbose >= 1:
            print(". Dataset " + self.id + " is created.")
@@ -974,7 +962,7 @@ def _create_skdata(self,verbose=1):
            self._create_skdata_caltech101(verbose = verbose)

        elif self.name == 'caltech256':
            self._create_skdata_caltech256(verbose = verbose)

    def _create_skdata_mnist(self, verbose = 1):
        """
@@ -1090,7 +1078,6 @@ def _create_skdata_mnist(self, verbose = 1):
        cPickle.dump(dataset_args, f, protocol=2)
        f.close()

    def _create_skdata_caltech101(self, verbose = 2):
        """
        Internal function. Use this to create the caltech101 image dataset.
