Created a special module for making datasets. Decoupled it from the dataset module in utils.

ragav committed Jan 18, 2017
1 parent 23da23e commit cdfc187
Showing 10 changed files with 254 additions and 172 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -13,4 +13,5 @@
.cache
.eggs
*.png
-/visualizer
+/visualizer
+.vscode
2 changes: 1 addition & 1 deletion docs/source/index.rst
@@ -129,7 +129,7 @@ To cook a mnist dataset for yann run the following code:

.. code-block:: python

-   from yann.utils.dataset import cook_mnist
+   from yann.special.datasets import cook_mnist
    cook_mnist()

Running this code will print a statement to the following effect ``>>Dataset xxxxx is created.``
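
For reference, the cooked dataset's location can be read off the returned object, as the tutorials below do; a minimal sketch using only calls that appear in this commit:

    from yann.special.datasets import cook_mnist
    data = cook_mnist (verbose = 2)
    dataset = data.dataset_location()   # where the cooked dataset was saved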
14 changes: 14 additions & 0 deletions docs/source/yann/special/datasets.py
@@ -0,0 +1,14 @@
.. _datasets:

:mod:`datasets` - provides quick methods to produce common datasets.
====================================================================

The file ``yann.special.datasets.py`` contains definitions for methods that can quickly produce
some common datasets. Some of them include:

* :mod:`cook_mnist`
* :mod:`cook_cifar10`
* ...

.. automodule:: yann.special.datasets
:members:
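
A short usage sketch; the ``save_directory`` override below follows the kwargs handling in the new module (its default is ``_datasets``):

    from yann.special.datasets import cook_cifar10
    data = cook_cifar10 (verbose = 2, save_directory = '_datasets')
    dataset = data.dataset_location()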
4 changes: 2 additions & 2 deletions pantry/tutorials/autoencoder.py
@@ -127,7 +127,7 @@ def autoencoder ( dataset= None, verbose = 1 ):
    dataset = None
    if len(sys.argv) > 1:
        if sys.argv[1] == 'create_dataset':
-           from yann.utils.dataset import cook_mnist
+           from yann.special.datasets import cook_mnist
            data = cook_mnist (verbose = 2)
            dataset = data.dataset_location()
        else:
@@ -137,7 +137,7 @@ def autoencoder ( dataset= None, verbose = 1 ):

    if dataset is None:
        print " creating a new dataset to run through"
-       from yann.utils.dataset import cook_mnist
+       from yann.special.datasets import cook_mnist
        data = cook_mnist (verbose = 2)
        dataset = data.dataset_location()
4 changes: 2 additions & 2 deletions pantry/tutorials/gan.py
@@ -192,7 +192,7 @@ def simple_gan ( dataset= None, verbose = 1 ):
    dataset = None
    if len(sys.argv) > 1:
        if sys.argv[1] == 'create_dataset':
-           from yann.utils.dataset import cook_mnist
+           from yann.special.datasets import cook_mnist
            data = cook_mnist (verbose = 2)
            dataset = data.dataset_location()
        else:
@@ -202,7 +202,7 @@ def simple_gan ( dataset= None, verbose = 1 ):

    if dataset is None:
        print " creating a new dataset to run through"
-       from yann.utils.dataset import cook_mnist
+       from yann.special.datasets import cook_mnist
        data = cook_mnist (verbose = 2)
        dataset = data.dataset_location()
4 changes: 2 additions & 2 deletions pantry/tutorials/lenet.py
@@ -286,7 +286,7 @@ def lenet_maxout ( dataset= None, verbose = 1 ):
    dataset = None
    if len(sys.argv) > 1:
        if sys.argv[1] == 'create_dataset':
-           from yann.utils.dataset import cook_mnist
+           from yann.special.datasets import cook_mnist
            data = cook_mnist (verbose = 2)
            dataset = data.dataset_location()
        else:
@@ -296,7 +296,7 @@ def lenet_maxout ( dataset= None, verbose = 1 ):

    if dataset is None:
        print " creating a new dataset to run through"
-       from yann.utils.dataset import cook_cifar10
+       from yann.special.datasets import cook_cifar10
        data = cook_cifar10 (verbose = 2)
        dataset = data.dataset_location()
4 changes: 2 additions & 2 deletions pantry/tutorials/log_reg.py
@@ -80,7 +80,7 @@ def log_reg ( dataset, verbose ):
    import sys
    if len(sys.argv) > 1:
        if sys.argv[1] == 'create_dataset':
-           from yann.utils.dataset import cook_mnist
+           from yann.special.datasets import cook_mnist
            data = cook_mnist (verbose = 3)
            dataset = data.dataset_location()
        else:
@@ -90,7 +90,7 @@ def log_reg ( dataset, verbose ):

    if dataset is None:
        print " creating a new dataset to run through"
-       from yann.utils.dataset import cook_mnist
+       from yann.special.datasets import cook_mnist
        data = cook_mnist (verbose = 3)
        dataset = data.dataset_location()
4 changes: 2 additions & 2 deletions pantry/tutorials/mlp.py
@@ -99,7 +99,7 @@ def mlp ( dataset, verbose = 1 ):
    dataset = None
    if len(sys.argv) > 1:
        if sys.argv[1] == 'create_dataset':
-           from yann.utils.dataset import cook_mnist
+           from yann.special.datasets import cook_mnist
            data = cook_mnist (verbose = 3)
            dataset = data.dataset_location()
        else:
@@ -109,7 +109,7 @@ def mlp ( dataset, verbose = 1 ):

    if dataset is None:
        print " creating a new dataset to run through"
-       from yann.utils.dataset import cook_mnist
+       from yann.special.datasets import cook_mnist
        data = cook_mnist (verbose = 3)
        dataset = data.dataset_location()
226 changes: 226 additions & 0 deletions yann/special/datasets.py
@@ -0,0 +1,226 @@
from yann.utils.dataset import setup_dataset

def cook_mnist_normalized( verbose = 1, **kwargs):
    """
    Wrapper to cook the mnist dataset. Takes the following as input:

    Args:
        save_directory: directory into which to save the cooked dataset.
        data_params: defaults to the dictionary below. Refer to :mod:`setup_dataset`.
        preprocess_params: defaults to the dictionary below. Refer to :mod:`setup_dataset`.

    Notes:
        By default, this will create a dataset that is not mean-subtracted.
    """

    if not 'data_params' in kwargs.keys():
        data_params = {
                "source"                  : 'skdata',
                "name"                    : 'mnist',
                "location"                : '',
                "mini_batch_size"         : 500,
                "mini_batches_per_batch"  : (100, 20, 20),
                "batches2train"           : 1,
                "batches2test"            : 1,
                "batches2validate"        : 1,
                "height"                  : 28,
                "width"                   : 28,
                "channels"                : 1 }
    else:
        data_params = kwargs['data_params']

    if not 'preprocess_params' in kwargs.keys():
        # parameters relating to preprocessing.
        preprocess_params = {
                "normalize"     : True,
                "GCN"           : False,
                "ZCA"           : False,
                "grayscale"     : False,
                "mean_subtract" : False,
        }
    else:
        preprocess_params = kwargs['preprocess_params']

    if not 'save_directory' in kwargs.keys():
        save_directory = '_datasets'
    else:
        save_directory = kwargs['save_directory']

    # propagate the caller's verbosity to the dataset setup.
    dataset = setup_dataset(dataset_init_args = data_params,
                            save_directory = save_directory,
                            preprocess_init_args = preprocess_params,
                            verbose = verbose)
    return dataset

def cook_mnist_normalized_mean_subtracted( verbose = 1, **kwargs):
    """
    Wrapper to cook the mnist dataset. Takes the following as input:

    Args:
        save_directory: directory into which to save the cooked dataset.
        data_params: defaults to the dictionary below. Refer to :mod:`setup_dataset`.
        preprocess_params: defaults to the dictionary below. Refer to :mod:`setup_dataset`.
    """

    if not 'data_params' in kwargs.keys():
        data_params = {
                "source"                  : 'skdata',
                "name"                    : 'mnist',
                "location"                : '',
                "mini_batch_size"         : 500,
                "mini_batches_per_batch"  : (100, 20, 20),
                "batches2train"           : 1,
                "batches2test"            : 1,
                "batches2validate"        : 1,
                "height"                  : 28,
                "width"                   : 28,
                "channels"                : 1 }
    else:
        data_params = kwargs['data_params']

    if not 'preprocess_params' in kwargs.keys():
        # parameters relating to preprocessing.
        preprocess_params = {
                "normalize"     : True,
                "GCN"           : False,
                "ZCA"           : False,
                "grayscale"     : False,
                "mean_subtract" : True,
        }
    else:
        preprocess_params = kwargs['preprocess_params']

    if not 'save_directory' in kwargs.keys():
        save_directory = '_datasets'
    else:
        save_directory = kwargs['save_directory']

    dataset = setup_dataset(dataset_init_args = data_params,
                            save_directory = save_directory,
                            preprocess_init_args = preprocess_params,
                            verbose = verbose)
    return dataset

def cook_mnist_multi_load( verbose = 1, **kwargs):
    """
    Testing code, mainly.
    Wrapper to cook the mnist dataset. Takes the following as input:

    Args:
        save_directory: directory into which to save the cooked dataset.
        data_params: defaults to the dictionary below. Refer to :mod:`setup_dataset`.
        preprocess_params: defaults to the dictionary below. Refer to :mod:`setup_dataset`.

    Notes:
        This just creates a ``data_params`` that loads multiple batches without cache.
        I use this to test the caching in the datastream module.
    """

    if not 'data_params' in kwargs.keys():
        data_params = {
                "source"                  : 'skdata',
                "name"                    : 'mnist',
                "location"                : '',
                "mini_batch_size"         : 500,
                "mini_batches_per_batch"  : (20, 5, 5),
                "batches2train"           : 5,
                "batches2test"            : 4,
                "batches2validate"        : 4,
                "height"                  : 28,
                "width"                   : 28,
                "channels"                : 1 }
    else:
        data_params = kwargs['data_params']

    if not 'preprocess_params' in kwargs.keys():
        # parameters relating to preprocessing.
        preprocess_params = {
                "normalize"     : True,
                "GCN"           : False,
                "ZCA"           : False,
                "grayscale"     : False,
                "mean_subtract" : True,
        }
    else:
        preprocess_params = kwargs['preprocess_params']

    if not 'save_directory' in kwargs.keys():
        save_directory = '_datasets'
    else:
        save_directory = kwargs['save_directory']

    dataset = setup_dataset(dataset_init_args = data_params,
                            save_directory = save_directory,
                            preprocess_init_args = preprocess_params,
                            verbose = verbose)
    return dataset

def cook_cifar10_normalized_mean_subtracted(verbose = 1, **kwargs):
    """
    Wrapper to cook the cifar10 dataset. Takes the following as input:

    Args:
        save_directory: directory into which to save the cooked dataset.
        data_params: defaults to the dictionary below. Refer to :mod:`setup_dataset`.
        preprocess_params: defaults to the dictionary below. Refer to :mod:`setup_dataset`.
    """

    if not 'data_params' in kwargs.keys():
        data_params = {
                "source"                  : 'skdata',
                "name"                    : 'cifar10',
                "location"                : '',
                "mini_batch_size"         : 500,
                "mini_batches_per_batch"  : (80, 20, 20),
                "batches2train"           : 1,
                "batches2test"            : 1,
                "batches2validate"        : 1,
                "height"                  : 32,
                "width"                   : 32,
                "channels"                : 3 }
    else:
        data_params = kwargs['data_params']

    if not 'preprocess_params' in kwargs.keys():
        # parameters relating to preprocessing.
        preprocess_params = {
                "normalize"     : True,
                "GCN"           : False,
                "ZCA"           : False,
                "grayscale"     : False,
                "mean_subtract" : True,
        }
    else:
        preprocess_params = kwargs['preprocess_params']

    if not 'save_directory' in kwargs.keys():
        save_directory = '_datasets'
    else:
        save_directory = kwargs['save_directory']

    dataset = setup_dataset(dataset_init_args = data_params,
                            save_directory = save_directory,
                            preprocess_init_args = preprocess_params,
                            verbose = verbose)
    return dataset


# Just some wrappers
cook_mnist = cook_mnist_normalized_mean_subtracted
cook_cifar10 = cook_cifar10_normalized_mean_subtracted

if __name__ == '__main__':
    pass
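    # A minimal usage sketch, commented out; it assumes skdata can download
    # mnist on this machine, and the keys mirror the default dictionaries above.
    # preprocess_params = { "normalize" : True, "GCN" : True, "ZCA" : False,
    #                       "grayscale" : False, "mean_subtract" : False }
    # data = cook_mnist_normalized (verbose = 2,
    #                               preprocess_params = preprocess_params)
    # print data.dataset_location()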
