
py3 compatibility, typos/small bugs and added setup.py (for pip install) #1

Open · wants to merge 9 commits into base: master
5 changes: 4 additions & 1 deletion .gitignore
@@ -1 +1,4 @@
-*.pyc
+*.pyc
+data/
+.*
+!/.gitignore
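In the new rules, data/ keeps downloaded datasets out of the repository, .* ignores every dotfile (editor and tool caches), and the negation pattern !/.gitignore re-includes the .gitignore file itself, which the .* rule would otherwise hide.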
68 changes: 38 additions & 30 deletions darch/datasets.py
@@ -2,20 +2,23 @@
 import numpy as np
 import scipy as sp
 import tensorflow as tf
-import cPickle
+try:
+    import cPickle
+except ImportError:
+    import pickle as cPickle
 import os

 class InMemoryDataset:
     """Wrapper around a dataset for iteration that allows cycling over the
     dataset.

     This functionality is especially useful for training. One can specify if
     the data is to be shuffled at the end of each epoch. It is also possible
     to specify a transformation function to be applied to the batch before
     being returned by next_batch.
     """

     def __init__(self, X, y, shuffle_at_epoch_begin, batch_transform_fn=None):
         if X.shape[0] != y.shape[0]:
             raise ValueError("X and y must have the same number of examples.")
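Why the shim works: Python 3 removed the cPickle module and folded its C implementation into the standard pickle, so the try/except import binds the name cPickle to an equivalent module under either interpreter. A minimal standalone sketch of the same pattern:

try:
    import cPickle  # Python 2: fast C pickler
except ImportError:
    import pickle as cPickle  # Python 3: pickle is already C-accelerated

obj = {'example': [1, 2, 3]}
blob = cPickle.dumps(obj)          # same dumps/loads API either way
assert cPickle.loads(blob) == obj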
@@ -30,10 +33,10 @@ def get_num_examples(self):
         return self.X.shape[0]

     def next_batch(self, batch_size):
         """Returns the next batch in the dataset.

         If there are fewer than batch_size examples until the end
         of the epoch, next_batch returns only as many examples as there are
         remaining in the epoch.
         """
@@ -70,11 +73,11 @@ def _extract_fn(x):
         X = x.images
         y = x.labels

         if not normalize_range:
             X *= 255.0

         return (X, y)

     Xtrain, ytrain = _extract_fn(mnist.train)
     Xval, yval = _extract_fn(mnist.validation)
     Xtest, ytest = _extract_fn(mnist.test)
@@ -85,36 +88,41 @@ def load_cifar10(data_dir, flatten=False, one_hot=True, normalize_range=False,
                  whiten_pixels=True, border_pad_size=0):
     """Loads all of CIFAR-10 in a numpy array.

     Provides a few options for the output formats. For example,
     normalize_range returns the output images with pixel values in [0.0, 1.0].
     The other options are self-explanatory. Border padding corresponds to
     upsampling the image by zero padding the border of the image.
     """
     train_filenames = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4']
     val_filenames = ['data_batch_5']
     test_filenames = ['test_batch']

     # NOTE: this function uses some arguments from the outer scope, namely
     # flatten, one_hot, normalize_range, and possibly others once added.
     def _load_data(fpath):
         with open(fpath, 'rb') as f:
-            d = cPickle.load(f)
+            try:
+                d = cPickle.load(f)
+            except UnicodeDecodeError:
+                f.seek(0)
+                d = cPickle.load(f, encoding='bytes')
+                d = {k.decode(): v for k, v in d.items()}  # change keys into strings

         # for the data
         X = d['data'].astype('float32')

         # reshape the data to the format (num_images, height, width, depth)
         num_images = X.shape[0]
         num_classes = 10
         X = X.reshape( (num_images, 3, 32, 32) )
         X = X.transpose( (0,2,3,1) )
         X = X.astype('float32')

         # transformations based on the argument options.
         if normalize_range:
             X = X / 255.0

         if flatten:
             X = X.reshape( (num_images, -1) )
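A note on the except branch above: the CIFAR-10 batch files were pickled by Python 2, and unpickling them under Python 3 with the default ASCII encoding raises UnicodeDecodeError. Reloading with encoding='bytes' succeeds but yields bytes dict keys, which the comprehension decodes back to str. A minimal standalone sketch of this recovery pattern (the helper name load_py2_pickle is illustrative, not part of the PR):

import pickle

def load_py2_pickle(fpath):
    """Load a pickle written by Python 2, tolerating bytes keys."""
    with open(fpath, 'rb') as f:
        try:
            d = pickle.load(f)
        except UnicodeDecodeError:
            f.seek(0)  # the failed load consumed part of the stream; rewind
            d = pickle.load(f, encoding='bytes')
            d = {k.decode(): v for k, v in d.items()}  # bytes keys -> str
    return d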
@@ -143,7 +151,7 @@ def _load_data_multiple_files(fname_list):
         y_full = np.concatenate(y_parts, axis=0)

         return (X_full, y_full)

     Xtrain, ytrain = _load_data_multiple_files(train_filenames)
     Xval, yval = _load_data_multiple_files(val_filenames)
     Xtest, ytest = _load_data_multiple_files(test_filenames)
@@ -169,19 +177,19 @@ def onehot_to_idx(y_onehot):
     return y_idx

 def idx_to_onehot(y_idx, num_classes):
-    num_images = y.shape[0]
+    num_images = y_idx.shape[0]
     y_one_hot = np.zeros( (num_images, num_classes), dtype='float32')
-    y_one_hot[ np.arange(num_images), y ] = 1.0
+    y_one_hot[ np.arange(num_images), y_idx ] = 1.0

     return y_one_hot

 def center_crop(X, out_height, out_width):
     num_examples, in_height, in_width, in_depth = X.shape
     assert out_height <= in_height and out_width <= in_width

-    start_i = (in_height - out_height) / 2
-    start_j = (in_width - out_width) / 2
-    out_X = X[:, start_i : start_i + out_height, start_j : start_j + out_width, :]
+    start_i = (in_height - out_height) // 2
+    start_j = (in_width - out_width) // 2
+    out_X = X[:, start_i : start_i + out_height, start_j : start_j + out_width, :]

     return out_X
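Two real bug fixes in this hunk: idx_to_onehot read the undefined name y instead of its parameter y_idx, and center_crop relied on Python 2 integer division. In Python 3, / is true division and returns a float, which then fails as an array index; // keeps floor-division semantics on both interpreters. A quick sketch with illustrative values:

in_height, out_height = 32, 24

start_true = (in_height - out_height) / 2    # 4.0 in Python 3 (float)
start_floor = (in_height - out_height) // 2  # 4 in both Python 2 and 3

assert isinstance(start_floor, int)
# X[:, 4.0:28.0, :, :] would raise TypeError: slice indices must be integers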
@@ -194,7 +202,7 @@ def random_crop(X, out_height, out_width):
     start_is = np.random.randint(in_height - out_height + 1, size=num_examples)
     start_js = np.random.randint(in_width - out_width + 1, size=num_examples)
     out_X = []
-    for ind in xrange(num_examples):
+    for ind in range(num_examples):
         st_i = start_is[ind]
         st_j = start_js[ind]
@@ -222,22 +230,22 @@ def per_image_whiten(X):
     X_flat = X.reshape((num_examples, -1))
     X_mean = X_flat.mean(axis=1)
     X_cent = X_flat - X_mean[:, None]
     X_norm = np.sqrt( np.sum( X_cent * X_cent, axis=1) )
     X_out = X_cent / X_norm[:, None]
     X_out = X_out.reshape(X.shape)

     return X_out

 # Assumes the following ordering for X: (num_images, height, width, num_channels)
 def zero_pad_border(X, pad_size):
     n, height, width, num_channels = X.shape
     X_padded = np.zeros((n, height + 2 * pad_size, width + 2 * pad_size,
                          num_channels), dtype='float32')
     X_padded[:, pad_size:height + pad_size, pad_size:width + pad_size, :] = X

     return X_padded

 # auxiliary functions for data augmentation
 def get_augment_cifar_data_train(out_height, out_width, p_flip):
     def augment_fn(X, y):
         X_out = random_crop(X, out_height, out_width)
4 changes: 2 additions & 2 deletions darch/evaluators.py
@@ -128,11 +128,11 @@ def compute_accuracy(dataset, ev_feed, ev_batch_size):
         val_num_examples = self.val_dataset.get_num_examples()

         # Training cycle
-        for epoch in xrange(self.training_epochs):
+        for epoch in range(self.training_epochs):
             avg_cost = 0.
             total_batch = int(train_num_examples / batch_size)
             # Loop over all batches
-            for i in xrange(total_batch):
+            for i in range(total_batch):
                 batch_x, batch_y = self.train_dataset.next_batch(batch_size)
                 #print((batch_x.shape, batch_y.shape))
                 #import ipdb; ipdb.set_trace()
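The xrange-to-range substitutions here (and in random_crop above) are the standard Python 3 port: xrange no longer exists, and range in Python 3 is itself a lazy sequence, so loop behavior and memory use are unchanged. A small sketch of the equivalence, with illustrative numbers:

# range(n) in Python 3 is lazy like Python 2's xrange(n); it does not
# build a list, so iterating over large counts stays cheap.
total = sum(1 for _ in range(10**6))
assert total == 10**6

# int(a / b) still truncates like the old integer division for positive
# values, though a // b avoids the intermediate float.
train_num_examples, batch_size = 1050, 100
assert int(train_num_examples / batch_size) == train_num_examples // batch_size == 10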