-
Notifications
You must be signed in to change notification settings - Fork 0
/
load.py
128 lines (106 loc) · 3.74 KB
/
load.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import gzip
import os
import glob
import numpy as np
import tensorflow as tf
import numpy as np
# def normalize (raw):
# mins = np.amin(raw,axis=0)
# offset = map (lambda x: x if x >= 0 else -x, mins)
# positive = np.add(raw, offset)
# maxs = np.amax(positive,axis=0)
# return np.true_divide(positive,maxs)
def extract_data(dir, num):
    """Load labelled examples from every CSV file matching a glob pattern.

    Each matching file is parsed with ``np.genfromtxt`` using ``,`` as the
    delimiter. Column 0 of every row is used as the label and the remaining
    columns as the input features. Files that parse to nothing are skipped.

    Args:
        dir: Glob pattern handed to ``glob.glob`` to select the files to read.
            (The name shadows the builtin; it is kept so existing callers
            keep working.)
        num: Maximum number of examples to return. A value <= 0 means
            "no limit".

    Returns:
        A tuple ``(inps, labels)``. ``inps`` is a 2-D array with one row per
        example; ``labels`` is a column vector of shape ``(n, 1)``. When no
        data is found the arrays have shapes ``(0, 0)`` and ``(0, 1)``.
    """
    input_chunks = []
    label_chunks = []
    loaded = 0

    # Sorted so the example order (and therefore the `num` cut-off) does not
    # depend on the order in which the filesystem lists the files.
    for filename in sorted(glob.glob(dir)):
        data = np.genfromtxt(filename, delimiter=',')
        if data.size == 0:
            continue
        # genfromtxt collapses a single-row file to 1-D; restore the row axis
        # so the column slicing below works for every file.
        data = np.atleast_2d(data)
        input_chunks.append(data[:, 1:])
        label_chunks.append(data[:, 0])
        loaded += data.shape[0]
        if num > 0 and loaded >= num:
            # Enough rows collected; the surplus is trimmed after the merge.
            break

    if not input_chunks:
        return np.empty((0, 0)), np.empty((0, 1))

    # One concatenation at the end instead of re-copying on every file.
    inps = np.concatenate(input_chunks, axis=0)
    labels = np.concatenate(label_chunks, axis=0)
    if num > 0:
        inps = inps[:num]
        labels = labels[:num]
    return inps, labels[:, None]
class DataSet(object):
    """In-memory collection of examples served as shuffled mini-batches.

    Holds two parallel arrays, ``inputs`` and ``labels``, indexed along
    axis 0, and tracks the read position inside the current epoch.
    """

    def __init__(self, inputs, labels, dtype=tf.float32):
        """Construct a DataSet.

        Args:
            inputs: Array of examples; axis 0 indexes the examples.
            labels: Array of labels with the same length along axis 0.
            dtype: Any value accepted by ``tf.as_dtype``. It is only
                validated; the stored arrays are neither cast nor rescaled.

        Raises:
            AssertionError: If ``inputs`` and ``labels`` have different
                lengths along axis 0.
        """
        # The converted dtype is not used; the call is kept so that a value
        # tf.as_dtype cannot convert is still rejected here.
        tf.as_dtype(dtype)
        assert inputs.shape[0] == labels.shape[0], (
            'inputs.shape: %s labels.shape: %s' % (inputs.shape,
                                                   labels.shape))
        self._num_examples = inputs.shape[0]
        self._inputs = inputs
        self._labels = labels
        self._epochs_completed = 0
        self._index_in_epoch = 0

    @property
    def inputs(self):
        """The input array, in its current (possibly shuffled) order."""
        return self._inputs

    @property
    def labels(self):
        """The label array, in the same order as ``inputs``."""
        return self._labels

    @property
    def num_examples(self):
        """Number of examples, i.e. the length of ``inputs`` along axis 0."""
        return self._num_examples

    @property
    def epochs_completed(self):
        """How many times ``next_batch`` has wrapped around the data."""
        return self._epochs_completed

    def num_batch(self, batch_size):
        """Return the number of full batches in one epoch (1 if the batch
        size is at least the number of examples)."""
        if batch_size < self._num_examples:
            return self._num_examples // batch_size
        return 1

    def next_batch(self, batch_size):
        """Return the next `batch_size` examples from this data set.

        When fewer than ``batch_size`` examples remain in the current epoch,
        the remainder is skipped, both arrays are reshuffled with the same
        permutation and the batch is taken from the start of the new epoch.
        If ``batch_size`` is at least ``num_examples`` the whole data set is
        returned.

        Returns:
            A tuple ``(inputs, labels)`` of slices over the same row range.
        """
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            # Finished epoch
            self._epochs_completed += 1
            # Shuffle inputs and labels with one shared permutation so the
            # pairs stay aligned.
            perm = np.arange(self._num_examples)
            np.random.shuffle(perm)
            self._inputs = self._inputs[perm]
            self._labels = self._labels[perm]
            # Start next epoch. Clamp to the data set size so an oversized
            # batch still returns every example rather than all but one.
            start = 0
            self._index_in_epoch = min(batch_size, self._num_examples)
        end = self._index_in_epoch
        return self._inputs[start:end], self._labels[start:end]
def read_data_sets(datad, validation, test, dtype=tf.float32, num=0):
    """Load examples and split them into train / validation / test sets.

    The examples returned by ``extract_data(datad, num)`` are split in order:
    the training set comes first, then the validation set, then the test set.

    Args:
        datad: Glob pattern of the data files, forwarded to ``extract_data``.
        validation: Fraction of the examples to put in the validation set.
        test: Fraction of the examples to put in the test set.
        dtype: Forwarded to each ``DataSet``.
        num: Maximum number of examples to load, forwarded to
            ``extract_data`` (0 means no limit).

    Returns:
        An object with ``train``, ``validation`` and ``test`` attributes,
        each a ``DataSet``.

    Raises:
        ValueError: If the fractions produce a negative split size.
    """
    class DataSets(object):
        """Plain attribute container for the three splits."""

    data_sets = DataSets()
    data, labels = extract_data(datad, num)

    size = len(labels)
    validation_s = int(size * validation)
    test_s = int(size * test)
    # Whatever is not claimed by validation/test goes to training, so
    # truncation in the two int() calls above never loses an example.
    training_s = size - validation_s - test_s
    if validation_s < 0 or test_s < 0 or training_s < 0:
        raise ValueError(
            'invalid split fractions: validation=%r test=%r' %
            (validation, test))

    # Single-argument print(...) behaves the same on Python 2 and 3. The
    # double space reproduces the output of the former `print a, b` form.
    print("Read: %d examples" % size)
    print(" Training set =  %d" % training_s)
    print(" Validation set =  %d" % validation_s)
    print(" Test set =  %d" % test_s)

    validation_end = training_s + validation_s
    test_end = validation_end + test_s

    data_sets.train = DataSet(
        data[:training_s], labels[:training_s], dtype=dtype)
    data_sets.validation = DataSet(
        data[training_s:validation_end], labels[training_s:validation_end],
        dtype=dtype)
    data_sets.test = DataSet(
        data[validation_end:test_end], labels[validation_end:test_end],
        dtype=dtype)
    return data_sets