In [29]:
import tensorflow as tf
import numpy as np
import tensorflow.contrib.factorization as factor
import pickle
import gzip

with gzip.open('tmp/mnist.pkl.gz', 'rb') as file:
    train_set, valid_set, test_set = pickle.load(file, encoding='iso-8859-1')
train_x, train_y = train_set
valid_x, valid_y = valid_set
test_x, test_y = test_set

train_y = tf.one_hot(train_y, 10, dtype=np.float64)
valid_y = tf.one_hot(valid_y, 10, dtype=np.float64)
test_y = tf.one_hot(test_y, 10, dtype=np.float64)

In [None]:
class NeuralNet:
    def __init__(self, size_in, size_out, hidden, rate = 0.001, w_decay = 0, av = None, loss = None):
        self.x = np.zeros((size_in, 1), dtype=np.float64)
        self.y = np.zeros((size_out, 1), dtype=np.float64)
        self.weight = []
        self.weight_ = []
        self.bias = []
        self.bias_ = []
        self.z = []
        self.activations = []
        
        idx = 0
        self.layer = len(hidden) + 1
        n = self.layer - 1
        self.weight.append(np.random.rand(hidden[idx], size_in) * np.sqrt(2 / size_in))
        self.weight_.append(np.zeros((hidden[idx], size_in), dtype=np.float64))
        self.bias.append(np.random.rand(hidden[idx], 1))
        self.bias_.append(np.zeros((hidden[idx], 1), dtype=np.float64))
        self.activations.append(np.zeros((hidden[idx], 1), dtype=np.float64))
        self.z.append(np.zeros((hidden[idx], 1), dtype=np.float64))
        idx += 1
        
        while idx < n:
            self.weight.append(np.random.rand(hidden[idx], hidden[idx - 1]) * np.sqrt(2 / hidden[idx - 1]))
            self.weight_.append(np.zeros((hidden[idx], hidden[idx - 1]), dtype=np.float64))
            self.bias.append(np.random.rand(hidden[idx], 1))
            self.bias_.append(np.zeros((hidden[idx], 1), dtype=np.float64))
            self.z.append(np.zeros((hidden[idx], 1), dtype=np.float64))
            self.activations.append(np.zeros((hidden[idx], 1), dtype=np.float64))
            idx += 1
        
        self.weight.append(np.random.rand(size_out, hidden[idx - 1]) * np.sqrt(2 / hidden[idx - 1]))
        self.weight_.append(np.zeros((size_out, hidden[idx - 1]), dtype=np.float64))
        self.bias.append(np.random.rand(size_out, 1))
        self.bias_.append(np.zeros((size_out, 1), dtype=np.float64))
        self.activations.append(np.zeros((size_out, 1), dtype=np.float64))
        self.z.append(np.zeros((size_out, 1), dtype=np.float64))
        
        self.rate = rate
        self.w_decay = w_decay
        
        if av != None:
            if(hasattr(av[0], '__call__') and  hasattr(av[1], '__call__')):
                self.activate = av[0]
                self.activate_ = av[1]
                
        if loss != None:
            if(hasattr(loss[0], '__call__') and  hasattr(loss[1], '__call__')):
                self.cost = loss[0]
                self.cost_ = loss[1]
    
    def activate(self, x):
        return (1 - np.exp(-(x * 2))) / (1 + np.exp(-(x * 2)))
    
    def activate_(self, x):
        return 1 - np.square(self.activate(x))   
    
    def cost(self, y):
        return (self.y - y) ** 2
    
    def cost_(self, y):
        return (self.y - y) * 2
    
    def feed(self, x):
        self.x[:] = x.reshape((x.shape[0], 1))
        idx = 0
        n = self.layer - 1
        self.z[idx] = self.weight[idx].dot(self.x) + self.bias[idx]
        self.activations[idx] = self.activate(self.z[idx])
        idx += 1
        
        while idx < n:
            self.z[idx] = self.weight[idx].dot(self.activations[idx - 1]) + self.bias[idx]
            self.activations[idx] = self.activate(self.z[idx])
            idx += 1
        
        self.z[idx] = self.weight[idx].dot(self.activations[idx - 1]) + self.bias[idx]
        self.y = self.activate(self.z[idx])
        
    def propagate(self, y):
        y = y.reshape((y.shape[0], 1))
        idx = self.layer - 1
        i_ = self.activate_(self.z[idx]) * self.cost_(y)
        self.weight_[idx] = i_.dot(self.activations[idx - 1].T)
        self.bias_[idx] = i_
        c_ = self.weight[idx].T.dot(i_)
        idx -= 1
        
        while idx > 0:
            i_ = self.activate_(self.z[idx]) * c_
            self.weight_[idx] = i_.dot(self.activations[idx - 1].T)
            self.bias_[idx] = i_
            c_ = self.weight[idx].T.dot(i_)
            idx -= 1
        
        i_ = self.activate_(self.z[idx]) * c_
        self.weight_[idx] = i_.dot(self.x.T)
        self.bias_[idx] = i_
        
        
        while idx < self.layer:
            self.weight_[idx] += self.weight[idx] * self.w_decay
            w, b = self.update(self.weight_[idx], self.bias_[idx], idx)
            self.weight[idx] -= w
            self.bias[idx] -= b
            idx += 1
    
    def update(self, w_, b_, idx):
        return self.rate * w_, self.rate * b_
    
    def heetal_w(self, cur, prev, com):
        return np.random.randn(com, cur) * np.sqrt(2 / prev)
           
    def result(self):
        return self.y
    
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_(x):
    return sigmoid(x) * (1 - sigmoid(x))

def tanh(x):
    return (2 / (1 + np.exp(-2 * x))) - 1

def tanh_(x):
    return 1 - np.square(tanh(x))

def relu(x, a = 0.01):
    return x * (x > 0)

def relu_(x, a = 0.01):
    return 1 * (x > 0)
    
def soe(dif):
    return np.square(dif)

def soe_(dif):
    return 2 * dif

class AdaDelta:
    def __init__(self, ann, arbitary = 1e-25, moment = 0.9):
        self.epsilon = arbitary
        self.moment = moment
        self.rate = ann.rate
        self.E = [[x for x in self.genE(ann)], [x for x in self.genE(ann, is_weight=False)]]
        self.delta = [[x for x in self.genE(ann)], [x for x in self.genE(ann, is_weight=False)]]
        ann.update = self
        
    def evaluate(self, w_, b_, idx):
        self.E[0][idx] = self.moment * self.E[0][idx] + (1 - self.moment) * (w_ ** 2)
        delta = self.rate * w_
        delta /= np.sqrt(self.E[0][idx] + self.epsilon)
        old_delta = self.delta[0][idx]
        self.delta[0][idx] = self.moment * self.delta[0][idx] + (1 - self.moment) * (delta ** 2)
        
        self.E[1][idx] = self.moment * self.E[1][idx] + (1 - self.moment) * (b_ ** 2)
        delta = self.rate * b_
        delta /= np.sqrt(self.E[1][idx] + self.epsilon)
        old_delta_b = self.delta[1][idx]
        self.delta[1][idx] = self.moment * self.delta[1][idx] + (1 - self.moment) * (delta ** 2)
        
        w_rate = np.sqrt(old_delta + self.epsilon)
        w_rate /= np.sqrt(self.E[0][idx] + self.epsilon)
        
        b_rate = np.sqrt(old_delta_b + self.epsilon)
        b_rate /= np.sqrt(self.E[1][idx] + self.epsilon)
        
        return w_rate, b_rate
        
    def __call__(self, w_, b_, idx):
        w_rate, b_rate = self.evaluate(w_, b_, idx)
        return w_rate * w_, b_rate * b_
            
    def genE(self, ann, is_weight = True):
        l = ann.layer
        idx = 0
        while idx < l:
            yield np.zeros(ann.weight_[idx].shape if is_weight else ann.bias_[idx].shape, dtype=np.float64)
            idx += 1

In [42]:
### K-Means
n_features = 784 # Input: A 28*28 pixels
n_classes = 10 # Digit: 0 - 9
n_clusters = 24 # 24 Centroids
distance_func = 'cosine' # cosine distance
n_steps = 50 # Total steps to train
n_batch = 1024 # The number of samples per batch

# Input images
X = tf.placeholder(tf.float32, shape=[None, n_features])
# Labels (for assigning a label to a centroid and testing)
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

# K-Means Parameters
kmeans = factor.KMeans(inputs=X, num_clusters=n_clusters, distance_metric='cosine',
                use_mini_batch=True)

# Build KMeans graph
training_graph = kmeans.training_graph()

if len(training_graph) > 6: # Tensorflow 1.4+
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     cluster_centers_var, init_op, train_op) = training_graph
else:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     init_op, train_op) = training_graph

cluster_idx = cluster_idx[0] # fix for cluster_idx being a tuple
avg_distance = tf.reduce_mean(scores)

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
sess.run(init_vars, feed_dict={X: train_x})
sess.run(init_op, feed_dict={X: train_x})

# Training
for i in range(1, n_steps + 1):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                         feed_dict={X: train_x})
    if i % 10 == 0 or i == 1:
        print("Step %i, Avg Distance: %f" % (i, d))

# Assign a label to each centroid
# Count total number of labels per centroid, using the label of each training
# sample to their closest centroid (given by 'idx')
counts = np.zeros(shape=(n_clusters, n_classes))
for i in range(len(idx)):
    counts[idx[i]] += train_y
# Assign the most frequent label to the centroid
labels_map = [np.argmax(c) for c in counts]
labels_map = tf.convert_to_tensor(labels_map)

# Evaluation ops
# Lookup: centroid_id -> label
cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
# Compute accuracy
correct_prediction = tf.equal(cluster_label, tf.cast(tf.argmax(Y, 1), tf.int32))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Test Model
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))

Step 1, Avg Distance: 0.343382
Step 10, Avg Distance: 0.221354
Step 20, Avg Distance: 0.219730
Step 30, Avg Distance: 0.219138
Step 40, Avg Distance: 0.218805
Step 50, Avg Distance: 0.218582
[15 14 10 ... 12 18 21]


ValueError: setting an array element with a sequence.

ValueError: Cannot evaluate tensor using `eval()`: No default session is registered. Use `with sess.as_default()` or pass an explicit session to `eval(session=sess)`