In [1]:
import tensorflow as tf
from tensorflow.contrib import learn

from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np

from sklearn.preprocessing import StandardScaler

import pandas as pd

from sklearn.metrics import mean_squared_error


df = pd.read_csv("data/final_train.csv")

trY = df['Item_Outlet_Sales']

df.drop(['Item_Identifier'], axis=1, inplace=True)
df.drop(['Item_Outlet_Sales'], axis=1, inplace=True)

df = df.astype(np.float32)

scaler_X = StandardScaler(with_mean=True, with_std=True)
trX = scaler_X.fit_transform(df)

trX, tsX, trY, tsY = train_test_split(trX, trY, test_size=0.33, random_state=42)

In [2]:
#Class that defines the behavior of the RBM
class RBM(object):
    
    def __init__(self, input_size, output_size, epochs=5, learning_rate=1, batchsize=1):
        #Defining the hyperparameters
        self._input_size = input_size #Size of input
        self._output_size = output_size #Size of output
        self.epochs = epochs #Amount of training iterations
        self.learning_rate = learning_rate #The step used in gradient descent
        self.batchsize = batchsize #The size of how much data will be used for training per sub iteration
        
        #Initializing weights and biases as matrices full of zeroes
        self.w = np.zeros([input_size, output_size], np.float32) #Creates and initializes the weights with 0
        self.hb = np.zeros([output_size], np.float32) #Creates and initializes the hidden biases with 0
        self.vb = np.zeros([input_size], np.float32) #Creates and initializes the visible biases with 0


    #Fits the result from the weighted visible layer plus the bias into a sigmoid curve
    def prob_h_given_v(self, visible, w, hb):
        #Sigmoid 
        return tf.nn.sigmoid(tf.matmul(visible, w) + hb)

    #Fits the result from the weighted hidden layer plus the bias into a sigmoid curve
    def prob_v_given_h(self, hidden, w, vb):
        return tf.nn.sigmoid(tf.matmul(hidden, tf.transpose(w)) + vb)
    
    #Generate the sample probability
    def sample_prob(self, probs):
        return tf.nn.relu(tf.sign(probs - tf.random_uniform(tf.shape(probs))))

    #Training method for the model
    def train(self, X):
        #Create the placeholders for our parameters
        _w = tf.placeholder("float", [self._input_size, self._output_size])
        _hb = tf.placeholder("float", [self._output_size])
        _vb = tf.placeholder("float", [self._input_size])
        
        prv_w = np.zeros([self._input_size, self._output_size], np.float32) #Creates and initializes the weights with 0
        prv_hb = np.zeros([self._output_size], np.float32) #Creates and initializes the hidden biases with 0
        prv_vb = np.zeros([self._input_size], np.float32) #Creates and initializes the visible biases with 0

        
        cur_w = np.zeros([self._input_size, self._output_size], np.float32)
        cur_hb = np.zeros([self._output_size], np.float32)
        cur_vb = np.zeros([self._input_size], np.float32)
        v0 = tf.placeholder("float", [None, self._input_size])
        
        #Initialize with sample probabilities
        h0 = self.sample_prob(self.prob_h_given_v(v0, _w, _hb))
        v1 = self.sample_prob(self.prob_v_given_h(h0, _w, _vb))
        h1 = self.prob_h_given_v(v1, _w, _hb)
        
        #Create the Gradients
        positive_grad = tf.matmul(tf.transpose(v0), h0)
        negative_grad = tf.matmul(tf.transpose(v1), h1)
        
        #Update learning rates for the layers
        update_w = _w + self.learning_rate *(positive_grad - negative_grad) / tf.to_float(tf.shape(v0)[0])
        update_vb = _vb +  self.learning_rate * tf.reduce_mean(v0 - v1, 0)
        update_hb = _hb +  self.learning_rate * tf.reduce_mean(h0 - h1, 0)
        
        #Find the error rate
        err = tf.reduce_mean(tf.square(v0 - v1))
        
        #Training loop
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            #For each epoch
            for epoch in range(self.epochs):
                #For each step/batch
                for start, end in zip(range(0, len(X), self.batchsize),range(self.batchsize,len(X), self.batchsize)):
                    batch = X[start:end]
                    #Update the rates
                    cur_w = sess.run(update_w, feed_dict={v0: batch, _w: prv_w, _hb: prv_hb, _vb: prv_vb})
                    cur_hb = sess.run(update_hb, feed_dict={v0: batch, _w: prv_w, _hb: prv_hb, _vb: prv_vb})
                    cur_vb = sess.run(update_vb, feed_dict={v0: batch, _w: prv_w, _hb: prv_hb, _vb: prv_vb})
                    prv_w = cur_w
                    prv_hb = cur_hb
                    prv_vb = cur_vb
                error=sess.run(err, feed_dict={v0: X, _w: cur_w, _vb: cur_vb, _hb: cur_hb})
                print('Epoch: {} --> Reconstruction error={}'.format(epoch, error))
            self.w = prv_w
            self.hb = prv_hb
            self.vb = prv_vb

    #Create expected output for our DBN
    def rbm_outpt(self, X):
        input_X = tf.constant(X)
        _w = tf.constant(self.w)
        _hb = tf.constant(self.hb)
        out = tf.nn.sigmoid(tf.matmul(input_X, _w) + _hb)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            return sess.run(out)

In [3]:
RBM_hidden_sizes = [150, 1]

#Since we are training, set input as training data
inpX = trX

#Create list to hold our RBMs
rbm_list = []

#Size of inputs is the number of inputs in the training set
input_size = inpX.shape[1]

#For each RBM we want to generate
for i, size in enumerate(RBM_hidden_sizes):
    print('RBM: {}  {} --> {}'.format(i, input_size, size))
    rbm_list.append(RBM(input_size, size, epochs=20, learning_rate=0.5, batchsize=20))
    input_size = size

RBM: 0  10 --> 150
RBM: 1  150 --> 1


In [4]:
#For each RBM in our list
for rbm in rbm_list:
    print('New RBM:')
    #Train a new one
    rbm.train(inpX)
    inpX = inpX.astype(np.float32)
    #Return the output layer
    inpX = rbm.rbm_outpt(inpX)

New RBM:
Epoch: 0 --> Reconstruction error=0.8700782060623169
Epoch: 1 --> Reconstruction error=0.8636254668235779
Epoch: 2 --> Reconstruction error=0.8611057996749878
Epoch: 3 --> Reconstruction error=0.8589401841163635
Epoch: 4 --> Reconstruction error=0.8584511280059814
Epoch: 5 --> Reconstruction error=0.8574615716934204
Epoch: 6 --> Reconstruction error=0.8572171926498413
Epoch: 7 --> Reconstruction error=0.854599118232727
Epoch: 8 --> Reconstruction error=0.8541574478149414
Epoch: 9 --> Reconstruction error=0.8540276288986206
Epoch: 10 --> Reconstruction error=0.8531795740127563
Epoch: 11 --> Reconstruction error=0.8534522652626038
Epoch: 12 --> Reconstruction error=0.8541721105575562
Epoch: 13 --> Reconstruction error=0.8526024222373962
Epoch: 14 --> Reconstruction error=0.8528524041175842
Epoch: 15 --> Reconstruction error=0.8524441719055176
Epoch: 16 --> Reconstruction error=0.8516952395439148
Epoch: 17 --> Reconstruction error=0.8526866436004639
Epoch: 18 --> Reconstruction e

In [5]:
total_len = trX.shape[0]

# Parameters
training_epochs = 1250
batch_size = 20
display_step = 100
# Network Parameters
n_input = trX.shape[1]

# tf Graph input
x = tf.placeholder("float", [None, trX.shape[1]])
y = tf.placeholder("float", [None])

In [6]:
# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)

#     # Hidden layer with RELU activation
#     layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
#     layer_2 = tf.nn.relu(layer_2)

    # Output layer with linear activation
    out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
    return out_layer

In [7]:
def load_from_rbms(rbm_list):
        #If everything is correct, bring over the weights and biases
        weights = {}
        biases = {}
        for i in range(len(rbm_list)):
            if i == len(rbm_list)-1:
                s = 'out'
                weights[s] = tf.Variable(rbm_list[i].w)
                biases[s] = tf.Variable(rbm_list[i].hb)
            else:
                s = 'h' + str(i+1)
                weights[s] = tf.Variable(rbm_list[i].w)
                s = 'b' + str(i+1)
                biases[s] = tf.Variable(rbm_list[i].hb)
        return weights, biases

In [8]:
weights, biases = load_from_rbms(rbm_list)

# Construct model
pred = multilayer_perceptron(x, weights, biases)

# Define loss and optimizer
cost = tf.sqrt(tf.reduce_mean(tf.square(pred-y)))
optimizer = tf.train.AdamOptimizer().minimize(cost)

In [9]:
# Launch the graph
sess = tf.InteractiveSession()

sess.run(tf.global_variables_initializer())

# Training cycle
for epoch in range(training_epochs):
    avg_cost = 0.
    total_batch = int(total_len/batch_size)
    # Loop over all batches
    for i in range(total_batch-1):
        batch_x = trX[i*batch_size:(i+1)*batch_size]
        batch_y = trY[i*batch_size:(i+1)*batch_size]
        # Run optimization op (backprop) and cost op (to get loss value)
        _, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x,
                                                      y: batch_y})
        # Compute average loss
        avg_cost += c / total_batch

    # Display logs per epoch step
    if epoch % display_step == 0:
        print ("Epoch:", '%04d' % (epoch+1), "cost =", \
            "{:.9f}".format(avg_cost))

print ("Optimization Finished!")

Epoch: 0001 cost = 920571.478947368
Epoch: 0101 cost = 1882.324786270
Epoch: 0201 cost = 1719.933937260
Epoch: 0301 cost = 1704.744489061
Epoch: 0401 cost = 1698.501349627
Epoch: 0501 cost = 1695.191352496
Epoch: 0601 cost = 1693.138752399
Epoch: 0701 cost = 1691.750159762
Epoch: 0801 cost = 1690.759858141
Epoch: 0901 cost = 1690.031583873
Epoch: 1001 cost = 1689.478513269
Epoch: 1101 cost = 1689.046579890
Epoch: 1201 cost = 1688.698512670
Optimization Finished!


In [10]:
# Test model
error = tf.square(pred - y)
# Calculate accuracy
accuracy = tf.sqrt(tf.reduce_mean(tf.cast(error, "float")))
print(sess.run(accuracy, feed_dict={x: tsX, y: tsY}))

1684.07
