In [None]:
# Install the Weights & Biases client; the import cell below does `import wandb`.
!pip install wandb



***Importing Libraries and Loading Data***

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist
from keras.datasets import mnist
from sklearn.manifold import TSNE
import wandb

# Load the Fashion-MNIST train/test splits through the Keras dataset helper
# (the recorded cell output shows it downloading the four archive files).
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# Display names for the ten classes; presumably indexed by the integer label id.
class_type = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat','Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'] 

# Weights & Biases project name; handed to sweeper(...) in the last cell.
proj_name='CS6910 ASSIGNMENT 4 GIBBS'

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


***Modify and Restructure Data***

In [None]:
# Data preparation: binarise the images, one-hot encode the labels, then split.

# --- Inputs ---------------------------------------------------------------
# Every 28x28 image becomes a (784, 1) column vector. Pixels strictly above
# 126 (i.e. >= 127) are mapped to 1, all others to 0.
X_train = (x_train.reshape(x_train.shape[0], 784, 1) > 126) * 1
X_test = (x_test.reshape(x_test.shape[0], 784, 1) > 126) * 1

# --- Targets --------------------------------------------------------------
# Each integer label is turned into a (10, 1) one-hot column vector.
# `i` and `y` stay ordinary module-level loop variables, as they were before;
# cells further down may read them, so they are not wrapped in a helper.
Y_train = np.zeros([len(y_train), 10, 1])
for i, label in enumerate(y_train):
    y = np.zeros([10, 1])
    y[label] = 1.0
    Y_train[i] = y

Y_test = np.zeros([len(y_test), 10, 1])                              # test set output
for i, label in enumerate(y_test):
    y = np.zeros([10, 1])
    y[label] = 1.0
    Y_test[i] = y

# --- Split ----------------------------------------------------------------
# The last 15000 samples form the validation set (used to train the
# classifier); the first 45000 remain as the training set (used for the RBM).
X_val, X_train = X_train[-15000:], X_train[0:45000]
Y_val, Y_train = Y_train[-15000:], Y_train[0:45000]

***Hyperparameters and Basic variables***

In [None]:
# Sizes derived from the prepared arrays
n_train_examples, n_visible = X_train.shape[0], X_train.shape[1]     # training samples, visible neurons
n_val_examples = len(X_val)                                          # validation samples
n_test_examples = len(X_test)                                        # test samples

# Model sizes
n_hidden, n_class = 80, 10                                           # hidden neurons, output classes



***Parameter Initialization (Weights and Biases)***

In [None]:
def rbm_param_init(hidden_units=None, visible_units=None):
    """Create freshly initialised RBM parameters.

    Parameters
    ----------
    hidden_units : int, optional
        Number of hidden neurons. Defaults to the module-level ``n_hidden``.
    visible_units : int, optional
        Number of visible neurons. Defaults to the module-level ``n_visible``.

    Returns
    -------
    dict
        ``"W"``      -- (hidden_units, visible_units) weight matrix,
        ``"h_bias"`` -- (hidden_units, 1) zeros,
        ``"v_bias"`` -- (visible_units, 1) zeros.
    """
    # Fall back to the notebook-wide sizes so existing zero-argument calls
    # behave exactly as before.
    if hidden_units is None:
        hidden_units = n_hidden
    if visible_units is None:
        visible_units = n_visible

    # Glorot/Xavier-style scale sqrt(6 / (fan_in + fan_out)) applied to
    # standard-normal draws.
    scale = np.sqrt(6. / (visible_units + hidden_units))

    return {
        "W": np.random.randn(hidden_units, visible_units) * scale,
        "h_bias": np.zeros((hidden_units, 1), dtype=np.float64),
        "v_bias": np.zeros((visible_units, 1), dtype=np.float64),
    }

def classifier_param_init(hidden_units=None, class_units=None):
    """Create freshly initialised parameters for the softmax classifier.

    Parameters
    ----------
    hidden_units : int, optional
        Size of the hidden representation fed to the classifier. Defaults to
        the module-level ``n_hidden``.
    class_units : int, optional
        Number of output classes. Defaults to the module-level ``n_class``.

    Returns
    -------
    dict
        ``"W"`` -- (class_units, hidden_units) weight matrix,
        ``"b"`` -- (class_units, 1) zeros.
    """
    # Fall back to the notebook-wide sizes so existing zero-argument calls
    # behave exactly as before.
    if hidden_units is None:
        hidden_units = n_hidden
    if class_units is None:
        class_units = n_class

    # Glorot/Xavier-style scale sqrt(6 / (fan_in + fan_out)) applied to
    # standard-normal draws.
    scale = np.sqrt(6. / (class_units + hidden_units))

    return {
        "W": np.random.randn(class_units, hidden_units) * scale,
        "b": np.zeros((class_units, 1), dtype=np.float64),
    }

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    Used as the activation for the RBM's hidden and visible units.

    Parameters
    ----------
    x : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise logistic of ``x``, same shape as ``x``.
    """
    # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))) == exp(-logaddexp(0, -x)).
    # logaddexp never forms exp(-x) directly, so very negative inputs give 0.0
    # instead of an overflow RuntimeWarning on the way there.
    return np.exp(-np.logaddexp(0.0, np.negative(x)))

def softmax(x):
    """Softmax over *all* entries of ``x`` (output activation of the classifier).

    The normalisation runs over the whole array, not along an axis, so ``x``
    is expected to hold a single vector of logits (e.g. a (10, 1) column).

    Parameters
    ----------
    x : array_like
        Logits.

    Returns
    -------
    ndarray
        Non-negative values with the same shape as ``x`` that sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; return an empty float array as before.
        return np.exp(x)

    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps exp() from overflowing to inf (which would turn the output to NaN).
    exps = np.exp(x - np.max(x))
    return exps / np.sum(exps)

***RBM and Classifier Trainer***

In [None]:
def rbm_train(X_train, parameters, k, r, learning_rate):
    """Train the RBM for one pass over ``X_train`` using block Gibbs sampling.

    For every training vector a new Markov chain is started from a uniformly
    random binary visible vector and advanced by ``k + r`` Gibbs steps
    (sample h given v, then v given h). The first ``k`` steps are burn-in; the
    visible samples of the last ``r`` steps are averaged to form the model
    (negative-phase) statistics. The parameters are then moved by
    ``learning_rate * (data statistics - model statistics)``.

    Parameters
    ----------
    X_train : sequence of ndarray
        Binary visible vectors, each shaped like ``parameters["v_bias"]``.
    parameters : dict
        RBM parameters with keys ``"W"``, ``"h_bias"`` and ``"v_bias"``.
        The dict is updated in place and also returned.
    k : int
        Number of burn-in Gibbs steps (>= 0).
    r : int
        Number of post-burn-in samples that are averaged (>= 1).
    learning_rate : float
        Step size of the per-sample update.

    Returns
    -------
    dict
        The same ``parameters`` dict holding the updated arrays.

    Raises
    ------
    ValueError
        If ``k`` is negative or ``r`` is smaller than 1; without at least one
        averaged sample the update would have no negative phase.
    """
    if k < 0 or r < 1:
        raise ValueError("rbm_train requires k >= 0 and r >= 1")

    W = parameters["W"]
    h_bias = parameters["h_bias"]
    v_bias = parameters["v_bias"]
    inv_r = 1 / r

    # Iterate over the data that was actually passed in rather than a
    # module-level sample count.
    for v_init in X_train:

        v_sample = np.random.randint(2, size=np.shape(v_init))      # random start of the chain
        dw = np.zeros(np.shape(W))
        dv = np.zeros(np.shape(v_bias))
        dh = np.zeros(np.shape(h_bias))

        for t in range(k + r):
            # One block-Gibbs step: h ~ p(h|v), then v ~ p(v|h)
            h_given_v = sigmoid(np.dot(W, v_sample) + h_bias)
            h_sample = np.random.binomial(1, h_given_v)
            v_given_h = sigmoid(np.dot(np.transpose(W), h_sample) + v_bias)
            v_sample = np.random.binomial(1, v_given_h)

            if t >= k:
                # Past burn-in: fold this sample into the running averages.
                h_model = sigmoid(np.dot(W, v_sample) + h_bias)
                dw = dw + inv_r * np.dot(h_model, np.transpose(v_sample))
                dv = dv + inv_r * v_sample
                dh = dh + inv_r * h_model

        # Data (positive-phase) statistics, computed once with the
        # pre-update parameters so that W and h_bias use the same value.
        h_data = sigmoid(np.dot(W, v_init) + h_bias)

        # Update rule
        W = W + learning_rate * (np.dot(h_data, np.transpose(v_init)) - dw)
        v_bias = v_bias + learning_rate * (v_init - dv)
        h_bias = h_bias + learning_rate * (h_data - dh)

    parameters["W"] = W
    parameters["h_bias"] = h_bias
    parameters["v_bias"] = v_bias
    print("Training Complete")

    return parameters


def get_hidden(x, parameters):
    """Sample a binary hidden representation for the visible vector ``x``.

    The hidden activation probabilities are sigmoid(W x + h_bias); each hidden
    unit is then drawn as a Bernoulli variable with that probability, so
    repeated calls on the same input can return different vectors.
    """
    pre_activation = np.dot(parameters["W"], x) + parameters["h_bias"]
    return np.random.binomial(1, sigmoid(pre_activation))

def classifier_train(X, Y, rbm_parameters, classifier_param, classifier_epochs, learning_rate):
    """Train the softmax classifier on RBM hidden representations with SGD.

    Parameters
    ----------
    X : sequence of ndarray
        Visible vectors; each one is passed through ``get_hidden``.
    Y : sequence of ndarray
        One-hot target column vectors aligned with ``X``.
    rbm_parameters : dict
        Trained RBM parameters used to compute the hidden representation.
    classifier_param : dict
        Classifier parameters with keys ``"W"`` and ``"b"``. The dict is
        updated in place and also returned.
    classifier_epochs : int
        Number of passes over ``X``.
    learning_rate : float
        SGD step size.

    Returns
    -------
    dict
        The same ``classifier_param`` dict holding the updated arrays.
    """
    W = classifier_param["W"]
    b = classifier_param["b"]

    # Use the length of the data actually passed in, not a module-level count.
    n_examples = len(X)

    for epoch in range(classifier_epochs):
        for i in range(n_examples):
            # Feed forward
            hidden_rep = get_hidden(X[i], rbm_parameters)           # hidden representation of this sample
            y_hat = softmax(np.dot(W, hidden_rep) + b)

            # Backpropagate: for softmax + cross-entropy the gradient with
            # respect to the pre-activation is (y_hat - y).
            delta = -(Y[i] - y_hat)
            dW = np.dot(delta, np.transpose(hidden_rep))
            db = delta

            # Update classifier weights
            W = W - learning_rate * dW
            b = b - learning_rate * db

    classifier_param["W"] = W
    classifier_param["b"] = b

    return classifier_param


In [None]:
def rbm_classifier(X_train,Y_train,X_val,Y_val,X_test,Y_test,rbm_epochs,classifier_epochs,k,r,n_hidden,learning_rate) :
    """Alternate RBM and classifier training and evaluate on the test set each epoch.

    Per epoch: one pass of ``rbm_train`` on ``X_train``, then
    ``classifier_epochs`` passes of ``classifier_train`` on the validation
    split, then accuracy and mean cross-entropy loss over ``X_test``/``Y_test``
    are printed and logged to Weights & Biases.

    Parameters
    ----------
    X_train : training inputs for the RBM.
    Y_train : accepted for interface symmetry; not used (the RBM is unsupervised).
    X_val, Y_val : inputs / one-hot targets used to train the classifier.
    X_test, Y_test : inputs / one-hot targets used for evaluation.
    rbm_epochs : number of outer epochs.
    classifier_epochs : classifier passes per outer epoch.
    k, r : burn-in steps and averaged samples for the Gibbs chain.
    n_hidden : NOTE(review): currently not forwarded to the initialisers, which
        size the model from the module-level ``n_hidden``; a value passed here
        that differs from the global has no effect on the model.
    learning_rate : step size shared by the RBM and the classifier.

    Returns
    -------
    tuple(dict, dict)
        ``(rbm_parameters, classifier_param)`` after the last epoch.
    """
    classifier_param = classifier_param_init()
    rbm_parameters = rbm_param_init()

    n_eval = len(X_test)                       # evaluate over the data that was passed in
    tiny = np.finfo(np.float64).tiny           # floor that keeps log() finite if a probability underflows to 0

    for j in range(rbm_epochs) :

        rbm_parameters = rbm_train(X_train,rbm_parameters,k,r,learning_rate)                                             # Train RBM for one epoch
        classifier_param = classifier_train(X_val,Y_val,rbm_parameters,classifier_param,classifier_epochs,learning_rate) # Train the classifier on the RBM's hidden representation

        accuracy = 0.0
        loss = 0.0
        # Evaluate accuracy and loss over test data
        for i in range(n_eval) :
            h = get_hidden(X_test[i],rbm_parameters)
            y_hat = softmax(np.dot(classifier_param["W"],h)+classifier_param["b"])

            if y_hat.argmax()==Y_test[i].argmax():
                accuracy = accuracy + 1
            # Cross-entropy against THIS sample's one-hot target.
            loss = loss + -1*np.sum(np.multiply(Y_test[i],np.log(np.maximum(y_hat,tiny))))

        accuracy = accuracy/n_eval
        loss = loss/n_eval
        print("Epoch :" + str(j)+" "+ str(accuracy)+" "+str(loss))
        wandb.log({"Accuracy":accuracy,"Loss":loss,"Epoch":j})

    return rbm_parameters,classifier_param
      

	

In [None]:
def train():
    """Entry point for one W&B run: initialise the run and train the model.

    The defaults below are handed to ``wandb.init``; the values read back from
    ``wandb.config`` are what is actually passed to ``rbm_classifier``.
    """
    wandb.init(config={
        "rbm_epochs": 5,
        "classifier_epochs": 3,
        "k": 40,
        "r": 20,
        "n_hidden": 80,
        "learning_rate": 0.001,
    })

    cfg = wandb.config
    # The trained parameters returned by rbm_classifier are not kept.
    rbm_classifier(
        X_train, Y_train, X_val, Y_val, X_test, Y_test,
        cfg.rbm_epochs, cfg.classifier_epochs,
        cfg.k, cfg.r, cfg.n_hidden, cfg.learning_rate,
    )




In [None]:
def sweeper(sweep_config, proj_name):
    """Register a W&B sweep for ``sweep_config`` and run ``train`` as its agent.

    Both the sweep and the agent are created under project ``proj_name`` and
    the fixed entity below.
    """
    destination = {"project": proj_name, "entity": 'cs6910krsrd'}
    sweep_id = wandb.sweep(sweep_config, **destination)
    wandb.agent(sweep_id, train, **destination)

In [None]:
# Sweep dictionary
sweep_config = {
    'method': 'bayes',
    'metric': {
        # Must match the key passed to wandb.log exactly (case-sensitive);
        # the training loop logs "Accuracy".
        'name': 'Accuracy',
        'goal': 'maximize',
    },
}

# Search space: every hyperparameter currently has a single candidate value.
parameters_dict = {
    'rbm_epochs': {
        'values': [5]
    },
    'classifier_epochs': {
        'values': [1]
    },
    'k': {
        'values': [200]   #,500,1000]
    },
    'r': {
        'values': [10]
    },
    'n_hidden': {
        'values': [64]      #,128,256]
    },
    'learning_rate': {
        'values': [0.01]
    },
}

sweep_config['parameters'] = parameters_dict

In [None]:
# Create the sweep from sweep_config under project proj_name and start an agent that runs train().
sweeper(sweep_config,proj_name)