In [5]:
# Mount Google Drive at /content/drive; the dataset CSVs loaded later in this
# notebook are read from beneath this mount point. Requires the google.colab
# package, i.e. this cell only works inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from numpy import exp
from math import log
# Shared, seeded random generator. The RBM's Gibbs sampling steps
# (sample_h_given_v / sample_v_given_h) draw their binomial samples from it.
numpy_rng = np.random.RandomState(1234)

In [7]:
# Directory (on the mounted Drive) that holds the Fashion-MNIST CSV exports.
# Kept in one place so the location can be changed without editing every read.
DATA_DIR = '/content/drive/MyDrive/Fashion_MNIST'

# Each CSV has a 'label' column; the remaining columns are used as pixel features.
train_df = pd.read_csv(DATA_DIR + '/fashion-mnist_train.csv')
test_df = pd.read_csv(DATA_DIR + '/fashion-mnist_test.csv')

In [8]:
# Features: everything except the 'label' column, as a plain ndarray.
train_data_df = train_df.drop(columns='label')
train_data_np = train_data_df.to_numpy()

# Binarise: 1 where the pixel value exceeds 127, 0 otherwise.
train_bin = (train_data_np > 127).astype(int)

# One flat 784-element row per image.
train_data = train_bin.reshape(len(train_bin), 784)
train_labels = train_df['label']

# Hold out 10% of the rows as a validation split (fixed seed for repeatability).
train_data, val_data, train_labels, val_labels = train_test_split(
    train_data,
    train_labels,
    test_size=0.1,
    random_state=42,
)

In [9]:
# Same preprocessing as the training split: drop the label column,
# binarise at 127, and lay each image out as one 784-element row.
test_data_df = test_df.drop(columns='label')
test_data_np = test_data_df.to_numpy()
test_bin = (test_data_np > 127).astype(int)
test_data = test_bin.reshape(len(test_bin), 784)
test_labels = test_df['label']

In [10]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise by NumPy."""
    denominator = 1 + np.exp(-x)
    return 1. / denominator

In [None]:
def eval(data,W,b):
  """Return the softmax class probabilities of a linear classifier for one sample.

  Computes ``softmax(W.T @ data + b)`` with the result laid out as a column.

  Args:
    data: feature vector of one sample (length must match ``W.shape[0]``).
    W: weight matrix with one column per class.
    b: bias column, one row per class.

  Returns:
    Column vector with one probability per class.

  Note:
    The name shadows Python's built-in ``eval``; it is kept because other
    cells call this function by that name.
  """
  # Reshape to a column whose height is inferred from W instead of assuming
  # exactly 10 classes; for a 10-class W this is the same (10, 1) shape.
  logits = np.reshape(np.matmul(W.T, data), (-1, 1)) + b
  return softmax(logits)

In [None]:
def one_hot_encoding(l, L):
  """Return the one-hot column vector for class ``l`` out of ``L`` classes.

  Args:
    l: class index; a value outside ``range(L)`` yields an all-zero vector.
    L: number of classes.

  Returns:
    Integer ndarray of shape ``(L, 1)`` with a 1 in row ``l`` and 0 elsewhere.
  """
  # Shape follows L; the previous fixed (10, 1) reshape raised for any L != 10.
  e_y = (np.arange(L) == l).astype(int)
  return np.reshape(e_y, (L, 1))

In [None]:
def softmax(x):
    """Softmax over all elements of ``x`` (normalised by the total sum).

    The maximum is subtracted before exponentiating so large inputs do not
    overflow; the result is unchanged by that shift.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    return weights / np.sum(weights)

In [None]:
def train(data,labels,W,b,lr = 0.1,epochs = 50):
  """Fit the softmax classifier with per-sample gradient descent.

  For every sample the cross-entropy gradient ``x (o - t)^T`` is applied to
  ``W`` and ``(o - t)`` to ``b`` immediately (one update per sample).

  Args:
    data: 2-D array, one sample per row.
    labels: class index for each row of ``data`` (array-like or pandas Series).
    W: weight matrix of shape (n_features, n_classes); updated in place.
    b: bias column of shape (n_classes, 1); updated in place.
    lr: learning rate.
    epochs: number of passes over ``data`` (default 50, the previous fixed value).

  Returns:
    The updated ``(W, b)`` (the same objects that were passed in).
  """
  # Force positional access. On a pandas Series, labels[j] is a lookup by index
  # label, which is wrong (or a KeyError) once the index has been shuffled,
  # e.g. for the output of train_test_split.
  labels = np.asarray(labels)
  n_samples = data.shape[0]
  n_classes = b.shape[0]

  # One-hot targets are computed once and reused every epoch.
  t = [one_hot_encoding(labels[j], n_classes) for j in range(n_samples)]

  for _ in range(epochs):
    for j in range(n_samples):
      error = np.reshape(eval(data[j], W, b) - t[j], [n_classes, 1])
      x_col = np.reshape(data[j], [data[j].shape[0], 1])
      W -= lr * np.matmul(x_col, error.T)
      b -= lr * error
  return W,b

In [None]:
def test(data,labels,W,b):
  """Score the softmax classifier on a labelled set.

  Args:
    data: 2-D array, one sample per row.
    labels: class index for each row of ``data`` (array-like or pandas Series).
    W: weight matrix of shape (n_features, n_classes).
    b: bias column of shape (n_classes, 1).

  Returns:
    ``(cross_entropy, accuracy)`` where ``cross_entropy`` is the mean negative
    log-probability assigned to the true class, and ``accuracy`` is the NUMBER
    of correctly classified samples (a raw count, not a fraction).
  """
  # Force positional access. On a pandas Series, labels[j] is a lookup by index
  # label, which is wrong (or a KeyError) once the index has been shuffled,
  # e.g. for the output of train_test_split.
  labels = np.asarray(labels)
  n_classes = b.shape[0]

  cross_entropy = 0
  count = 0
  for j in range(data.shape[0]):
    probs = eval(data[j], W, b)
    target = one_hot_encoding(labels[j], n_classes)

    index_t = int(np.argmax(target))
    index_o = int(np.argmax(probs))
    if index_t == index_o:
      count += 1

    # Pass math.log a real scalar; handing it a (1, 1) array relies on the
    # deprecated implicit array-to-scalar conversion. The 1e-300 offset guards
    # against log(0).
    cross_entropy -= log(float(probs[index_t, 0]) + 1e-300)

  accuracy = count
  cross_entropy = cross_entropy/data.shape[0]

  return cross_entropy, accuracy

In [12]:
class RBM(object):
    """Restricted Boltzmann machine with binary units, trained by contrastive divergence.

    Args:
        input: training matrix with one visible vector per row (may also be
            supplied later through ``contrastive_divergence``).
        n_visible: number of visible units.
        n_hidden: number of hidden units.
        W: optional initial weights of shape (n_visible, n_hidden).
        hbias: optional initial hidden biases of shape (n_hidden,).
        vbias: optional initial visible biases of shape (n_visible,).
        rng: optional ``numpy.random.RandomState`` used for parameter
            initialisation and Gibbs sampling. Defaults to the notebook-level
            seeded ``numpy_rng`` so that runs are repeatable.
    """

    def __init__(self, input=None, n_visible=2, n_hidden=3, W=None, hbias=None, vbias=None, rng=None):
        # A single generator drives both initialisation and sampling; drawing
        # the initial parameters from the unseeded global np.random made every
        # run start from different weights.
        self.rng = numpy_rng if rng is None else rng

        if W is None:
            W = self.rng.randn(n_visible, n_hidden) * 0.01

        if hbias is None:
            hbias = self.rng.randn(n_hidden) * 0.01  # small random hidden biases

        if vbias is None:
            vbias = self.rng.randn(n_visible) * 0.01  # small random visible biases

        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.input = input
        self.W = W
        self.hbias = hbias
        self.vbias = vbias

    def contrastive_divergence(self, lr=0.1, k=1, input=None):
        """Apply one CD-k parameter update computed over all rows of the input.

        Args:
            lr: learning rate.
            k: number of Gibbs steps (must be >= 1).
            input: optional new training matrix; replaces ``self.input``.

        Raises:
            ValueError: if ``k < 1`` or no training input is available.
        """
        if k < 1:
            # With zero Gibbs steps there is no negative phase to subtract.
            raise ValueError("k must be at least 1, got {}".format(k))
        if input is not None:
            self.input = input
        if self.input is None:
            raise ValueError("no training input: pass `input` here or to the constructor")

        # Positive phase: hidden probabilities / samples driven by the data.
        ph_mean, ph_sample = self.sample_h_given_v(self.input)

        # Negative phase: k alternating Gibbs steps starting from the hidden sample.
        nh_samples = ph_sample
        for _ in range(k):
            nv_means, nv_samples, nh_means, nh_samples = self.gibbs_hvh(nh_samples)

        n_samples = self.input.shape[0]
        self.W += lr * (np.matmul(self.input.T, ph_mean) - np.matmul(nv_samples.T, nh_means)) / n_samples
        self.vbias += lr * np.mean(self.input - nv_samples, axis=0)
        self.hbias += lr * np.mean(ph_mean - nh_means, axis=0)

    def sample_h_given_v(self, v0_sample):
        """Return ``[mean, sample]`` of the hidden units given visible values."""
        h1_mean = self.propup(v0_sample)
        h1_sample = self.rng.binomial(size=h1_mean.shape, n=1, p=h1_mean)
        return [h1_mean, h1_sample]

    def sample_v_given_h(self, h0_sample):
        """Return ``[mean, sample]`` of the visible units given hidden values."""
        v1_mean = self.propdown(h0_sample)
        v1_sample = self.rng.binomial(size=v1_mean.shape, n=1, p=v1_mean)
        return [v1_mean, v1_sample]

    def propup(self, v):
        """Hidden activation probabilities ``sigmoid(v W + hbias)``."""
        pre_sigmoid_activation = np.dot(v, self.W) + self.hbias
        return sigmoid(pre_sigmoid_activation)

    def propdown(self, h):
        """Visible activation probabilities ``sigmoid(h W^T + vbias)``."""
        pre_sigmoid_activation = np.dot(h, self.W.T) + self.vbias
        return sigmoid(pre_sigmoid_activation)

    def gibbs_hvh(self, h0_sample):
        """One Gibbs step hidden -> visible -> hidden.

        Returns:
            ``[v1_mean, v1_sample, h1_mean, h1_sample]``.
        """
        v1_mean, v1_sample = self.sample_v_given_h(h0_sample)
        h1_mean, h1_sample = self.sample_h_given_v(v1_sample)

        return [v1_mean, v1_sample,
                h1_mean, h1_sample]

    def get_hidden_reps(self, v,t):
        """Return the hidden activation probabilities for two inputs ``v`` and ``t``."""
        h_v = sigmoid(np.dot(v, self.W) + self.hbias)
        h_t = sigmoid(np.dot(t, self.W) + self.hbias)
        return h_v,h_t
        

def train_rbm(rbm, v,t,learning_rate=0.05, k = 1):
    """Run one contrastive-divergence update on ``rbm``, then encode ``v`` and ``t``.

    The update uses whatever input the RBM already holds; ``v`` and ``t`` are
    only passed to ``get_hidden_reps``.

    Returns:
        Whatever ``rbm.get_hidden_reps(v, t)`` returns.
    """
    rbm.contrastive_divergence(k=k, lr=learning_rate)
    hidden_reps = rbm.get_hidden_reps(v, t)
    return hidden_reps


# Driver: build an RBM over train_data, then alternate one CD-k update with an
# evaluation of the resulting hidden representations.
if __name__ == "__main__":
    # construct RBM
    n_visible=784
    n_hidden=64
    k = 5
    rbm = RBM(input=train_data, n_visible=n_visible, n_hidden=n_hidden)

    for epoch in range(25):

      # One contrastive-divergence update on the RBM's stored input, followed by
      # hidden representations of val_data and test_data.
      hidden_rep,hidden_rep_test= train_rbm(rbm, val_data,test_data,k=k)
      print('Training epoch: %d' %epoch)
      
      #W_l, b_l = train(hidden_rep,val_labels,W_l,b_l,lr=0.1)
      # NOTE(review): train_test is not defined in any visible cell of this
      # notebook, so this line raises NameError on a fresh kernel. Presumably it
      # fits a classifier on (hidden_rep, val_labels) and scores it on
      # (hidden_rep_test, test_labels) - confirm and restore its definition.
      loss, accuracy = train_test(hidden_rep, val_labels, hidden_rep_test,test_labels)
      print('Loss:{} Accuracy:{}'.format(loss,accuracy))

Training epoch: 0
Loss:1.0965067955625127 Accuracy:0.6572
Training epoch: 1
Loss:1.2802919785876252 Accuracy:0.6305
Training epoch: 2
Loss:1.714053384608428 Accuracy:0.5133
Training epoch: 3
Loss:1.1920651200996626 Accuracy:0.6456
Training epoch: 4
Loss:2.0510004285549104 Accuracy:0.2857


KeyboardInterrupt: ignored

In [None]:
def swp():
  