In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from numpy import exp
from math import log
numpy_rng = np.random.RandomState(1234)

In [4]:
train_df = pd.read_csv('/content/drive/MyDrive/Fashion_MNIST/fashion-mnist_train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/Fashion_MNIST/fashion-mnist_test.csv')

In [5]:
train_data_df = train_df.drop('label', axis=1)
train_data_np = train_data_df.to_numpy()
train_bin = np.where(train_data_np > 127, 1, 0)
train_data = train_bin.reshape(train_bin.shape[0], 784)
train_labels = train_df['label'].to_frame().to_numpy()
train_data,val_data,train_labels,val_labels = train_test_split(train_data, train_labels, test_size=0.1, random_state=42)
#train_data = [train_data[i].flatten() for i in range(train_data.shape[0])]
#train_data = [np.reshape(train_data[i],(1,train_data[i].shape[0])) for i in range(len(train_data))]

In [6]:
train_data.shape

(54000, 784)

In [7]:
test_data_df = test_df.drop('label', axis=1)
test_data_np = test_data_df.to_numpy()
test_bin = np.where(test_data_np > 127, 1, 0)
test_data = test_bin.reshape(test_bin.shape[0], 784)
test_labels = test_df['label'].to_frame().to_numpy()
#test_data = [test_data[i].flatten() for i in range(test_data.shape[0])]
#test_data = [np.reshape(test_data[i],(1,test_data[i].shape[0])) for i in range(len(test_data))]

In [8]:
def eval(data,W,b):
  return softmax(np.reshape(np.matmul(W.T,data),(10,1)) + b)

In [9]:
def one_hot_encoding(l, L):
  import numpy
  e = []
  for i in range(L):
    if i == l:
      e.append(1)
    else:
      e.append(0)
  e_y = np.asarray(e)
  return np.reshape(e_y,(10,1))

In [10]:
def softmax(vector):
	e = exp(vector)
	return e / e.sum()

In [11]:
def train(data,labels,W,b,lr = 0.05):

  t = []
  o = []
  for j in range(data.shape[0]):
    o.append(eval(data[j],W,b))
    t.append(one_hot_encoding(labels[j],10))
  for i in range(10):
    grad_w = np.zeros([W.shape[0],W.shape[1]])
    grad_b = np.zeros([b.shape[0],1])
    for j in range(data.shape[0]):
      mul2 = np.reshape((o[j]-t[j]),[10,1])
      mul1 = np.reshape(data[j],[32,1])
      grad_w += np.matmul(mul1,mul2.T)
      grad_b += mul2
    W = W - lr*grad_w
    b = b - lr*grad_b
  
  return W,b

In [12]:
def test(data,labels,W,b):
  cross_entropy = 0;
  count = 0;
  t = []
  o = []
  for j in range(data.shape[0]):
    o.append(eval(data[j],W,b))
    t.append(one_hot_encoding(labels[j],10))
    index_t = np.argmax(t[j],axis = 0)
    index_o = np.argmax(o[j],axis = 0)
    if index_t == index_o:
      count += 1
    cross_entropy -= log(o[j][index_t]+1e-300)
  accuracy = count/data.shape[0]

  return cross_entropy, accuracy

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,log_loss
def train_test(x_train, y_train, x_test,y_test):
  logisticRegr = LogisticRegression(max_iter = 1000,multi_class='multinomial')
  logisticRegr.fit(x_train, y_train)
  #print(logisticRegr.classes_)
  y_pred = logisticRegr.predict(x_test)
  pred = logisticRegr.predict_proba(x_test)
  #print(y_pred)
  #print(y_test)
  loss = log_loss(y_test,pred)
  accuracy = accuracy_score(y_test, y_pred)
  return loss, accuracy

In [14]:
import sys
import numpy

def sigmoid(x):
    return 1. / (1 + np.exp(-x))

numpy.seterr(all='ignore')

class RBM(object):
     def __init__(self, input=None, n_visible=2, n_hidden=3, W=None, hbias=None, vbias=None):
        
        if W is None:
            initial_W = np.random.randn(n_visible, n_hidden) * 0.01
            W = initial_W

        if hbias is None:
            hbias = np.random.randn(n_hidden) * 0.01  # initialize h bias 0

        if vbias is None:
            vbias = np.random.randn(n_visible) * 0.01 # initialize v bias 0

        self.n_visible = n_visible 
        self.n_hidden = n_hidden   
        self.input = input
        self.W = W
        self.hbias = hbias
        self.vbias = vbias

def gibbs_sampling(rbm, lr=0.1, k=200, r=10):

  ph_mean, ph_sample = sample_h_given_v(rbm,np.random.randn(rbm.input.shape[0], rbm.input.shape[1]))

  chain_start = ph_sample

  for step in range(k):
      print("Sampling k={}".format(step))
      if step == 0:
          nv_means, nv_samples,\
          nh_means, nh_samples = gibbs_hvh(rbm,chain_start)
      else:
          nv_means, nv_samples,\
          nh_means, nh_samples = gibbs_hvh(rbm,nh_samples)
  
  dW_2 = numpy.zeros((n_visible,n_hidden))
  dvbias_2 = numpy.zeros(n_visible)
  dhbias_2 = numpy.zeros(n_hidden)

  for step in range(r):
    print("Sampling r={}".format(step))
    nv_means, nv_samples,\
    nh_means, nh_samples = gibbs_hvh(rbm,nh_samples)
    dW_2 += numpy.dot(nv_samples.T, nh_means)
    dvbias_2 += numpy.sum(nv_samples, axis=0)
    dhbias_2 += numpy.sum(nh_means, axis=0)

  # chain_end = nv_samples
  rbm.W += lr * (numpy.dot(rbm.input.T, ph_mean)
                  - dW_2)
  rbm.vbias += lr * numpy.mean(rbm.input - dvbias_2, axis=0)
  rbm.hbias += lr * numpy.mean(ph_sample - dhbias_2, axis=0)

def sample_h_given_v(rbm, v0_sample):
    h1_mean = propup(rbm,v0_sample)
    h1_sample = numpy_rng.binomial(size=h1_mean.shape, n=1, p=h1_mean)
    return [h1_mean, h1_sample]


def sample_v_given_h(rbm, h0_sample):
    v1_mean = propdown(rbm,h0_sample)
    v1_sample = numpy_rng.binomial(size=v1_mean.shape, n=1, p=v1_mean) 
    return [v1_mean, v1_sample]

def propup(rbm, v):
    pre_sigmoid_activation = np.dot(v, rbm.W) + rbm.hbias
    return sigmoid(pre_sigmoid_activation)

def propdown(rbm, h):
    pre_sigmoid_activation = np.dot(h, rbm.W.T) + rbm.vbias
    return sigmoid(pre_sigmoid_activation)


def gibbs_hvh(rbm, h0_sample):

    v1_mean, v1_sample = sample_v_given_h(rbm,h0_sample)
    h1_mean, h1_sample = sample_h_given_v(rbm,v1_sample)

    return [v1_mean, v1_sample,
            h1_mean, h1_sample]

def get_hidden_reps(rbm, v,t):
    h_v = sigmoid(np.dot(v, rbm.W) + rbm.hbias)
    h_t = sigmoid(np.dot(t, rbm.W) + rbm.hbias)
    #reconstructed_v = sigmoid(np.dot(h, rbm.W.T) + rbm.vbias)
    return h_v,h_t
        

def train_rbm(rbm, v,t,learning_rate=0.05, k = 200, r=10):
    gibbs_sampling(rbm,lr=learning_rate,k=k,r=r)
    return get_hidden_reps(rbm,v,t)

if __name__ == "__main__":
    # construct RBM
    n_visible=784
    n_hidden=32
    rbm = RBM(input=train_data, n_visible=n_visible, n_hidden=n_hidden)
    for epoch in range(5):
      hidden_rep,hidden_rep_test= train_rbm(rbm,val_data,test_data,k=10,r=1)
      print('Training epoch: %d' %epoch)
      
      #W_l, b_l = train(hidden_rep,val_labels,W_l,b_l,lr=0.1)
      loss, accuracy = train_test(hidden_rep, val_labels, hidden_rep_test,test_labels)
      print('Loss:{} Accuracy:{}'.format(loss,accuracy))

Sampling k=0
Sampling k=1
Sampling k=2
Sampling k=3
Sampling k=4
Sampling k=5
Sampling k=6
Sampling k=7
Sampling k=8
Sampling k=9
Sampling r=0
Training epoch: 0


  y = column_or_1d(y, warn=True)


Loss:1.9945485598547896 Accuracy:0.2484
Sampling k=0
Sampling k=1
Sampling k=2
Sampling k=3
Sampling k=4
Sampling k=5
Sampling k=6
Sampling k=7
Sampling k=8
Sampling k=9
Sampling r=0
Training epoch: 1


  y = column_or_1d(y, warn=True)


Loss:1.7218605155431659 Accuracy:0.3158
Sampling k=0
Sampling k=1
Sampling k=2
Sampling k=3
Sampling k=4
Sampling k=5
Sampling k=6
Sampling k=7
Sampling k=8
Sampling k=9
Sampling r=0
Training epoch: 2


  y = column_or_1d(y, warn=True)


Loss:1.944450082494686 Accuracy:0.2678
Sampling k=0
Sampling k=1
Sampling k=2
Sampling k=3
Sampling k=4
Sampling k=5
Sampling k=6
Sampling k=7
Sampling k=8
Sampling k=9
Sampling r=0
Training epoch: 3


  y = column_or_1d(y, warn=True)


Loss:1.754669859877737 Accuracy:0.3433
Sampling k=0
Sampling k=1
Sampling k=2
Sampling k=3
Sampling k=4
Sampling k=5
Sampling k=6
Sampling k=7
Sampling k=8
Sampling k=9
Sampling r=0
Training epoch: 4


  y = column_or_1d(y, warn=True)


Loss:2.15976536251928 Accuracy:0.2031
