In [20]:
import pandas as pd
import io
import requests
import numpy as np
import math
from tqdm.notebook import tqdm
from numpy import linalg as LA
import matplotlib.pyplot as plt

In [21]:
def URL_to_dataframe(url):
    """Download a whitespace-delimited text table and parse it into a DataFrame.

    Parameters
    ----------
    url : str
        Address of a UTF-8 encoded, header-less text file whose columns are
        separated by runs of whitespace.

    Returns
    -------
    pandas.DataFrame
        The parsed table with default integer column labels.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within the timeout.
    """
    # Without a timeout requests.get can block forever on a stalled server.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of trying to parse an HTML error page as data.
    response.raise_for_status()
    text = response.content.decode('utf-8')
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True flag.
    return pd.read_csv(io.StringIO(text), header=None, sep=r'\s+')

In [22]:
def tanh_matrix(matrix):
    """Element-wise hyperbolic tangent of an array.

    Parameters
    ----------
    matrix : numpy array (or scalar)

    Returns
    -------
    numpy array of the same shape with every entry mapped through tanh.
    """
    # np.tanh saturates cleanly at +/-1.  The explicit quotient
    # (e^x - e^-x) / (e^x + e^-x) overflows to inf/inf = nan once |x| is
    # large enough for np.exp to overflow.
    return np.tanh(matrix)

In [23]:
def dertanh(matrix):
    """Derivative of tanh, evaluated element-wise.

    Parameters
    ----------
    matrix : numpy array
        Points at which the derivative is evaluated.

    Returns
    -------
    numpy array holding ``1 - tanh(matrix)^2`` for every entry.
    """
    activation = np.tanh(matrix)
    return 1 - activation * activation

In [24]:
class auto_encoder():
    """Single-hidden-layer tanh autoencoder trained by full-batch gradient descent.

    The model computes ``s2 = tanh(x W1 + B1) W2 + B2`` and is trained so that
    ``s2`` reproduces the input features under a mean-squared-error loss.

    Every data matrix passed to ``forward`` / ``train_an_epoch`` is expected to
    have a leading column of ones: column 0 multiplies the bias row ``B1`` and
    columns ``1:`` are the features that are reconstructed.

    Parameters
    ----------
    input_dim : int
        Number of features (columns of the data excluding the bias column).
    hidden_dim : int
        Number of hidden units.
    learning_rate : float
        Step size of the gradient-descent update.
    constraint : bool, default False
        When True the decoder weights are tied to the encoder: ``W2 = W1^T``.
    seed : int or None, default None
        Seed for the weight initialisation.  ``None`` draws from NumPy's
        global random state, exactly as before; an integer makes the initial
        weights reproducible.
    """

    def __init__(self, input_dim, hidden_dim, learning_rate, constraint=False, seed=None):
        # hyperparameters
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.learning_rate = learning_rate
        self.constraint = constraint

        # Both np.random and a Generator expose uniform(low, high, size).
        self._rng = np.random if seed is None else np.random.default_rng(seed)

        # weights and biases (draw order: W1, B1, [W2], B2)
        self.W1 = self.weight_init(1, input_dim, hidden_dim)
        self.B1 = self.weight_init(1, 1, hidden_dim)
        if constraint:
            self.W2 = np.transpose(self.W1)
        else:
            self.W2 = self.weight_init(2, hidden_dim, input_dim)
        self.B2 = self.weight_init(2, 1, input_dim)

    def forward(self, x):
        """Run the network on ``x`` (bias column first).

        Returns
        -------
        s1 : hidden pre-activations, shape (N, hidden_dim)
        x1 : hidden activations with a leading column of ones,
             shape (N, hidden_dim + 1)
        s2 : reconstruction of ``x[:, 1:]``, shape (N, input_dim)
        loss : mean squared error between ``x[:, 1:]`` and ``s2``
        """
        # first layer: stacking B1 on top of W1 lets the bias column of x pick up B1
        s1 = np.matmul(x, np.concatenate((self.B1, self.W1), 0))
        x1 = np.concatenate((np.ones(shape=(s1.shape[0], 1)), np.tanh(s1)), axis=1)
        # second layer (linear output)
        s2 = np.matmul(x1, np.concatenate((self.B2, self.W2), 0))
        return s1, x1, s2, self.error(x[:, 1:], s2)

    def train_an_epoch(self, x):
        """Perform one full-batch gradient-descent step.

        Returns the loss measured *before* the weights are updated.
        """
        _s1, x1, s2, loss = self.forward(x)
        lr = self.learning_rate
        scale = (1 / x.shape[0]) * (1 / self.input_dim)

        # backward pass
        delta2 = -2 * (x[:, 1:] - s2)                      # N * d
        hidden = x1[:, 1:]                                 # tanh(s1), already computed
        delta1 = np.matmul(delta2, np.transpose(self.W2)) * (1 - hidden * hidden)

        # Row 0 of each gradient belongs to the bias, the rest to the weights.
        d_W2 = scale * np.matmul(np.transpose(x1), delta2)  # (d~+1) * d
        d_W1 = scale * np.matmul(np.transpose(x), delta1)   # (d+1) * d~

        self.B2 = self.B2 - lr * d_W2[:1, :]
        self.B1 = self.B1 - lr * d_W1[:1, :]
        if self.constraint:
            # Tied weights: the shared matrix receives the encoder gradient
            # plus the transposed decoder gradient, then W2 is re-tied to W1.
            self.W1 = self.W1 - lr * np.transpose(d_W2[1:, :]) - lr * d_W1[1:, :]
            self.W2 = np.transpose(self.W1)
        else:
            self.W2 = self.W2 - lr * d_W2[1:, :]
            self.W1 = self.W1 - lr * d_W1[1:, :]

        return loss

    def weight_init(self, layer_index, input_dim, output_dim):
        """Draw an (input_dim, output_dim) matrix uniformly from [-U, U].

        ``U = sqrt(6 / (1 + input_dim + output_dim))``.  ``layer_index`` is
        accepted for call-site readability but is not used.
        """
        U = math.sqrt(6 / (1 + input_dim + output_dim))
        # A bare instance created without __init__ falls back to the global RNG.
        rng = getattr(self, "_rng", np.random)
        return rng.uniform(-U, U, size=(input_dim, output_dim))

    def error(self, x, y):
        """Mean squared error between two (N, d) arrays, averaged over N and d."""
        diff = x - y
        return (1 / x.shape[0]) * ((1 / x.shape[1]) * np.sum(diff * diff))
    

In [25]:
# Locations of the training and test files that the next cell downloads.
train_url, test_url = (
    "http://amlbook.com/data/zip/zip.train",
    "http://amlbook.com/data/zip/zip.test",
)

In [26]:
# Download both splits (training file first, then the test file).
train_df, test_df = map(URL_to_dataframe, (train_url, test_url))

In [27]:
# Ask for explicit copies: column 0 of these arrays is overwritten in place
# further down.  A plain to_numpy() may return a view of the DataFrame, so the
# write would silently alter train_df/test_df, and under pandas Copy-on-Write
# the view is read-only and the write fails.
train_np = train_df.to_numpy(copy=True)
test_np = test_df.to_numpy(copy=True)

In [28]:
# Shapes of the two splits, one per line.
print(train_np.shape, test_np.shape, sep="\n")

(7291, 257)
(2007, 257)


In [10]:
# Overwrite column 0 of both splits with the constant 1 so it acts as the
# bias input that auto_encoder.forward multiplies with the bias row.
# NOTE(review): presumably column 0 held the class label before this; it is
# lost after this cell, so confirm nothing downstream needs it.
train_np[:, 0] = test_np[:, 0] = 1

In [11]:
# The overwrite above replaces values only; the shapes must be unchanged.
print(train_np.shape, test_np.shape, sep="\n")

(7291, 257)
(2007, 257)


In [12]:
# 11, 12: autoencoder WITHOUT the tied-weight constraint.
# For every hidden size 2**k, train for EPOCH full-batch steps and record the
# final error on the training set (Ein) and on the test set (Eout).
EPOCH = 5000
log_2_hidden_layers = [1,2,3,4,5,6,7]

# final errors, one entry per hidden size
Eins_11 = []
Eouts_12 = []

for log_2_hidden_layer in log_2_hidden_layers:
    hidden_layer = 2**(log_2_hidden_layer)
    model = auto_encoder(train_np.shape[1]-1, hidden_layer, 0.1, False)

    # Iterating through tqdm closes the bar when the loop ends; a manually
    # created bar that is only update()d is never closed.
    for epoch in tqdm(range(EPOCH), desc=f"hidden={hidden_layer}"):
        model.train_an_epoch(train_np)

    # forward() returns (s1, x1, s2, loss); only the loss is needed here.
    Ein = model.forward(train_np)[-1]
    Eout = model.forward(test_np)[-1]

    Eins_11.append(Ein)
    Eouts_12.append(Eout)

HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))

KeyboardInterrupt: 

In [None]:
# Final errors of the unconstrained autoencoder: Ein list, then Eout list.
print(Eins_11, Eouts_12, sep="\n")

In [None]:
# Ein and Eout of the unconstrained autoencoder against the hidden-size exponent.
plt.figure(figsize=(15, 10), dpi=100, linewidth=2)
plt.plot(log_2_hidden_layers, Eins_11, 'o-', color='r', label="Ein")
plt.plot(log_2_hidden_layers, Eouts_12, 'o-', color='b', label="Eout")
plt.title("autoencoder without constraint", x=0.5, y=1.03)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
# The x values are the exponents in log_2_hidden_layers, not the hidden size
# itself, so the axis is labelled as log2 of the hidden size.
plt.xlabel(r"$log2\widetilde{d}$", fontsize=30, labelpad=15)
plt.ylabel("error", fontsize=30, labelpad=20)
plt.legend(loc="best", fontsize=20)
plt.show()

In [None]:
# 13, 14: autoencoder WITH the tied-weight constraint (W2 = W1^T).
# Same protocol as the unconstrained run: train for EPOCH full-batch steps per
# hidden size 2**k and record the final Ein / Eout.
EPOCH = 5000
log_2_hidden_layers = [1,2,3,4,5,6,7]

# final errors, one entry per hidden size
Eins_13 = []
Eouts_14 = []

for log_2_hidden_layer in log_2_hidden_layers:
    hidden_layer = 2**(log_2_hidden_layer)
    model = auto_encoder(train_np.shape[1]-1, hidden_layer, 0.1, True)

    # Iterating through tqdm closes the bar when the loop ends; a manually
    # created bar that is only update()d is never closed.
    for epoch in tqdm(range(EPOCH), desc=f"hidden={hidden_layer}"):
        model.train_an_epoch(train_np)

    # forward() returns (s1, x1, s2, loss); only the loss is needed here.
    Ein = model.forward(train_np)[-1]
    Eout = model.forward(test_np)[-1]

    Eins_13.append(Ein)
    Eouts_14.append(Eout)

In [None]:
# Final errors of the tied-weight autoencoder: Ein list, then Eout list.
print(Eins_13, Eouts_14, sep="\n")

In [None]:
# Ein and Eout of the tied-weight autoencoder against the hidden-size exponent.
plt.figure(figsize=(15, 10), dpi=100, linewidth=2)
# Eins_13 / Eouts_14 are the lists filled by the constrained training loop;
# average_Eins / average_Eouts are not defined anywhere in the notebook.
plt.plot(log_2_hidden_layers, Eins_13, 'o-', color='r', label="Ein")
plt.plot(log_2_hidden_layers, Eouts_14, 'o-', color='b', label="Eout")
plt.title("autoencoder with constraint", x=0.5, y=1.03)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
# The x values are the exponents in log_2_hidden_layers, not the hidden size.
plt.xlabel(r"$log2\widetilde{d}$", fontsize=30, labelpad=15)
plt.ylabel("error", fontsize=30, labelpad=20)
plt.legend(loc="best", fontsize=20)
plt.show()

In [None]:
# 15, 16

In [None]:
# Drop column 0 from both splits; PCA below works on the remaining columns only.
X_train = np.delete(train_np, obj=0, axis=1)
X_test = np.delete(test_np, obj=0, axis=1)

In [None]:
# Each split should now be one column narrower than train_np / test_np.
print(X_train.shape, X_test.shape, sep="\n")

In [None]:
class PCA():
    """Linear autoencoder built from principal component analysis.

    Fitting happens in the constructor: the per-feature mean of ``X`` is
    removed, and the ``d_hidden`` eigenvectors of ``X_c^T X_c`` with the
    largest eigenvalues become the columns of ``self.w``.  A sample ``x`` is
    reconstructed as ``(x - mean) w w^T + mean``.

    Parameters
    ----------
    X : numpy array, shape (N, d)
        Training data (no bias column).
    d_hidden : int
        Number of principal directions to keep.
    """

    def __init__(self, X, d_hidden):
        self.X = X
        self.d_hidden = d_hidden
        # Training mean as a (1, d) row so it broadcasts over samples.
        self.x_average = (np.sum(X, 0) / X.shape[0]).reshape(1, -1)
        self.x_normalized = X - self.x_average
        self.w = self.w_evaluator(self.x_normalized)

    def w_evaluator(self, x_normalized):
        """Return the top ``d_hidden`` eigenvectors of ``x_normalized^T x_normalized`` as columns."""
        XtX = np.matmul(np.transpose(x_normalized), x_normalized)
        # XtX is symmetric, so use the symmetric solver: it returns real
        # eigenvalues/eigenvectors, whereas the general eig solver can return
        # complex output with spurious imaginary parts.
        eigen_value, eigen_vector = LA.eigh(XtX)
        order = np.argsort(-eigen_value)          # largest eigenvalue first
        return eigen_vector[:, order[:self.d_hidden]]

    def _reconstruct(self, X):
        """Project ``X`` onto the learned subspace and map it back to input space."""
        centered = X - self.x_average
        return np.matmul(np.matmul(centered, self.w), np.transpose(self.w)) + self.x_average

    @staticmethod
    def _mean_squared_error(X, X_hat):
        """Squared reconstruction error averaged over samples and features."""
        diff = X - X_hat
        return (1 / X.shape[0]) * (1 / X.shape[1]) * np.sum(diff * diff)

    def linear_autoencoder_train(self):
        """Reconstruction of the training data."""
        return self._reconstruct(self.X)

    def loss_train(self):
        """Mean squared reconstruction error on the training data."""
        return self._mean_squared_error(self.X, self.linear_autoencoder_train())

    def linear_autoencoder_test(self, X_test):
        """Reconstruction of unseen data.

        The data is centred with the *training* mean, which is part of the
        fitted model.  Centring with the test set's own mean makes the result
        depend on which other rows are in the batch; a single row would
        always reconstruct perfectly.
        """
        return self._reconstruct(X_test)

    def loss_test(self, X_test):
        """Mean squared reconstruction error on ``X_test``."""
        return self._mean_squared_error(X_test, self.linear_autoencoder_test(X_test))
        
        
    

In [None]:
# Quick check with 8 principal directions: training error, then test error.
model = PCA(X_train, d_hidden=8)
print(model.loss_train(), model.loss_test(X_test), sep="\n")

In [None]:
# 15, 16: PCA reconstruction error for each hidden size 2**k.
log_2_hidden_layers = list(range(1, 8))
Eins_15, Eouts_16 = [], []
for log_2_hidden_layer in log_2_hidden_layers:
    hidden_layer = 2 ** log_2_hidden_layer
    model = PCA(X_train, hidden_layer)

    # training error first, then test error, matching the list names
    Eins_15 += [model.loss_train()]
    Eouts_16 += [model.loss_test(X_test)]

In [None]:
# PCA errors per hidden size: Ein list, then Eout list.
print(Eins_15, Eouts_16, sep="\n")

In [None]:
# Ein and Eout of PCA against the hidden-size exponent.
plt.figure(figsize=(15, 10), dpi=100, linewidth=2)
# Eins_15 / Eouts_16 are the lists filled by the PCA loop; plain Eins / Eouts
# are not defined anywhere in the notebook.
plt.plot(log_2_hidden_layers, Eins_15, 'o-', color='r', label="Ein")
plt.plot(log_2_hidden_layers, Eouts_16, 'o-', color='b', label="Eout")
plt.title("PCA", x=0.5, y=1.03)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
# The x values are the exponents in log_2_hidden_layers, not the hidden size.
plt.xlabel(r"$log2\widetilde{d}$", fontsize=30, labelpad=15)
plt.ylabel("error", fontsize=30, labelpad=20)
plt.legend(loc="best", fontsize=20)
plt.show()

In [None]:
# draw

In [None]:
# 11: Ein of the unconstrained autoencoder vs. log2 of the hidden size
plt.figure(figsize=(15, 10), dpi=100, linewidth=2)
plt.plot(log_2_hidden_layers, Eins_11, 'o-', color='r', label="Ein_11")
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
# Raw string: "\w" is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning.  The label text is unchanged.
plt.xlabel(r"$log2\widetilde{d}$", fontsize=30, labelpad=15)
plt.ylabel("error", fontsize=30, labelpad=20)
plt.legend(loc="best", fontsize=20)
plt.show()

In [None]:
# 12: Eout of the unconstrained autoencoder vs. log2 of the hidden size
plt.figure(figsize=(15, 10), dpi=100, linewidth=2)
plt.plot(log_2_hidden_layers, Eouts_12, 'o-', color='b', label="Eout_12")
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
# Raw string: "\w" is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning.  The label text is unchanged.
plt.xlabel(r"$log2\widetilde{d}$", fontsize=30, labelpad=15)
plt.ylabel("error", fontsize=30, labelpad=20)
plt.legend(loc="best", fontsize=20)
plt.show()

In [None]:
# 13: Ein of the unconstrained (11) vs. tied-weight (13) autoencoder
plt.figure(figsize=(15, 10), dpi=100, linewidth=2)
plt.plot(log_2_hidden_layers, Eins_11, 'o-', color='b', label="Eins_11")
plt.plot(log_2_hidden_layers, Eins_13, 'o-', color='r', label="Eins_13")
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
# Raw string: "\w" is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning.  The label text is unchanged.
plt.xlabel(r"$log2\widetilde{d}$", fontsize=30, labelpad=15)
plt.ylabel("error", fontsize=30, labelpad=20)
plt.legend(loc="best", fontsize=20)
plt.show()

In [None]:
# 14: Eout of the unconstrained (12) vs. tied-weight (14) autoencoder
plt.figure(figsize=(15, 10), dpi=100, linewidth=2)
plt.plot(log_2_hidden_layers, Eouts_12, 'o-', color='b', label="Eouts_12")
plt.plot(log_2_hidden_layers, Eouts_14, 'o-', color='r', label="Eouts_14")
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
# Raw string: "\w" is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning.  The label text is unchanged.
plt.xlabel(r"$log2\widetilde{d}$", fontsize=30, labelpad=15)
plt.ylabel("error", fontsize=30, labelpad=20)
plt.legend(loc="best", fontsize=20)
plt.show()

In [None]:
# 15: Ein of the tied-weight autoencoder (13) vs. PCA (15)
plt.figure(figsize=(15, 10), dpi=100, linewidth=2)
plt.plot(log_2_hidden_layers, Eins_13, 'o-', color='b', label="Eins_13")
plt.plot(log_2_hidden_layers, Eins_15, 'o-', color='r', label="Eins_15")
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
# Raw string: "\w" is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning.  The label text is unchanged.
plt.xlabel(r"$log2\widetilde{d}$", fontsize=30, labelpad=15)
plt.ylabel("error", fontsize=30, labelpad=20)
plt.legend(loc="best", fontsize=20)
plt.show()

In [None]:
# 16: Eout of the tied-weight autoencoder (14) vs. PCA (16)
plt.figure(figsize=(15, 10), dpi=100, linewidth=2)
plt.plot(log_2_hidden_layers, Eouts_14, 'o-', color='b', label="Eouts_14")
plt.plot(log_2_hidden_layers, Eouts_16, 'o-', color='r', label="Eouts_16")
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
# Raw string: "\w" is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning.  The label text is unchanged.
plt.xlabel(r"$log2\widetilde{d}$", fontsize=30, labelpad=15)
plt.ylabel("error", fontsize=30, labelpad=20)
plt.legend(loc="best", fontsize=20)
plt.show()