In [None]:
# curve fit - third degree
import numpy as np
# Sample inputs and their cubic targets, y = x^3 + 1.
x = [1, 2, 3, 4, 5]
y = [value ** 3 + 1 for value in x]
# Design matrix: one row per input, columns are [1, x, x^2, x^3].
features = np.array([[value ** power for power in range(4)] for value in x])
labels = np.array(y)
# One weight per polynomial term, all starting at integer zero.
theta = np.zeros(4, dtype=int)

def hypothesis_(features, weights):
    """Evaluate the linear model for every sample.

    Args:
        features: Design matrix with one row per sample (anything
            ``np.matmul`` accepts as a left operand).
        weights: Weight vector with one entry per feature column.

    Returns:
        The matrix product of ``features`` and ``weights``.
    """
    predictions = np.matmul(features, weights)
    return predictions
def cost_(hypothesis, labels):
    """Return half the sum of squared errors.

    Args:
        hypothesis: Array of predictions.
        labels: Array of target values, same shape as ``hypothesis``.

    Returns:
        ``0.5 * sum((hypothesis - labels) ** 2)`` as a scalar.
    """
    residuals = hypothesis - labels
    return 0.5 * (residuals * residuals).sum()
def derivative(features, hypothesis, labels):
    """Gradient of the half-sum-of-squares cost with respect to the weights.

    Args:
        features: Design matrix with one row per sample.
        hypothesis: Predictions for those samples.
        labels: Target values for those samples.

    Returns:
        ``features^T @ (hypothesis - labels)``, one entry per feature column.
    """
    residuals = hypothesis - labels
    return np.matmul(np.transpose(features), residuals)

# Full-batch gradient descent on the cubic design matrix.
EPOCHS = 100000
learning_rate = 0.00005
for step in range(EPOCHS + 1):
    # Predictions and cost are taken before this step's weight update.
    hypothesis = hypothesis_(features, theta)
    cost = cost_(hypothesis, labels)
    delta = derivative(features, hypothesis, labels) * learning_rate
    theta = theta - delta
    if not step % 10000:
        print("{:6} : cost {:10.4f}".format(step, cost), theta)

# Evaluate the fitted polynomial at a point far outside the training range.
test_val = 127
x_test = np.array([test_val ** j for j in range(4)])
print(np.matmul(x_test, theta))

     0 : cost 10485.0000 [0.0115 0.0497 0.224  1.037 ]
 10000 : cost     0.1350 [0.28231799 0.20141375 0.08552008 0.97981144]
 20000 : cost     0.0510 [ 0.44256135  0.28344872 -0.00687424  0.99406892]
 30000 : cost     0.0200 [ 0.54063276  0.33206917 -0.06250554  1.00267296]
 40000 : cost     0.0086 [ 0.6009836   0.36042798 -0.09583878  1.00784784]
 50000 : cost     0.0044 [ 0.6384465   0.37650382 -0.11564864  1.01094286]
 60000 : cost     0.0029 [ 0.66201918  0.38513662 -0.1272579   1.01277652]
 70000 : cost     0.0023 [ 0.67715985  0.38926229 -0.13389547  1.01384534]
 80000 : cost     0.0020 [ 0.68717957  0.39066175 -0.13751999  1.01445049]
 90000 : cost     0.0019 [ 0.69408692  0.39041526 -0.1393196   1.01477469]
100000 : cost     0.0019 [ 0.69910039  0.38917809 -0.14001503  1.01492875]
2076754.6278873093


In [None]:
import numpy as np

# Three training samples; the first column is the constant bias input.
x = np.array([
    [1, 1, 2],
    [1, 2, 3],
    [1, 3, 5],
])
y = np.array([3, 5, 8])
# One uniformly random starting weight in [0, 1) per feature column.
theta = np.random.rand(x.shape[1])

def hypothesis(features, weights):
    """Linear model output: the matrix product of ``features`` and ``weights``.

    Args:
        features: Sample matrix (rows are samples) or a single sample vector.
        weights: Weight vector, one entry per feature.

    Returns:
        Whatever ``np.matmul(features, weights)`` yields for those operands.
    """
    linear_output = np.matmul(features, weights)
    return linear_output
def cost(hypothesis, labels):
    """Squared-error cost: half of the summed squared differences.

    Args:
        hypothesis: Array of model predictions.
        labels: Array of targets with the same shape.

    Returns:
        Scalar ``sum((hypothesis - labels) ** 2) / 2``.
    """
    errors = hypothesis - labels
    return (errors ** 2).sum() / 2
def grad(features, hypothesis, labels):
    """Gradient of the squared-error cost with respect to the weights.

    Args:
        features: Sample matrix with one row per sample.
        hypothesis: Predictions for those samples.
        labels: Targets for those samples.

    Returns:
        The transposed feature matrix multiplied by the prediction errors.
    """
    errors = hypothesis - labels
    features_t = np.transpose(features)
    return np.matmul(features_t, errors)
def gradient_descent(features, labels, weights, learning_rate, EPOCHS):
    """Fit linear-regression weights with full-batch gradient descent.

    Runs ``EPOCHS + 1`` update steps and prints the cost about ten times
    along the way. The printed cost is the one measured before that step's
    weight update.

    Args:
        features: Sample matrix with one row per sample.
        labels: Target value for each sample.
        weights: Starting weight vector; it is not modified in place.
        learning_rate: Step size applied to the gradient.
        EPOCHS: Non-negative integer; the loop runs ``EPOCHS + 1`` times.

    Returns:
        The weight vector after the last update.
    """
    # Integer reporting interval (ceiling of EPOCHS / 10, at least 1). The
    # previous float modulo raised ZeroDivisionError for EPOCHS == 0 and
    # reported at irregular steps when EPOCHS was not a multiple of 10.
    report_every = max(1, -(-EPOCHS // 10))
    for step in range(EPOCHS + 1):
        hypo = hypothesis(features, weights)
        if step % report_every == 0:
            # The cost is only needed for the progress line, so compute it here.
            print("{:4} : cost {:10.4f}".format(step, cost(hypo, labels)))
        weights = weights - learning_rate * grad(features, hypo, labels)
    return weights
def test(features, weights, answer):
    print("expect: {:5.4f} | error rate: {:5.4f} %".format(np.matmul(features, weights), np.abs(np.matmul(features, weights) - answer) / answer * 100))

# Train from the random starting weights, then check the prediction for an unseen sample.
theta = gradient_descent(features=x, labels=y, weights=theta, learning_rate=0.01, EPOCHS=1000)
test(features=[1, 10, 20], weights=theta, answer=30)

   0 : cost    12.8123
 100 : cost     0.1006
 200 : cost     0.0482
 300 : cost     0.0232
 400 : cost     0.0114
 500 : cost     0.0057
 600 : cost     0.0030
 700 : cost     0.0017
 800 : cost     0.0010
 900 : cost     0.0007
1000 : cost     0.0005
expect: 30.2147 | error rate: 0.7158 %


In [None]:
# Logistic Regression
import numpy as np
# Six two-feature samples; the first three carry label 0, the last three label 1.
x = [
    [0, 1], [1, 3], [1, 4],
    [5, 1], [3, 2], [4, 2],
]
y = [0] * 3 + [1] * 3

class classification:
    """Binary logistic regression trained with full-batch gradient descent.

    A constant 1 is prepended to every input sample, so ``weights[0]`` acts
    as the bias and ``dim`` must equal the number of raw features plus one.
    """
    def __init__(self, input, output, dim, lr, epochs):
        """Store the hyper-parameters and build the training arrays.

        Args:
            input: Sequence of samples, each a sequence of raw feature values.
            output: Sequence of 0/1 labels, one per sample.
            dim: Length of the weight vector (raw features + 1).
            lr: Learning rate applied to the gradient.
            epochs: Training runs ``epochs + 1`` update steps.
        """
        self.learning_rate = lr
        self.EPOCHS = epochs
        # Build the design matrix from the `input` argument. It used to read
        # the module-level `x`, silently ignoring whatever the caller passed.
        self.features = np.array([[1] + list(xs) for xs in input])
        self.labels = np.array(output)
        self.weights = np.random.rand(dim)
    def sigmoid(self, z):
        """Logistic function ``1 / (1 + exp(-z))``, applied element-wise."""
        denominator = 1 + np.exp(-1 * z)
        return 1 / denominator
    def hypothesis(self):
        """Predicted probability for every training sample."""
        return self.sigmoid(np.matmul(self.features, self.weights))
    def cost(self, hypothesis, labels):
        """Summed binary cross-entropy between predictions and 0/1 labels."""
        # Keep predictions strictly inside (0, 1): a saturated sigmoid would
        # otherwise give log(0) = -inf and turn the cost into nan.
        eps = 1e-15
        hypothesis = np.clip(hypothesis, eps, 1 - eps)
        error = np.matmul(labels, np.log(hypothesis)) + np.matmul(1 - labels, np.log(1 - hypothesis))
        return -1 * error
    def grad(self, features, hypothesis, labels):
        """Weight update for one step: the gradient already scaled by the learning rate."""
        derivative = np.matmul(np.transpose(features), hypothesis - labels)
        return self.learning_rate * derivative
    def logistic_regression(self):
        """Run ``EPOCHS + 1`` gradient steps, printing the cost about ten times.

        The printed cost is the one measured before that step's update.
        """
        # Integer reporting interval (ceiling of EPOCHS / 10, at least 1);
        # the float modulo used before raised ZeroDivisionError for EPOCHS == 0.
        report_every = max(1, -(-self.EPOCHS // 10))
        for step in range(self.EPOCHS + 1):
            hypothesis = self.hypothesis()
            grads = self.grad(self.features, hypothesis, self.labels)
            self.weights = self.weights - grads
            if step % report_every == 0:
                cost = self.cost(hypothesis, self.labels)
                print("{:5} | cost: {:10.4f}".format(step, cost))
# Three weights: one bias plus one per raw feature of the toy data set.
model = classification(input=x, output=y, dim=3, lr=0.01, epochs=100)
model.logistic_regression()

    0 | cost:     9.3974
   10 | cost:     3.8691
   20 | cost:     2.1395
   30 | cost:     1.6620
   40 | cost:     1.3957
   50 | cost:     1.2111
   60 | cost:     1.0732
   70 | cost:     0.9657
   80 | cost:     0.8791
   90 | cost:     0.8078
  100 | cost:     0.7478


In [17]:
# MNIST - Convert into a binary classification problem by just classifying '2' or not
import numpy as np
import tensorflow as tf

# Load MNIST through Keras as (train images, train labels), (test images, test labels).
(images_train, labels_train), (images_test, labels_test) = tf.keras.datasets.mnist.load_data()
# Flatten every image to one vector, scale pixel values by 1/255 and then by a
# further 1/10, and transpose so that each column holds one image.
images_train = (images_train.reshape(len(images_train), -1) / 255.0 / 10).T
images_test = (images_test.reshape(len(images_test), -1) / 255.0 / 10).T

def convert_to_binary(data, target=2):
    """Turn multi-class labels into 0/1 labels for a one-vs-rest problem.

    Args:
        data: One-dimensional sequence or array of class labels.
        target: The label mapped to 1; every other label maps to 0.
            Defaults to 2, the digit this notebook classifies.

    Returns:
        A float64 array of the same length as ``data`` holding 1.0 where
        the label equals ``target`` and 0.0 elsewhere.
    """
    # A single vectorized comparison replaces the per-element Python loop.
    return (np.asarray(data) == target).astype(float)

# Replace the digit labels with 0/1 targets for both splits.
labels_train = convert_to_binary(labels_train)
labels_test = convert_to_binary(labels_test)

class LogisticRegression():
    """Binary logistic regression with a separate bias term.

    Feature matrices are laid out as (n_features, n_samples): every column is
    one sample, because predictions are computed as ``weights @ features``.
    """
    def __init__(self, n):
        """Draw ``n`` weights and one bias uniformly from [0, 1)."""
        self.weights = np.random.rand(n)
        self.bias = np.random.rand(1)
    def sigmoid(self, z):
        """Logistic function ``1 / (1 + exp(-z))``, applied element-wise."""
        return 1 / (1 + np.exp(-z))
    def hypothesis(self, features):
        """Predicted probability for each column (sample) of ``features``."""
        linear_hypo = np.matmul(self.weights, features) + self.bias
        return self.sigmoid(linear_hypo)
    def cost(self, hypo, labels):
        """Mean binary cross-entropy between predictions and 0/1 labels."""
        # Keep predictions strictly inside (0, 1): a saturated sigmoid would
        # otherwise give log(0) = -inf and turn the cost into nan or inf.
        eps = 1e-15
        hypo = np.clip(hypo, eps, 1 - eps)
        loss = np.multiply(labels, np.log(hypo)) + np.multiply(1 - labels, np.log(1 - hypo))
        return -np.average(loss)
    def grad(self, features, hypo, labels):
        """Return the weight and bias gradients ``(dw, db)``.

        NOTE(review): ``dw`` is summed over the samples while ``db`` (and the
        cost) are averaged, so the weights effectively use a learning rate
        n_samples times larger than the bias. The learning rate passed by the
        notebook was tuned with this scaling, so it is left unchanged here.
        """
        dw = np.matmul(features, (hypo - labels).T)
        db = np.average(hypo - labels)
        return dw, db
    def train(self, features, labels, learning_rate, EPOCHS):
        """Run ``EPOCHS + 1`` gradient steps, printing the cost about ten times.

        Args:
            features: Array of shape (n_features, n_samples).
            labels: Array of 0/1 targets, one per sample.
            learning_rate: Step size applied to both gradients.
            EPOCHS: Non-negative integer; the loop runs ``EPOCHS + 1`` times.
        """
        # Integer reporting interval (ceiling of EPOCHS / 10, at least 1);
        # the float modulo used before raised ZeroDivisionError for EPOCHS == 0.
        report_every = max(1, -(-EPOCHS // 10))
        # `step` rather than `iter`, which shadowed the builtin.
        for step in range(EPOCHS + 1):
            hypothesis = self.hypothesis(features)
            if step % report_every == 0:
                cost = self.cost(hypothesis, labels)
                print("iter: {:4} || cost: {:10.5f}".format(step, cost))

            dw, db = self.grad(features, hypothesis, labels)
            self.weights = self.weights - dw * learning_rate
            self.bias = self.bias - db * learning_rate
    def test_accuracy(self, features, labels):
        """Print the percentage of samples whose thresholded prediction matches its label."""
        hypothesis = self.hypothesis(features)
        # Probabilities above 0.5 count as class 1.
        hypothesis = np.where(hypothesis > 0.5, 1, 0)
        prob = np.average(hypothesis == labels) * 100
        print("Test Accuracy: {:5.2f}%".format(prob))

# One weight per pixel row of the (n_features, n_samples) training matrix.
model = LogisticRegression(n=len(images_train))
model.train(features=images_train, labels=labels_train, learning_rate=0.001, EPOCHS=1000)
model.test_accuracy(features=images_test, labels=labels_test)

iter:    0 || cost:    5.08180
iter:  100 || cost:    0.09959
iter:  200 || cost:    0.09046
iter:  300 || cost:    0.08646
iter:  400 || cost:    0.08412
iter:  500 || cost:    0.08254
iter:  600 || cost:    0.08137
iter:  700 || cost:    0.08047
iter:  800 || cost:    0.07973
iter:  900 || cost:    0.07911
iter: 1000 || cost:    0.07857
Test Accuracy: 97.91%


# These are wrong: softmax is needed instead of sigmoid because y has multiple class labels

In [None]:
# MNIST - Logistic Regression
import numpy as np
import tensorflow as tf

# Load MNIST through Keras as (train images, train labels), (test images, test labels).
(images_train, labels_train), (images_test, labels_test) = tf.keras.datasets.mnist.load_data()
# Flatten the 60000 training images into rows of 784 pixel values.
images_train = images_train.reshape(60000, 784)

def one_hot_encoding(array, dim):
    """One-hot encode a sequence of integer class labels.

    Args:
        array: Sized sequence of integer labels, each usable as a list index
            into a row of length ``dim``.
        dim: Number of classes, i.e. the length of every encoded row.

    Returns:
        A list with one row per label; each row is a list of ``dim`` zeros
        with a single 1 at the label's index.
    """
    rows = [[0] * dim for _ in range(len(array))]
    for row, label in zip(rows, array):
        row[label] = 1
    return rows
def normalize(data, scale=100):
    """Min-max scale ``data`` to [0, 1] and then shrink it by ``scale``.

    Args:
        data: Non-empty array-like of numbers.
        scale: Extra divisor applied after min-max scaling. The default of
            100 keeps the original behaviour; the shrink keeps the sigmoid
            inputs small.

    Returns:
        An array with the shape of ``data`` whose values lie in
        ``[0, 1 / scale]``. If all values are equal, an array of zeros is
        returned instead of dividing by zero.
    """
    data = np.asarray(data)
    # Compute the extrema once; the minimum used to be evaluated twice.
    low, high = np.min(data), np.max(data)
    span = high - low
    if span == 0:
        return np.zeros(data.shape, dtype=float)
    return (data - low) / span / scale

class logistic_mnist:
    """Ten independent sigmoid classifiers on flattened images, trained full-batch.

    Relies on the module-level ``normalize`` and ``one_hot_encoding`` helpers.
    """
    def __init__(self, input, output, lr, epochs):
        """Store the hyper-parameters and prepare features, labels and weights.

        Args:
            input: Array of images; every image is flattened into one row.
            output: Sequence of integer class labels in ``range(10)``.
            lr: Learning rate applied to the gradient.
            epochs: Training runs ``epochs + 1`` update steps.
        """
        self.learning_rate = lr
        self.EPOCHS = epochs
        # Flatten per sample using the actual sample count rather than a
        # hard-coded (60000, 784), so any number of images is accepted.
        flattened = np.reshape(input, (len(input), -1))
        self.features = normalize(np.array(flattened))
        self.labels = np.array(one_hot_encoding(output, 10))
        # One weight column per class, one row per flattened pixel.
        self.weights = np.random.rand(self.features.shape[1], 10)
    def sigmoid(self, z):
        """Logistic function ``1 / (1 + exp(-z))``, applied element-wise."""
        denominator = 1 + np.exp(-1 * z)
        return 1 / denominator
    def hypothesis(self, features):
        """Per-class sigmoid outputs for each row of ``features``."""
        return self.sigmoid(np.matmul(features, self.weights))
    def cost(self, hypothesis, labels):
        """Binary cross-entropy summed over all samples and classes."""
        # Keep predictions strictly inside (0, 1): a saturated sigmoid would
        # otherwise give log(0) = -inf and turn the cost into nan.
        eps = 1e-15
        hypothesis = np.clip(hypothesis, eps, 1 - eps)
        error = (np.multiply(labels, np.log(hypothesis))).sum() + (np.multiply(1 - labels, np.log(1 - hypothesis))).sum()
        return -1 * error
    def grad(self, features, hypothesis, labels):
        """Weight update for one step: the gradient already scaled by the learning rate."""
        derivative = np.matmul(np.transpose(features), hypothesis - labels)
        return self.learning_rate * derivative
    def logistic_regression(self):
        """Run ``EPOCHS + 1`` full-batch steps, printing the cost about ten times.

        The printed cost is the one measured before that step's update.
        """
        # Integer reporting interval (ceiling of EPOCHS / 10, at least 1);
        # the float modulo used before raised ZeroDivisionError for EPOCHS == 0.
        report_every = max(1, -(-self.EPOCHS // 10))
        for step in range(self.EPOCHS + 1):
            hypothesis = self.hypothesis(self.features)
            grads = self.grad(self.features, hypothesis, self.labels)
            self.weights = self.weights - grads
            if step % report_every == 0:
                cost = self.cost(hypothesis, self.labels)
                print("{:5} | cost: {:10.4f}".format(step, cost))
# Full-batch training on the whole training set.
model = logistic_mnist(input=images_train, output=labels_train, lr=0.1, epochs=1000)
model.logistic_regression()
# The cost is currently around 45000; using smaller batches might reduce it further.
# Normalization also divides by 100; whether that is legitimate needs more thought.

    0 | cost: 559299.7190
  100 | cost: 56614.4776
  200 | cost: 52000.6689
  300 | cost: 49982.4802
  400 | cost: 48778.4754
  500 | cost: 47950.2652
  600 | cost: 47331.1060
  700 | cost: 46842.2587
  800 | cost: 46441.2161
  900 | cost: 46102.8035
 1000 | cost: 45811.0528


In [None]:
# MNIST - Logistic Regression
import numpy as np
import tensorflow as tf

# Load MNIST through Keras; the result is unpacked as
# (train images, train labels), (test images, test labels).
(images_train, labels_train), (images_test, labels_test) = tf.keras.datasets.mnist.load_data()

class logistic_mnist:
    """Ten independent sigmoid classifiers on flattened 784-pixel images.

    Training uses mini-batch gradient descent; prediction picks the class
    whose sigmoid output is largest.
    """
    def __init__(self, input, output, lr, epochs):
        """Store the hyper-parameters and prepare features, labels and weights.

        Args:
            input: Array of images, each holding 784 values once flattened.
            output: Sequence of integer class labels in ``range(10)``.
            lr: Learning rate applied to every mini-batch gradient.
            epochs: Training runs ``epochs + 1`` passes over the data.
        """
        self.learning_rate = lr
        self.EPOCHS = epochs
        self.features, self.labels = self.preprocess(input, output)
        self.weights = np.random.rand(784, 10)
    def one_hot_encoding(self, array, dim):
        """Return one list of ``dim`` zeros per label, with a 1 at the label's index."""
        one_hot = []
        for label in array:
            row = [0] * dim
            row[label] = 1
            one_hot.append(row)
        return one_hot
    def normalize(self, data):
        """Scale raw pixel values by 1/255 and then by a further 1/10."""
        # Raw MNIST pixels range over 0..255; with such a spread the sigmoid
        # saturates to 1 and log(1 - hypothesis) becomes nan. The data is
        # therefore min-max normalized and divided by 10 (open question from
        # the author: is that extra division legitimate?).
        # (data - min) / (max - min) / 10 is the same as data / 255 / 10 here.
        return data / 255.0 / 10
    def preprocess(self, features_before, labels_before):
        """Flatten and normalize the images and one-hot encode the labels.

        Returns:
            ``(features, labels)`` with shapes (n_samples, 784) and
            (n_samples, 10).
        """
        features = self.normalize(np.array(np.reshape(features_before, (len(features_before), 784))))
        labels = np.array(self.one_hot_encoding(labels_before, 10))
        return features, labels
    def sigmoid(self, z):
        """Logistic function ``1 / (1 + exp(-z))``, applied element-wise."""
        denominator = 1 + np.exp(-1 * z)
        return 1 / denominator
    def hypothesis(self, features):
        """Per-class sigmoid outputs for each row of ``features``."""
        return self.sigmoid(np.matmul(features, self.weights))
    def cost(self, hypothesis, labels):
        """Binary cross-entropy summed over all samples and classes."""
        # Keep predictions strictly inside (0, 1) so log() never sees 0.
        eps = 1e-15
        hypothesis = np.clip(hypothesis, eps, 1 - eps)
        # An element-wise product is what is needed (the labels are one-hot),
        # so multiply-then-sum is used instead of matmul followed by a sum.
        error = (np.multiply(labels, np.log(hypothesis))).sum() + (np.multiply(1 - labels, np.log(1 - hypothesis))).sum()
        return -1 * error
    def grad(self, features, hypothesis, labels):
        """Weight update for one batch: the gradient already scaled by the learning rate."""
        derivative = np.matmul(np.transpose(features), hypothesis - labels)
        return self.learning_rate * derivative
    def logistic_regression(self, batch_size):
        """Train with mini-batch gradient descent for ``EPOCHS + 1`` passes.

        About ten progress lines are printed; each shows the cost of the last
        mini-batch of that pass, measured before the batch's weight update.

        Args:
            batch_size: Positive number of samples per mini-batch. A final,
                smaller batch is used when the sample count is not a multiple
                of ``batch_size``.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        # Integer reporting interval (ceiling of EPOCHS / 10, at least 1);
        # the float modulo used before raised ZeroDivisionError for EPOCHS == 0.
        report_every = max(1, -(-self.EPOCHS // 10))
        n_samples = len(self.features)
        # Defined up front so reporting works even if there are no samples.
        cost = float("nan")
        for step in range(self.EPOCHS + 1):
            # Stepping by batch_size also covers the trailing partial batch,
            # which the previous int(len / batch_size) loop skipped.
            for start in range(0, n_samples, batch_size):
                features = self.features[start:start + batch_size]
                labels = self.labels[start:start + batch_size]
                hypothesis = self.hypothesis(features)
                grads = self.grad(features, hypothesis, labels)
                self.weights = self.weights - grads
                cost = self.cost(hypothesis, labels)
            if step % report_every == 0:
                print("{:5} | cost: {:10.4f}".format(step, cost))
    def test_accuracy(self, test_features, test_labels):
        """Print the percentage of test samples whose arg-max class is correct.

        Args:
            test_features: Array of images in the same format as the training input.
            test_labels: Integer class labels for those images.
        """
        # Reuse preprocess() so any test-set size works; the reshape used to
        # be hard-coded to (10000, 784).
        features, labels = self.preprocess(test_features, test_labels)
        predicted = np.argmax(self.hypothesis(features), axis=1)
        expected = np.argmax(labels, axis=1)
        accuracy = np.mean(predicted == expected) * 100
        print("Test Accuracy: {:10.4f}%".format(accuracy))
# Mini-batch training on the training split, then accuracy on the test split.
model = logistic_mnist(input=images_train, output=labels_train, lr=0.4, epochs=100)
model.logistic_regression(batch_size=10)
model.test_accuracy(test_features=images_test, test_labels=labels_test)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
    0 | cost:     4.3828
   10 | cost:     2.8118
   20 | cost:     2.6792
   30 | cost:     2.6073
   40 | cost:     2.5558
   50 | cost:     2.5175
   60 | cost:     2.4888
   70 | cost:     2.4672
   80 | cost:     2.4510
   90 | cost:     2.4389
  100 | cost:     2.4298
Test Accuracy:    91.3100%
