In [81]:
import numpy as np

from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten

In [451]:
def linear(x):
    return x

def relu(x):
    return np.where(x > 0, x, 0)

def sigmoid(x):
    return 1/(1+np.exp(-x))

def tanh(x):
    return (1-np.exp(x))/(1+np.exp(x))

def softmax(x):
    x -= np.max(x,1).reshape(x.shape[0],-1)
    return np.exp(x)/np.sum(np.exp(x),1).reshape(x.shape[0],-1)

def rmse(y_hat, y):
    return np.mean(np.square(y_hat - y))

def binary_crossentropy(y_hat, y):
    epsilon = 1e-7
    return np.mean((1-y)*np.log(1-y_hat+epsilon) + y*np.log(y_hat+epsilon))

def categorical_crossentropy(y_hat, y):
    epsilon = 1e-7
    return -np.mean(y*np.log(y_hat+epsilon))

def make_onehot(x):
    result = np.zeros((x.size, np.max(x)+1))
    for idx, val in enumerate(x):
        result[idx,val] = 1
    return result

In [34]:
x = np.array([[1,2,1000],[1000,5,6]])

In [35]:
np.sum(softmax(x),1), softmax(x)

(array([1., 1.]),
 array([[0., 0., 1.],
        [1., 0., 0.]]))

In [584]:
class SequentialModel:
    def __init__(self,x,y,output_shape, activation='linear'):
        self.activation_dic = {
            'linear' : linear,
            'sigmoid' : sigmoid,
            'relu' : relu,
            'softmax' : softmax
        }
        self.loss_func_dic = {
            'rmse' : rmse,
            'binary_crossentropy' : binary_crossentropy,
            'categorical_crossentropy' : categorical_crossentropy
        }
        self.x = x
        self.y = y
        self.activation = [activation]
        self.W = [[np.random.randn(self.x.shape[1], output_shape), 
                 np.zeros(output_shape)]]
        self.dW = [[np.zeros((self.x.shape[1], output_shape)),
                  np.zeros(output_shape)]
]
    
    def add(self,output,activation='linear'):
        self.input = self.W[-1][0].shape[1]
        self.W.append([np.random.randn(self.input,output),np.zeros(output)])
        self.dW.append([np.zeros((self.input,output)),np.zeros(output)])
        self.activation.append(activation)
        
    def predict(self):
        self.y_hat = self.x.copy()
        for i in range(len(self.W)):
            self.y_hat = np.dot(self.y_hat, self.W[i][0]) + self.W[i][1]
            self.y_hat = self.activation_dic[self.activation[i]](self.y_hat)
        return self.y_hat
    
    def compile(self, loss):
        self.loss_func = self.loss_func_dic[loss]
        
    def summary(self):
        print("------------------------------")
        print("Output Shape           Param #")
        print("==============================")
        total_params = 0
        for idx, W in enumerate(self.W):
            tmp_length = 38- int(6 + len(str(W[0].shape[1])))
            print(f'(None, {W[0].shape[1]})' f'{W[0].shape[0]*W[0].shape[1] + W[0].shape[1]:^{tmp_length}}\n')
            total_params += W[0].shape[0]*W[0].shape[1] + W[0].shape[1]
        print("==============================")
        print(f"total params: {total_params}")
        
    def loss(self):
        s = time()
        y_hat = self.predict()
        y = self.y
        self.loss_val = self.loss_func(y_hat, y)
        e = time()
        self.predict_elapse = (e - s)
        self.forword_elapse = self.predict_elapse
        return self.loss_val
    
    def gradient(self):
        for layer in range(len(self.W)):
            h = 1e-5 
            weight = 0
            bias = 1
            rows = range(self.W[layer][weight].shape[0])
            cols = range(self.W[layer][weight].shape[1])
            print(f'{rows[-1]} * {cols[-1]} 번 돌아야합니다.')
            for row in rows:
                for col in cols:
                    print(f'{layer+1}-{str(row+1)}-{str(col+1)}')
                    fx = self.loss()
                    self.W[layer][weight][row,col] += h
                    fxh = self.loss()
                    self.dW[layer][weight][row,col] = (fxh-fx)/h
                    self.W[layer][weight][row,col] -= h
            for b_idx in range(self.W[layer][bias].size):
                fx = self.loss()
                self.W[layer][bias][b_idx] += h
                fxh = self.loss()
                self.dW[layer][bias][b_idx] = (fxh-fx)/h
                self.W[layer][bias][b_idx] -= h    
    
    def descent_gradient(self,lr=1e-3):
        self.lr = lr
        weight = 0
        bias = 1
        layers = range(len(self.W))
        self.gradient()
        for layer in layers:
            self.W[layer][weight] -= self.dW[layer][weight]*self.lr
            self.W[layer][bias] -= self.dW[layer][bias]*self.lr
        
    def fit(self,lr,epochs=100):
        for epoch in range(epochs):
            print(f'{epoch+1}번째 진행 중..')
            self.descent_gradient(lr)
        

In [507]:
y = np.random.randint(0,4,100)
y_one = make_onehot(y)
model = SequentialModel(np.random.randn(100,10),y_one,100)
model.add(40, activation='relu')
model.add(4, activation='softmax')

In [454]:
model.compile('categorical_crossentropy')

In [455]:
model.summary()

------------------------------
Output Shape           Param #
(None, 100)            1100             

(None, 40)             4040             

(None, 4)              164              

total params: 5304


In [461]:
model.fit(epochs=20, lr=1e-3)

In [462]:
model.loss()

2.550166278369687

In [463]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

In [464]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [478]:
X_train_ = X_train.reshape(-1,28*28)
X_test_ = X_test.reshape(-1,28*28)
y_train_ = to_categorical(y_train)
y_test_ = to_categorical(y_test)

In [508]:
tf_model = Sequential()

input_layer = Dense(256,activation='relu',input_shape=(784,))
layer1 = Dense(128, activation='relu')
output_layer = Dense(10,activation='softmax')

tf_model.add(input_layer)
tf_model.add(layer1)
tf_model.add(output_layer)

In [509]:
tf_model.compile(optimizer='rmsprop',loss='categorical_crossentropy',metrics=['accuracy'])

In [510]:
tf_model.fit(X_train_, y_train_,epochs=10)

Epoch 1/10

KeyboardInterrupt: 

In [484]:
tf_model.evaluate(X_test_,y_test_)



[0.6561394929885864, 0.9153000116348267]

In [486]:
tf_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 256)               200960    
                                                                 
 dense_4 (Dense)             (None, 128)               32896     
                                                                 
 dense_5 (Dense)             (None, 10)                1290      
                                                                 
Total params: 235146 (918.54 KB)
Trainable params: 235146 (918.54 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [585]:
our_model = SequentialModel(X_train_, y_train_, 256, activation='relu')

In [586]:
our_model.add(128, activation='relu')
our_model.add(10, activation='softmax')
our_model.summary()

------------------------------
Output Shape           Param #
(None, 256)           200960            

(None, 128)            32896            

(None, 10)             1290             

total params: 235146


In [587]:
our_model.compile(loss='categorical_crossentropy')

In [588]:
our_model.fit(lr=1e-3, epochs=2)

1번째 진행 중..
783 * 255 번 돌아야합니다.
1-1-1
1-1-2
1-1-3
1-1-4
1-1-5
1-1-6
1-1-7
1-1-8
1-1-9
1-1-10
1-1-11
1-1-12
1-1-13
1-1-14
1-1-15
1-1-16
1-1-17
1-1-18
1-1-19
1-1-20
1-1-21
1-1-22
1-1-23
1-1-24
1-1-25
1-1-26
1-1-27
1-1-28
1-1-29
1-1-30
1-1-31
1-1-32
1-1-33
1-1-34
1-1-35
1-1-36
1-1-37
1-1-38
1-1-39
1-1-40
1-1-41
1-1-42
1-1-43
1-1-44
1-1-45
1-1-46
1-1-47
1-1-48
1-1-49
1-1-50
1-1-51
1-1-52
1-1-53
1-1-54
1-1-55
1-1-56
1-1-57
1-1-58
1-1-59
1-1-60
1-1-61
1-1-62
1-1-63
1-1-64
1-1-65
1-1-66
1-1-67
1-1-68
1-1-69
1-1-70
1-1-71
1-1-72
1-1-73
1-1-74
1-1-75
1-1-76
1-1-77
1-1-78
1-1-79
1-1-80
1-1-81
1-1-82
1-1-83
1-1-84
1-1-85
1-1-86
1-1-87
1-1-88
1-1-89
1-1-90
1-1-91
1-1-92
1-1-93
1-1-94
1-1-95
1-1-96
1-1-97
1-1-98
1-1-99
1-1-100
1-1-101
1-1-102
1-1-103
1-1-104
1-1-105
1-1-106
1-1-107
1-1-108
1-1-109
1-1-110
1-1-111
1-1-112
1-1-113
1-1-114
1-1-115
1-1-116
1-1-117
1-1-118
1-1-119
1-1-120
1-1-121
1-1-122
1-1-123
1-1-124
1-1-125
1-1-126
1-1-127
1-1-128
1-1-129
1-1-130
1-1-131
1-1-132
1-1-133
1-1-134
1-1-1

KeyboardInterrupt: 

In [505]:
from time import time

In [474]:
np.cumprod(X_train.shape[1:])[-1]

784

In [641]:
x = np.random.randn(100000,1000)
w = np.random.randn(1000,1)

In [642]:
y = np.random.randint(0,1,100000).reshape(-1,1)

In [643]:
x.shape, w.shape, y.shape

((100000, 1000), (1000, 1), (100000, 1))

In [648]:
def loss(x,y):
    epsilon = 1e-7
    y_hat = np.dot(x,w)
    y_hat = sigmoid(y_hat)
    return -np.mean((1-y+epsilon) * np.log(1-y_hat+epsilon) + y * np.log(y_hat+epsilon))

In [649]:
s = time()
dW = np.zeros_like(w)
h = 1e-5
rows = range(w.shape[0])
cols = range(w.shape[1])
for row in rows:
    for col in cols:
        fx = loss(x,y)
        w[row,col] += h
        fxh = loss(x,y)
        dW[row,col] = (fxh-fx)/h
        w[row,col] -= h
e = time()
elapse = e - s

In [650]:
dW

array([[ 1.36434259e-03],
       [-9.07727316e-04],
       [ 3.82603265e-03],
       [-3.45556295e-04],
       [-1.70926384e-05],
       [-2.15211626e-03],
       [ 2.66405102e-03],
       [-4.90124030e-04],
       [-7.87579779e-05],
       [-7.62882912e-04],
       [-8.76554829e-04],
       [ 6.42317310e-04],
       [ 3.46949172e-03],
       [ 2.42092613e-03],
       [ 1.72141350e-03],
       [-1.30712738e-03],
       [-2.73104916e-04],
       [ 5.11101650e-04],
       [-4.22171151e-03],
       [-2.54891903e-03],
       [ 3.13347384e-03],
       [-4.13477395e-03],
       [-2.13599272e-03],
       [-1.19819878e-03],
       [-8.13759549e-04],
       [ 3.69143471e-04],
       [ 3.48846507e-03],
       [-9.35560873e-04],
       [-1.99148360e-03],
       [-9.75517978e-04],
       [ 3.31443051e-04],
       [-1.29016247e-03],
       [-2.28425456e-03],
       [-1.60554041e-03],
       [ 3.28284333e-04],
       [ 7.55831220e-04],
       [-1.53474531e-03],
       [ 3.10340562e-03],
       [ 2.6

In [651]:
s = time()
epsilon = 1e-7
y_hat = sigmoid(np.dot(x,w))
output = -((1-y) * np.log(1-y_hat+epsilon)+y*np.log(y_hat+epsilon))
output = (sigmoid(output*1)) * (1-sigmoid(output*1))
dX = np.dot(x.T,output)
dW1 = np.dot(output,w.T)
e = time()
elapse_back = e - s

In [652]:
dX[:10]

array([[-285.21504305],
       [-215.76104956],
       [-524.01423763],
       [ 219.47408771],
       [ 322.51103658],
       [ 104.32323362],
       [-323.58733769],
       [ 379.37180498],
       [-190.13728372],
       [ 432.85416215]])

In [653]:
dW[:10]

array([[ 1.36434259e-03],
       [-9.07727316e-04],
       [ 3.82603265e-03],
       [-3.45556295e-04],
       [-1.70926384e-05],
       [-2.15211626e-03],
       [ 2.66405102e-03],
       [-4.90124030e-04],
       [-7.87579779e-05],
       [-7.62882912e-04]])

In [654]:
x = np.random.randn(1_000_000, 256)

In [655]:
h = 1e-5
s = time()
for _ in range(100000000):
    x[100,50] += h
    x[100,50] -= h
e = time()
elapse = e - s

In [656]:
h = 1e-5
s = time()
for _ in range(100000000):
    tmp = x[100,50]
    x[100,50] += h
    x[100,50] = tmp
e = time()
elapse_tmp = e - s

In [657]:
elapse - elapse_tmp

6.078313589096069

In [658]:
elapse

47.83647274971008