In [1]:
import numpy as np

In [2]:
def sigmoid(x):
    return 1/(1+np.exp(-x))
def step(x):
    return np.where(x>0,1,0)
def relu(x):
    return np.where(x>0,x,0)
def softmax(x):
    x = x- np.max(x,axis=1).reshape(-1,1)
    return np.exp(x)/np.sum(np.exp(x),axis=1).reshape(-1,1)
def cross_entropy(y,t):
    return -np.sum(t*np.log(y))             
def numerical_gradient(f,x) :
    h = 1e-4
    grads = np.zeros_like(x)
    it = np.nditer(x,flags=['multi_index'],op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_x = x[idx]
        x[idx] = tmp_x + h
        fxh = f(x)
        x[idx] = tmp_x
        fx = f(x)
        grads[idx]  = (fxh-fx)/h
        it.iternext()
    return grads               

In [3]:
import tensorflow
from tensorflow import keras

In [4]:
from keras.datasets import mnist

In [5]:
from sklearn.datasets import load_iris

In [6]:
X = load_iris()['data']
y = load_iris()['target']

In [7]:
def make_one(x):
    data = np.zeros((x.size, np.unique(x).size))
    for idx, x in enumerate(x):
        data[idx, x] = 1
    return data


In [8]:
y = make_one(y)

In [9]:
W1 = np.random.randn(4,10)/2
b1 = np.zeros(10)
W2 = np.random.randn(10,5)/2
b2 = np.zeros(5)
W3 = np.random.randn(5,3)/2
b3 = np.zeros(3)

In [10]:
l1 = np.dot(X,W1) + b1
z1 = relu(l1)
l2 = np.dot(z1,W2) + b2
z2 = relu(l2)
l3 = np.dot(z2,W3) + b3
pred = softmax(l3)

In [11]:
def predict(x):
    l1 = np.dot(X,W1) + b1
    z1 = relu(l1)
    l2 = np.dot(z1,W2) + b2
    z2 = relu(l2)
    l3 = np.dot(z2,W3) + b3
    pred = softmax(l3)
    return pred

In [12]:
cross_entropy(pred,y)

222.30928432052588

In [13]:
cross_entropy(predict(X),y)

222.30928432052588

In [14]:
def numerical_gradient2(f,x) :
    h = 1e-4
    grads = np.zeros_like(x)
    it = np.nditer(x,flags=['multi_index'],op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_x = x[idx]
        x[idx] = tmp_x + h
        fxh = f(x)
        x[idx] = tmp_x = h
        fx = f(x)
        grads[idx]  = (fxh-fx)/2*h
        x[idx] = tmp_x
        it.iternext()
    return grads

In [15]:
def gradient_descent(x,t,lr):
    lr = 1e-4
    y = predict(x)
    loss = lambda x: cross_entropy(y,t)
    gred_desc1 = numerical_gradient2(loss, W1)
    gred_desc2 = numerical_gradient2(loss, W2)
    gred_desc3 = numerical_gradient2(loss, W3)
    gred_desc1_b = numerical_gradient2(loss, b1)
    gred_desc2_b = numerical_gradient2(loss, b2)
    gred_desc3_b = numerical_gradient2(loss, b3)
    W1 = W1 - gred_desc1 * lr
    W2 = W2 - gred_desc2 * lr
    W3 = W3 - gred_desc3 * lr
    b1 = b1 = gred_desc1_b * lr
    b2 = b2 = gred_desc2_b * lr
    b3 = b3 = gred_desc3_b * lr
    return W1

In [52]:
class Network:
    def __init__(self):
        self.W1 = np.random.randn(784)
        self.b1 = np.zeros(258)
        self.W2 = np.random.randn(258)
        self.b2 = np.zeros(10)

    def predict(self, x):
        l1 = np.dot(x,self.W1) + self.b1
        z1 = relu(l1)
        l2 = np.dot(z1,self.W2) + self.b2
        pred = softmax(l2)
        return pred

    def loss(self, x, t):
        y = self.predict(x)
        loss = cross_entropy(y,t)
        return loss

    def gradient_descent(self, x, t, lr):
        W_loss = lambda W: self.loss(x, t)
        gred_desc1 = numerical_gradient(W_loss, self.W1)
        gred_desc2 = numerical_gradient(W_loss, self.W2)
        gred_desc1_b = numerical_gradient(W_loss, self.b1)
        gred_desc2_b = numerical_gradient(W_loss, self.b2)
        self.W1 = self.W1 - gred_desc1 * lr
        self.W2 = self.W2 - gred_desc2 * lr
        self.b1 = self.b1 = gred_desc1_b * lr
        self.b2 = self.b2 = gred_desc2_b * lr
        return self.W1

    def accuracy(self,x,t):
        result = np.argmax(self.predict(x), axis=1) == np.argmax(t,axis=1)
        result = np.sum(result)/x.shape[0]
        return result


In [47]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [48]:
y_train = make_one(y_train)

In [49]:
y_train.shape

(60000, 10)

In [50]:
network = Network()

In [53]:
epochs = 100
for epoch in range(epochs):
    network.gradient_descent(X_train, y_train, 1e-5)
    losses = network.loss(X_train,y_train)
    acc = network.accuracy(X_train,y_train)
    if epoch % 10 == 0:
        print(epoch+1,"==========","acc =====", acc,"loss =====", losses)

ValueError: shapes (60000,28,28) and (784,258) not aligned: 28 (dim 2) != 784 (dim 0)

In [26]:
from sklearn.datasets import load_iris
X = load_iris()['data']
y = load_iris()['target']
y = make_one(y)

In [55]:
# 필요한 라이브러리 불러오기
from typing import Sequence
from keras.datasets import mnist
from keras import models
from keras import layers
from keras.utils import to_categorical

(X_train, y_train), (X_test, y_test) = mnist.load_data()
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# 모델 정의하기 (여기에서는 Sequential 클래스 사용)
model = models.Sequential()
model.add(layers.Flatten(input_shape=(28 * 28,)))

model.add(layers.Dense(256, activation='sigmoid'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))

model.add(layers.Dense(10, activation='softmax'))

# 모델 컴파일 하기
model.compile(optimizer='sgd',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

