In [1]:
cd /content/drive/MyDrive/AI부트캠프/SeSAC_DL

/content/drive/MyDrive/AI부트캠프/SeSAC_DL


In [2]:
import numpy as np

In [3]:
from common.functions import *
from common.gradient import numerical_gradient

# 2층 신경망 구현

In [17]:
class TwoLayerNet:
    """Fully connected network: one sigmoid hidden layer, then a linear output layer.

    All learnable arrays are kept in ``self.params`` under the keys
    ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        """Create scaled standard-normal weights and zero biases.

        Args:
            input_size: number of input features.
            hidden_size: number of hidden units.
            output_size: number of output units.
            weight_init_std: factor applied to the ``np.random.randn`` draws.
        """
        # Weights are drawn layer by layer (W1 first, then W2).
        first_weights = weight_init_std * np.random.randn(input_size, hidden_size)
        second_weights = weight_init_std * np.random.randn(hidden_size, output_size)

        self.params = {
            'W1': first_weights,                # layer 1
            'b1': np.zeros(hidden_size),
            'W2': second_weights,               # layer 2
            'b2': np.zeros(output_size),
        }

    def predict(self, x):
        """Return the raw output-layer scores for ``x`` (softmax is not applied here)."""
        p = self.params
        hidden = sigmoid(np.dot(x, p['W1']) + p['b1'])
        return np.dot(hidden, p['W2']) + p['b2']

    def loss(self, x, y_true):
        """Return ``cross_entropy_error`` of the softmax of the scores against ``y_true``."""
        probabilities = softmax(self.predict(x))
        return cross_entropy_error(probabilities, y_true)

    def accuracy(self, x, y_true):
        """Return the fraction of rows whose argmax score matches the argmax of ``y_true``."""
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(y_true, axis=1)
        return np.sum(predicted == expected) / x.shape[0]

    def numerical_gradient(self, x, y_true):
        """Return a dict with the numerical gradient of the loss for each parameter array.

        Delegates to the module-level ``numerical_gradient`` helper once per
        parameter array.
        """
        def loss_W(_):
            # The argument is ignored: the loss reads the weights from self.params.
            return self.loss(x, y_true)

        return {
            name: numerical_gradient(loss_W, self.params[name])
            for name in ('W1', 'b1', 'W2', 'b2')
        }

In [18]:
# Build a two-layer net: 784 inputs, 100 hidden units, 10 outputs
net=TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

In [19]:
# Show the shape of every parameter array, one per line (W1, b1, W2, b2)
print(*(net.params[name].shape for name in ('W1', 'b1', 'W2', 'b2')), sep='\n')

(784, 100)
(100,)
(100, 10)
(10,)


# 샘플 데이터

In [27]:
# 100 x 784 uniform random inputs: stand-ins for 100 flattened images
X = np.random.rand(100, 784)
# 100 x 10 uniform random targets (random floats, not one-hot labels)
y_true = np.random.rand(100, 10)

In [28]:
# Instantiate the network
net=TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

In [29]:
# [Predict] forward pass over the random sample batch, then show the
# output shape followed by the first three rows of scores
y_pred = net.predict(X)
print(y_pred.shape, y_pred[:3], sep='\n')

(100, 10)
[[ 0.03400096 -0.03266392 -0.03543476 -0.05807624 -0.07673382 -0.05311776
   0.01557887 -0.06038576  0.00200353 -0.03531445]
 [ 0.03243327 -0.03371378 -0.03740778 -0.06051172 -0.07634332 -0.04851935
   0.02042028 -0.05937103  0.00556836 -0.04059741]
 [ 0.03246339 -0.03581169 -0.03600962 -0.05685637 -0.0818308  -0.05078366
   0.01532987 -0.05938169  0.00819412 -0.03447138]]


In [30]:
# [Gradient computation] numerical gradients of the loss on the sample batch
grads=net.numerical_gradient(X,y_true)

In [None]:
# Show the shape of every gradient array, one per line (W1, b1, W2, b2)
print(*(grads[name].shape for name in ('W1', 'b1', 'W2', 'b2')), sep='\n')

# MNIST 데이터

In [31]:
from util.mnist import load_mnist
import numpy as np
from tqdm import tqdm

In [32]:
# Load the MNIST train/test splits, requesting normalized inputs and one-hot labels
(X_train,y_train),(X_test,y_test)=load_mnist(normalize=True,
                                             one_hot_label=True)

In [33]:
# Shape of the training inputs returned by load_mnist
X_train.shape

(60000, 784)

6만장의 이미지

In [39]:
# Training setup
iters_num = 10000
batch_size = 100
learning_rate = 0.1
train_size = X_train.shape[0]  # 60,000 training samples

train_loss_list = []  # loss history, one entry per iteration

net = TwoLayerNet(input_size=784, hidden_size=30, output_size=10)

In [40]:
batch_mask=np.random.choice(train_size,batch_size)
    # draw batch_size random indices from the range [0, train_size)

In [41]:
# Inspect the sampled mini-batch indices
batch_mask

array([49835, 59130, 57520, 17124, 50691, 47073, 26341, 34064, 44455,
       11843, 44511, 40306, 51418, 44397, 16204, 45484,  2306, 32014,
       14494,  2380, 21878, 41857, 32384, 44636,  8413, 35520, 29596,
       25815, 19224, 42461, 49865, 10410, 48318, 44824, 43351, 52401,
       32309, 24627, 42816,  6408, 30474, 32349, 51331, 32190,  2509,
        6957, 10306, 56228, 48397,  9368, 55946,  9299, 25166, 53503,
       46519, 37569, 41689, 38875, 54600, 56150, 45726, 44079, 57320,
       12607, 58210,  8214, 40857, 13128,  5410,   350, 16767, 50647,
        5755, 53106, 20890, 29543,   552, 33133, 17697, 15007, 30257,
        8424, 28088, 32150,  7507, 27599, 10415, 57592, 41343, 47735,
       32631, 26242, 32019, 53000, 43757, 48968, 51778,  1425,  2447,
       38982])

In [None]:
# Training loop - not executed (it takes a very long time to run)
for i in tqdm(range(iters_num)):
    # Draw a random mini-batch of sample indices
    batch_mask=np.random.choice(train_size,batch_size)
    X_batch=X_train[batch_mask]
    y_batch=y_train[batch_mask]

    grads=net.numerical_gradient(X_batch,y_batch)

    # Gradient-descent step: move the network's parameters against the gradient.
    # (Updating grads[key] instead would only rescale the gradients and leave
    # the network untrained.)
    for key in grads:
        net.params[key]-=learning_rate*grads[key]

    # Record the loss on this mini-batch after the update
    loss=net.loss(X_batch,y_batch)
    train_loss_list.append(loss)

수치 미분은 매개변수 하나하나마다 손실을 다시 계산해야 하므로 굉장히 오래 걸린다

# 3층 신경망 구현  
+ 헷갈렸던 거  
    `np.random.randn` → 표준정규분포(평균 0, 표준편차 1) 난수 생성  
    `np.random.rand` → [0, 1) 구간 균등분포 난수 생성

In [43]:
class ThreeLayerNet:
    """Fully connected network: two sigmoid hidden layers, then a linear output layer.

    All learnable arrays are kept in ``self.params`` under the keys
    ``'W1'``/``'b1'``, ``'W2'``/``'b2'`` and ``'W3'``/``'b3'``.
    """

    def __init__(self, input_size, hidden1, hidden2, output_size,
                 weight_init_std=0.1):
        """Create scaled standard-normal weights and zero biases.

        Args:
            input_size: number of input features.
            hidden1: number of units in the first hidden layer.
            hidden2: number of units in the second hidden layer.
            output_size: number of output units.
            weight_init_std: factor applied to the ``np.random.randn`` draws.
        """
        # Weights are drawn layer by layer (W1, then W2, then W3).
        first_weights = weight_init_std * np.random.randn(input_size, hidden1)
        second_weights = weight_init_std * np.random.randn(hidden1, hidden2)
        third_weights = weight_init_std * np.random.randn(hidden2, output_size)

        self.params = {
            'W1': first_weights,                # layer 1
            'b1': np.zeros(hidden1),
            'W2': second_weights,               # layer 2
            'b2': np.zeros(hidden2),
            'W3': third_weights,                # layer 3
            'b3': np.zeros(output_size),
        }

    def predict(self, x):
        """Return the raw output-layer scores for ``x`` (softmax is not applied here)."""
        p = self.params
        hidden_a = sigmoid(np.dot(x, p['W1']) + p['b1'])
        hidden_b = sigmoid(np.dot(hidden_a, p['W2']) + p['b2'])
        return np.dot(hidden_b, p['W3']) + p['b3']

    def loss(self, x, y_true):
        """Return ``cross_entropy_error`` of the softmax of the scores against ``y_true``."""
        probabilities = softmax(self.predict(x))
        return cross_entropy_error(probabilities, y_true)

    def accuracy(self, x, y_true):
        """Return the fraction of rows whose argmax score matches the argmax of ``y_true``."""
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(y_true, axis=1)
        return np.sum(predicted == expected) / predicted.shape[0]

    def numerical_gradient(self, x, y_true):
        """Return a dict with the numerical gradient of the loss for each parameter array.

        Delegates to the module-level ``numerical_gradient`` helper once per
        parameter array, weights first and then biases.
        """
        def loss_W(_):
            # The argument is ignored: the loss reads the weights from self.params.
            return self.loss(x, y_true)

        return {
            name: numerical_gradient(loss_W, self.params[name])
            for name in ('W1', 'W2', 'W3', 'b1', 'b2', 'b3')
        }

## 샘플데이터

In [44]:
# Build a three-layer net: 784 inputs, hidden layers of 100 and 50 units, 10 outputs
net=ThreeLayerNet(input_size=784,hidden1=100,hidden2=50,output_size=10)

In [45]:
# 100 x 784 standard-normal random inputs
X = np.random.randn(100, 784)
# 100 x 10 uniform random targets (random floats, not one-hot labels)
y_true = np.random.rand(100, 10)

In [46]:
# Forward pass of the three-layer net on the random sample batch
y_pred=net.predict(X)

In [48]:
# Print the output shape, then display the first three rows of scores
print(y_pred.shape)
y_pred[:3]

(100, 10)


array([[-0.10047969,  0.01093531, -0.16905225, -0.44172002, -0.17189823,
         0.29051714,  0.30232656, -0.10518577, -0.35107223,  0.27162452],
       [-0.0716381 , -0.0329428 , -0.13752472, -0.46024972, -0.10738962,
         0.26638996,  0.18043935, -0.11817136, -0.48012476,  0.29337804],
       [-0.07993017, -0.03250871, -0.08354453, -0.42130044, -0.20648991,
         0.31202025,  0.29005138, -0.22673387, -0.5241743 ,  0.38842384]])

In [49]:
# Loss of the untrained network on the random sample batch
net.loss(X,y_true)

2.35668390043883

In [50]:
# Accuracy of the untrained network against the argmax of the random targets
net.accuracy(X,y_true)

0.09

+ 아래 셀들은 실행 X

In [None]:
# Numerical gradients for all six parameter arrays (cell left unexecuted)
net.numerical_gradient(X,y_true) 

In [52]:
# History buffers: per-iteration loss and per-epoch accuracies
train_loss_list=[]
train_accuracy_list=[]
test_accuracy_list=[]

iters_num=10000
train_size=X_train.shape[0]
batch_size=10000
learning_rate=0.1
# Iterations per epoch as an integer (6 here) so the modulo test below stays
# exact; clamped to 1 so a batch larger than the dataset cannot yield zero.
iter_per_epoch=max(train_size//batch_size,1)

net=TwoLayerNet(input_size=784,hidden_size=50,output_size=10)

for i in tqdm(range(iters_num)):
    # Draw a random mini-batch of sample indices
    batch_mask=np.random.choice(train_size,batch_size)
    X_batch=X_train[batch_mask]
    y_batch=y_train[batch_mask]

    grads=net.numerical_gradient(X_batch,y_batch)

    # Gradient-descent step: move the network's parameters against the gradient.
    # (Updating grads[key] instead would only rescale the gradients and leave
    # the network untrained.)
    for key in grads:
        net.params[key]-=learning_rate*grads[key]

    # Record the loss on this mini-batch after the update
    loss=net.loss(X_batch,y_batch)
    train_loss_list.append(loss)

    if i%iter_per_epoch==0:
        # Evaluate accuracy on the full train and test sets once per epoch
        train_acc=net.accuracy(X_train,y_train)
        test_acc=net.accuracy(X_test,y_test)
        print('train acc, test acc | '+str(train_acc)+','+str(test_acc))

        train_accuracy_list.append(train_acc)
        test_accuracy_list.append(test_acc)

  0%|          | 0/10000 [00:09<?, ?it/s]


KeyboardInterrupt: ignored