In [1]:
import tensorflow as tf
import numpy as np                   # advanced math library
import matplotlib.pyplot as plt      # MATLAB like plotting routines
import random                        # for generating random numbers

from tensorflow.keras.models import Sequential  # Model type to be used
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation
from tensorflow.keras.datasets import mnist     # MNIST dataset is included in Keras
from tensorflow.keras.utils import to_categorical, plot_model

print(tf.__version__)  # report the installed TensorFlow version (this notebook targets 2.x)

# Detect available GPUs and enable on-demand GPU memory growth.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  print('사용가능한 GPU 갯수: ',len(gpus), '\n')

  try:
    # Allow the TensorFlow process to extend its GPU memory region as the
    # program needs more, instead of reserving everything up front.
    # The setting is applied to every visible GPU because TensorFlow requires
    # memory growth to be configured identically across GPUs.
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)

  except RuntimeError as e:
    # Memory growth has to be configured at program start, before the GPUs
    # have been initialized; report the error instead of aborting the cell.
    print(e)

# Show the details of the devices TensorFlow can see.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

2.6.0
사용가능한 GPU 갯수:  1 

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 8416743133503447878
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 16185556992
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7819126513496997259
physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"
]


In [2]:
# Step 1: Data Preparation

# Load the MNIST train/test splits bundled with Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Number of distinct classes present in the training labels.
num_labels = len(np.unique(y_train))

# Reshape: append a trailing channel axis so each image is
# (image_size, image_size, 1), the layout the Conv2D input layer expects.
image_size = x_train.shape[1]

x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
x_test = np.reshape(x_test, [-1, image_size, image_size, 1])

# Normalizing: scale pixel values by 1/255.
x_train = x_train/255.
x_test = x_test/255.

# One-hot encoding.
# The width is passed explicitly so the train and test encodings always have
# the same number of columns, instead of each being inferred from the largest
# label that happens to occur in that split.
y_train = to_categorical(y_train, num_classes=num_labels)
y_test = to_categorical(y_test, num_classes=num_labels)

In [3]:
# Step 2: Model construction

# Hyperparameters shared by the layers below (batch_size is used later when
# fitting and evaluating).
input_shape = (image_size, image_size, 1)
batch_size = 128
kernel_size = 3
pool_size = 2
filters = 64
dropout = 0.2

# The network is a stack of Conv2D(ReLU) blocks, the first two followed by
# max pooling, then flatten -> dropout -> dense -> softmax.
model = Sequential([
    Conv2D(filters=filters,
           kernel_size=kernel_size,
           activation='relu',
           input_shape=input_shape),
    MaxPooling2D(pool_size),
    Conv2D(filters=filters,
           kernel_size=kernel_size,
           activation='relu'),
    MaxPooling2D(pool_size),
    Conv2D(filters=filters,
           kernel_size=kernel_size,
           activation='relu'),
    Flatten(),
    Dropout(dropout),        # dropout acts as the regularizer
    Dense(num_labels),       # one output unit per class
    Activation('softmax'),   # turn the outputs into class probabilities
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 64)        640       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 3, 64)          36928     
_________________________________________________________________
flatten (Flatten)            (None, 576)               0         
_________________________________________________________________
dropout (Dropout)            (None, 576)               0

In [4]:
# Step 3: Model compile
# Adam optimizer, categorical cross-entropy loss (labels are one-hot encoded),
# and accuracy reported as the metric.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [5]:
import time

# Step 4: Model fit
# Elapsed time is measured with time.perf_counter(), a monotonic
# high-resolution clock, so the reported duration cannot be distorted by
# system clock adjustments the way time.time() differences can.
start = time.perf_counter()

model.fit(x_train, y_train, epochs=10, batch_size=batch_size)

end = time.perf_counter()
print('Execution time in seconds =',end-start)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Execution time in seconds = 22.152608633041382


In [6]:
# Evaluate the trained model on the test split and print the accuracy as a
# percentage. evaluate() returns the loss followed by the compiled metric
# (accuracy); only the accuracy is used here.
#
# Author's note: a test accuracy of up to about 99.3% is reported for this
# 3-layer network with 64 feature maps per layer, the Adam optimizer and
# dropout=0.2. CNNs are more parameter efficient and more accurate than MLPs,
# and are also suitable for learning representations from sequential data,
# images, and videos.
_, acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
print("\nTest accuracy: %.1f%%" % (100.0 * acc))


Test accuracy: 99.2%


# How do Convolutional Neural Networks work?


__VGG-16__
> VGG16 is a convolutional neural network model proposed by K. Simonyan and A. Zisserman from the University of Oxford in the paper “Very Deep Convolutional Networks for Large-Scale Image Recognition”. The model achieves 92.7% top-5 test accuracy on ImageNet, a dataset of over 14 million images belonging to 1000 classes. It was one of the famous models submitted to ILSVRC-2014.


---


<img src='https://media.geeksforgeeks.org/wp-content/uploads/20200219152207/new41.jpg'>
              
---     
         

Link 1: [How do Convolutional Neural Networks work?](https://e2eml.school/how_convolutional_neural_networks_work.html)

Link 2: [Youtube Video](https://www.youtube.com/watch?v=FmpDIaiMIeA&t=103s)





## 작동원리

  1. Convolution
  2. Pooling
  3. ReLU Activation
  4. Deep Learning
  5. Fully Connected Layers


### Convolution

아래 그림과 같이 컴퓨터에게 두 이미지중 하나를 제시하고 X와 O를 구분하게 하는 단순한 모형을 생각해보자.

<img src = 'https://e2eml.school/images/cnn1.png'>

제시되는 이미지는 축소(shrunken), 약간의 변형(deformed), 이동(shifted), 회전(rotation)이 된 그림일 수도 있다.

<img src = "https://e2eml.school/images/cnn2.png">

컴퓨터에게 이미지는 각 pixel에 숫자가 들어간 2차원 행렬로 인식된다. 우리 예에서는 흰색은 1, 흑색은 -1로 표시하였다.

**Features (Patterns, Representations)**

<img src='https://e2eml.school/images/cnn3.png'>

CNN은 이미지를 여러개의 조각(patch, piece)으로 나누어 piece-by-piece로 패턴을 비교합니다.

  - CNNs get a lot better at seeing similarity than whole-image matching schemes.
  
<img src = 'https://e2eml.school/images/cnn4.png'>

  - Each feature is like a mini-image—a small two-dimensional array of values.
  - Features match common aspects of the images.

X의 경우 대각선(Diagonal Lines) feature와 크로스(X) feature가 그림에서 나온 여러개의 패치에서 일치 또는 유사한 경우를 찾게될 것이다.

1. Convolution

이미지가 처음 제시되었을 때 CNN은 정확히 어느 부분에 주어진 Feature가 match되는지 알 수 없기 때문에 부분부분 옮겨 가면서 일치 또는 유사 여부를 확인하게 된다. 이 과정에 사용되는 수학이 Convolution(합성곱)이며 이때 부분 부분 옮겨 가면서 사용되는 Feature를 필터(Filters)라고 한다.

<img src="https://e2eml.school/images/cnn6.png">


**합성곱 연산**

    ```
    To calculate the match of a feature to a patch of the image, 
      (1) simply multiply each pixel in the feature by the value of the corresponding pixel in the image. 
      (2) Then add up the answers and divide by the total number of pixels in the feature. 
    ```

- Every matching pixel results in 1. 
- Similarly, any mismatch is  -1. 
- If all the pixels in a feature match, then adding them up and dividing by the total number of pixels gives 1. 
- Similarly, if none of the pixels in a feature match the image patch, then the answer is -1.

이렇게 해서 얻어진 2차원 행렬은 적용된 필터와 일치/유사/불일치하는지를 보여주는 지도(map)라 할 수 있으며 또한 원이미지에 필터를 적용한 축소된 이미지라 할 수있다.

  - Values close to 1 show strong matches
  - Values close to -1 show strong matches for the photographic negative of our feature 
  - Values near zero show no match of any sort.

이 과정을 우리가 설정한 모든 필터(우리의 예에서는 3개)에 적용하면 이미지 셋( a set of 3 filtered images)을 구할 수 있다.

<img src='https://e2eml.school/images/cnn7.png'>


    ```
    It’s easy to see how CNNs get their reputation as computation hogs.
    Although we can sketch our CNN on the back of a napkin, the number of additions, multiplications and divisions can add up fast. 
    ```



### Pooling

  Pooling은 CNN의 핵심 요소중 하나로서 큰 사이즈의 이미지를 중요한 정보는 유지하면서 축소시키려는 기법이다.

  <img src='https://e2eml.school/images/cnn8.png'>

  - In practice, a window 2 or 3 pixels on a side and steps of 2 pixels work well.
  - After pooling, an image has about a quarter as many pixels as it started with. 
  - Because it keeps the maximum value from each window, it preserves the best fits of each feature within the window.
  - A pooling layer is just the operation of performing pooling on an image or a collection of images. 
  - The output will have the same number of images, but they will each have fewer pixels. 
  - This is also helpful in managing the computational load. 

  <img src='https://e2eml.school/images/cnn9.png'>




### Rectified Linear Units (ReLU)

  <img src='https://e2eml.school/images/cnn10.png'>
  
  - A small but important player in this process is the Rectified Linear Unit or ReLU. 
  - Its math is also very simple—wherever a negative number occurs, swap it out for a 0. 
    
$$
R(z) =  max(z,0) =
  \begin{cases}
			0, & \text{for $z \lt 0$}\\
      z, & \text{for $z \ge 0$}
	\end{cases}
$$

  - This helps the CNN stay mathematically healthy by keeping learned values from getting stuck near 0 or blowing up toward infinity. It’s the axle grease of CNNs.

  <img src='https://e2eml.school/images/cnn11.png'>

  The output of a ReLU layer is the same size as whatever is put into it, just with all the negative values removed.




### Deep learning

  <img src = 'https://e2eml.school/images/cnn12.png'>

  - The input to each layer (two-dimensional arrays) looks a lot like the output (two-dimensional arrays). Because of this, we can stack them like Lego bricks. 
  - Raw images get filtered, rectified and pooled to create a set of shrunken, feature-filtered images. These can be filtered and shrunken again and again.
  - Each time, the features become larger and more complex, and the images become more compact. 
  - This lets lower layers represent simple aspects of the image, such as edges and bright spots. 
  - Higher layers can represent increasingly sophisticated aspects of the image, such as shapes and patterns. These tend to be readily recognizable. For instance, in a CNN trained on human faces, the highest layers represent patterns that are clearly face-like. 

  <img src='https://e2eml.school/images/cnn18.png'>

 

### Fully connected layers (Dense Layers)

  Fully connected layers take the high-level filtered images and translate them into votes. In our case, we only have to decide between two categories, X and O. 

  <img src='https://e2eml.school/images/cnn13.png'>

When a new image is presented to the CNN, it percolates through the lower layers until it reaches the fully connected layer at the end. Then an election is held. 

However, the process isn’t entirely democratic. Some values are much better than others at knowing when the image is an X, and some are particularly good at knowing when the image is an O. These get larger votes than the others. These votes are expressed as weights, or connection strengths, between each value and each category.

In practice, several fully connected layers are often stacked together, with each intermediate layer voting on phantom “hidden” categories. In effect, each additional layer lets the network learn ever more sophisticated combinations of features that help it make better decisions.

  <img src='https://e2eml.school/images/cnn14.png'>



### Backpropagation

Our story is filling in nicely, but it still has a huge hole—Where do features come from? and How do we find the weights in our fully connected layers?

If these all had to be chosen by hand, CNNs would be a good deal less popular than they are. Luckily, a bit of machine learning magic called backpropagation does this work for us.

We start with an untrained CNN where every pixel of every feature and every weight in every fully connected layer is set to a random value. Then we feed images through it, one after another.

Each image the CNN processes results in a vote. The amount of wrongness in the vote, the error, tells us how good our features and weights are. The features and weights can then be adjusted to make the error less. Each value is adjusted a little higher and a little lower, and the new error computed each time. Whichever adjustment makes the error less is kept. After doing this for every feature pixel in every convolutional layer and every weight in every fully connected layer, the new weights give an answer that works slightly better for that image. This is then repeated with each subsequent image in the set of labeled images.



### Hyperparameters

Unfortunately, not every aspect of CNNs can be learned in so straightforward a manner. There is still a long list of decisions that a CNN designer must make.

For each convolution layer, How many features? How many pixels in each feature?
For each pooling layer, What window size? What stride?
For each extra fully connected layer, How many hidden neurons?
In addition to these there are also higher level architectural decisions to make: How many of each layer to include?






















In [7]:
# Calculate O (output width) for the first convolutional layer (Conv2D) of our model,
# using O = (I - K + 2P) / S + 1.
#
# Hyperparameters the model was built with:
#   kernel_size (K) = 3
#   filters     (N) = 64
#   pool_size       = 2   (pooling only; NOT the P below, which is the padding)

I = 28   # input image width
K = 3    # kernel width
N = 64   # number of kernels
S = 1    # stride
P = 0    # padding

# True division is kept on purpose, so O is a float and the cell displays 26.0.
O = (I - K + 2*P)/S + 1
O

26.0

## CNN의 parameter 개수와 tensor 사이즈 계산하기

네트워크의 텐서 사이즈와 파라미터의 갯수를 계산하는 공식에 대해 다루려 한다.
아래의 AlexNet을 이용하여 예시를 든다.

<img src = 'https://seongkyun.github.io/assets/post_img/study/2019-01-25-num_of_parameters/fig1.png'>

### AlexNet의 구조

  - Input: 227* 227*3 크기의 컬러 이미지. 
  - Conv-1: 11*11 크기의 커널 96개, stride=4, padding=0
  - MaxPool-1: stride 2, 3*3 max pooling layer
  - Conv-2: 5*5 크기의 커널 256개, stride=1, padding=2
  - MaxPool-2: stride 2, 3*3 max pooling layer
  - Conv-3: 3*3 크기의 커널 384개, stride=1, padding=1
  - Conv-4: 3*3 크기의 커널 384개, stride=1, padding=1
  - Conv-5: 3*3 크기의 커널 256개, stride=1, padding=1
  - Maxpool-3: stride 2, 3*3 max pooling layer
  - FC-1: 4096개의 fully connected layer
  - FC-2: 4096개의 fully connected layer
  - FC-3: 1000개의 fully connected layer

__특징__

  - Relu activation function is used instead of Tanh to add non-linearity. It accelerates the speed by 6 times at the same accuracy.
  - Use dropout instead of regularisation to deal with overfitting. However, the training time is doubled with the dropout rate of 0.5.
  - Overlap pooling to reduce the size of the network. It reduces the top-1 and top-5 error rates by 0.4% and 0.3%, respectively. 


__AlexNet의 총 parameter 개수 및 출력 tensor size__

  - AlexNet의 전체 parameter 수는 5개의 convolution layer와 3개의 FC layer에서 계산되는 parameter 개수들의 합
    - 62,378,344 개.
  - 자세한 parameter 및 tensor size는 아래 표 참조


  <img src ='https://seongkyun.github.io/assets/post_img/study/2019-01-25-num_of_parameters/fig2.png'>



### Convolution layer의 output tensor size

각각 기호를 아래와 같이 정의
  - $O$: Size(width) of output image
  - $I$: Size(width) of input image
  - $K$: Size(width) of kernels used in the Conv layer
  - $N$: Number of kernels
  - $S$: Stride of the convolution operation
  - $P$: Padding size

__$O$(width of output image)는 다음과 같이 계산__

$$ O = \frac{I-K+2P}{S}+1$$ 

  - 출력 이미지의 채널 수는 커널의 갯수($N$)와 같음

__MaxPool layer의 output tensor size__

각각 기호를 아래와 같이 정의
  - $O$: Size(width) of output image
  - $I$: Size(width) of input image
  - $S$: Stride of the pooling operation
  - $P_s$: Pooling size

$$ O = \frac{I-P_s}{S}+1$$ 

Convolution layer와는 다르게 출력의 채널 수는 입력의 채널 수와 동일  

___      

First Convolution Layer의 출력 이미지

In [8]:
# AlexNet Conv-1: output width from O = (I - K + 2P) / S + 1.
I, N, K = 227, 96, 11   # input width, number of kernels, kernel width
S, P = 4, 0             # stride, padding

# Floor division keeps the width an integer.
O = (I - K + 2 * P) // S + 1
print('First Convolution Layer의 출력 이미지의 크기(width) =', O)
print('First Convolution Layer의 출력 이미지의 shape =', (O, O, N))



First Convolution Layer의 출력 이미지의 크기(width) = 55
First Convolution Layer의 출력 이미지의 shape = (55, 55, 96)


First Pooling Layer의 출력 이미지

In [9]:
# AlexNet MaxPool-1: output width from O = (I - P_s) / S + 1.
I = O                 # input width = output width left by the preceding cell
S, P, P_s = 2, 0, 3   # stride, padding (not part of the pooling formula), pooling window

# Floor division keeps the width an integer.
O = (I - P_s) // S + 1
print('First Pooling Layer의 출력 이미지의 크기(width) =', O)
# N still holds the kernel count set in an earlier cell; pooling keeps the channel count.
print('First Pooling Layer의 출력 이미지의 shape =', (O, O, N))

First Pooling Layer의 출력 이미지의 크기(width) = 27
First Pooling Layer의 출력 이미지의 shape = (27, 27, 96)


Second Convolution Layer의 출력 이미지

In [10]:
# AlexNet Conv-2: output width from O = (I - K + 2P) / S + 1.
I = O            # input width = output width left by the preceding cell
K, N = 5, 256    # kernel width, number of kernels
S, P = 1, 2      # stride, padding

# Floor division keeps the width an integer.
O = (I - K + 2 * P) // S + 1
print('Second Conv Layer의 출력 이미지의 크기(width) =', O)
print('Second Conv Layer의 출력 이미지의 shape =', (O, O, N))

Second Conv Layer의 출력 이미지의 크기(width) = 27
Second Conv Layer의 출력 이미지의 shape = (27, 27, 256)


Second Pooling Layer의 출력 이미지는 (13,13,256)

In [11]:
# AlexNet MaxPool-2: output width from O = (I - P_s) / S + 1.
I = O                 # input width = output width left by the preceding cell
S, P, P_s = 2, 0, 3   # stride, padding (not part of the pooling formula), pooling window

# Floor division keeps the width an integer.
O = (I - P_s) // S + 1
print('Second Pooling Layer의 출력 이미지의 크기(width) =', O)
# N still holds the kernel count set in an earlier cell; pooling keeps the channel count.
print('Second Pooling Layer의 출력 이미지의 shape =', (O, O, N))

Second Pooling Layer의 출력 이미지의 크기(width) = 13
Second Pooling Layer의 출력 이미지의 shape = (13, 13, 256)


Third Conv Layer의 출력 이미지

In [12]:
# AlexNet Conv-3: output width from O = (I - K + 2P) / S + 1.
I = O            # input width = output width left by the preceding cell
K, N = 3, 384    # kernel width, number of kernels
S, P = 1, 1      # stride, padding

# Floor division keeps the width an integer.
O = (I - K + 2 * P) // S + 1
print('Third Conv Layer의 출력 이미지의 크기(width) =', O)
print('Third Conv Layer의 출력 이미지의 shape =', (O, O, N))

Third Conv Layer의 출력 이미지의 크기(width) = 13
Third Conv Layer의 출력 이미지의 shape = (13, 13, 384)


The 4th Conv Layer의 출력 이미지

In [13]:
# AlexNet Conv-4: output width from O = (I - K + 2P) / S + 1.
I = O            # input width = output width left by the preceding cell
K, N = 3, 384    # kernel width, number of kernels
S, P = 1, 1      # stride, padding

# Floor division keeps the width an integer.
O = (I - K + 2 * P) // S + 1
print('4th Conv Layer의 출력 이미지의 크기(width) =', O)
print('4th Conv Layer의 출력 이미지의 shape =', (O, O, N))

4th Conv Layer의 출력 이미지의 크기(width) = 13
4th Conv Layer의 출력 이미지의 shape = (13, 13, 384)


The 5th Conv Layer의 출력 이미지

In [14]:
# AlexNet Conv-5: output width from O = (I - K + 2P) / S + 1.
I = O            # input width = output width left by the preceding cell
K, N = 3, 256    # kernel width, number of kernels
S, P = 1, 1      # stride, padding

# Floor division keeps the width an integer.
O = (I - K + 2 * P) // S + 1
print('5th Conv Layer의 출력 이미지의 크기(width) =', O)
print('5th Conv Layer의 출력 이미지의 shape =', (O, O, N))

5th Conv Layer의 출력 이미지의 크기(width) = 13
5th Conv Layer의 출력 이미지의 shape = (13, 13, 256)


In [15]:
# Output image of the last pooling layer

In [16]:
# AlexNet MaxPool-3 (last pooling layer): output width from O = (I - P_s) / S + 1.
I = O                 # input width = output width left by the preceding cell
S, P, P_s = 2, 0, 3   # stride, padding (not part of the pooling formula), pooling window

# Floor division keeps the width an integer.
O = (I - P_s) // S + 1
print('Last Pooling Layer의 출력 이미지의 크기(width) =', O)
# N still holds the kernel count set in an earlier cell; pooling keeps the channel count.
print('Last Pooling Layer의 출력 이미지의 shape =', (O, O, N))

Last Pooling Layer의 출력 이미지의 크기(width) = 6
Last Pooling Layer의 출력 이미지의 shape = (6, 6, 256)


### Convolution layer의 parameter 갯수

  - CNN의 각 layer는 weight parameter와 bias parameter가 존재.
  - 전체 네트워크의 parameter 수는 각 conv layer 파라미터 수의 합

각각 기호를 아래와 같이 정의

  - $W_c$: Number of weights of the Conv layer
  - $B_c$: Number of biases of the Conv layer
  - $P_c$: Number of parameters of the Conv layer
  - $K$: Size(width) of kernels used in the Conv layer
  - $N$: Number of kernels
  - $C$: Number of channels of the input image

  $$W_c = K^2 \times C \times N$$
  $$B_c = N$$
  $$P_c = W_c + B_c$$

  - Conv layer에서 모든 커널의 깊이는 항상 입력 이미지의 채널 수와 같음
  - 따라서 모든 커널에는 $K^2\times C$개의 parameter들이 있으며, 그러한 커널들이 $N$개 존재

AlexNet의 Conv-1

In [17]:
# AlexNet Conv-1 parameter count: W_c = K^2 * C * N, B_c = N, P_c = W_c + B_c.
C, K, N = 3, 11, 96   # input channels, kernel width, number of kernels

Wc = (K * K * C) * N  # K^2 * C weights per kernel, N kernels
Bc = N                # one bias per kernel
Pc = Wc + Bc          # total parameters of the layer

(Wc, Bc, Pc)

(34848, 96, 34944)

__Fully Connected layer의 parameter 갯수__

#### Case1: FC layer connected to a Conv layer

각각의 기호를 아래와 같이 정의
  - $W_{cf}$: Number of weights of a FC layer which is connected to a Conv layer
  - $B_{cf}$: Number of biases of a FC layer which is connected to a Conv layer
  - $P_{cf}$: Number of parameters of a FC layer which is connected to a Conv layer
  - $O$: Size(width) of the output image of the previous Conv layer
  - $N$: Number of kernels in the previous Conv layer
  - $F$: Number of neurons in the FC Layer

  $$W_{cf} = O^2 \times N \times F$$
  $$B_{cf} = F$$
  $$P_{cf} = W_{cf} + B_{cf}$$

#### Case2: FC layer connected to a FC Layer
각각의 기호를 아래와 같이 정의
  - $W_{ff}$: Number of weights of a FC layer which is connected to a FC layer
  - $B_{ff}$: Number of biases of a FC layer which is connected to a FC layer
  - $P_{ff}$: Number of parameters of a FC layer which is connected to a FC layer
  - $F$: Number of neurons in the FC layer
  - $F_{-1}$: Number of neurons in the previous FC layer

  $$W_{ff} = F_{-1} \times F$$
  $$B_{ff} = F$$
  $$P_{ff} = W_{ff} + B_{ff}$$


위의 식에서, $W_{ff} = F_{-1} \times F$는 이전 FC layer의 neuron과 현재 FC layer의 neuron 사이의 총 연결 가중치의 개수.
Bias parameter의 개수는 뉴런의 개수($F$)와 같음



Conv layer의 마지막단에 바로 붙는 FC

In [18]:
# Parameter count of the FC layer attached directly to the last Conv/pool stage:
# W_cf = O^2 * N * F, B_cf = F, P_cf = W_cf + B_cf.
O, N, F = 6, 256, 4096   # previous output width, previous kernel count, FC neurons

W_cf = (O * O * N) * F   # every input value connects to every neuron
B_cf = F                 # one bias per neuron
P_cf = W_cf + B_cf       # total parameters of the layer

(W_cf, B_cf, P_cf)



(37748736, 4096, 37752832)

이 수는 모든 Conv layer의 parameter 갯수들보다 많은 수(그만큼 FC layer에는 많은 파라미터들이 필요)

마지막 FC layer인 FC-3

In [19]:
# Parameter count of an FC layer fed by another FC layer (here FC-3):
# W_ff = F_{-1} * F, B_ff = F, P_ff = W_ff + B_ff.
F_1, F = 4096, 1000   # neurons in the previous FC layer, neurons in this FC layer

W_ff = F * F_1        # one weight per (previous neuron, current neuron) pair
B_ff = F              # one bias per neuron
P_ff = B_ff + W_ff    # total parameters of the layer

(W_ff, B_ff, P_ff)

(4096000, 1000, 4097000)

___

#### 앞에서 살펴본 mnist CNN 모델에 대한 출력 shape와 파라미터에 대해 알아보자. 

In [20]:
# Print the per-layer output shapes and parameter counts of the MNIST CNN built
# earlier in this notebook, for comparison with the hand calculations that follow.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 64)        640       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 3, 64)          36928     
_________________________________________________________________
flatten (Flatten)            (None, 576)               0         
_________________________________________________________________
dropout (Dropout)            (None, 576)               0

In [21]:
# Calculate the output width O and the parameter count for the first
# convolutional layer of our model.
#
# Hyperparameters the model was built with:
#   kernel_size (K) = 3
#   filters     (N) = 64
#   pool_size       = 2   (pooling only; NOT the P below, which is the padding)

I = 28   # input image width
K = 3    # kernel width
N = 64   # number of kernels
S = 1    # stride
P = 0    # padding; set here so the cell does not depend on a value of P
         # left behind by an earlier cell

# O = (I - K + 2P) / S + 1; floor division keeps the width an integer.
O = (I - K + 2*P)//S + 1
print('first Conv Layer의 출력 이미지의 크기(width) =',int(O))
print('first Conv Layer의 출력 이미지의 shape =',(O,O,N))

C = 1    # input channels (the model's input_shape ends in 1)

Wc = K**2*C*N   # weights: K^2 * C per kernel, N kernels
Bc = N          # one bias per kernel
Pc = Wc+Bc      # total parameters of the layer

print('first Conv Layer의 파라미터 갯수 =',Pc)



first Conv Layer의 출력 이미지의 크기(width) = 26
first Conv Layer의 출력 이미지의 shape = (26, 26, 64)
first Conv Layer의 파라미터 갯수 = 640


In [22]:
# First max-pooling layer of our model: output width from O = (I - P_s) / S + 1.
I = O          # input width = output width left by the preceding cell
K, N = 3, 64   # kernel width (not used by the pooling formula), channel count

P_s = 2        # pooling window
S = 2          # In Keras, when strides is not given it defaults to the pool size.
               # Example with explicit strides:
               # max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same')

# Floor division keeps the width an integer.
O = (I - P_s) // S + 1
print('the first maxpooling layer의 출력 이미지의 크기(width) =', O)
print('the first maxpooling layer의 출력 이미지의 shape =', (O, O, N))

the first maxpooling layer의 출력 이미지의 크기(width) = 13
the first maxpooling layer의 출력 이미지의 shape = (13, 13, 64)


__model.summary()와 같은 결과가 나오는지 확인해 보세요.__