<a href="https://colab.research.google.com/github/pinkdolphin11/ESAA/blob/main/HW_1209.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chapter 4. 합성곱 신경망(CNN)

## 4.6 객체 탐지(Object Detection)

### 6-1. 텐서플로 허브 활용

In [None]:
import tensorflow as tf #tensorflow
import tensorflow_hub as tfhub #tensorflow hub

#### 6-1-1. 샘플 이미지 준비

In [None]:
# Download the sample image and decode it into a float tensor
img_path = 'https://upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Gangnam_Seoul_January_2009.jpg/1280px-Gangnam_Seoul_January_2009.jpg'
img_file = tf.keras.utils.get_file(fname='gangnam', origin=img_path)  # path of the locally cached download
img_bytes = tf.io.read_file(img_file)  # raw file contents as a string tensor
img_uint8 = tf.image.decode_jpeg(img_bytes, channels=3)  # decode the bytes into a uint8 tensor
img = tf.image.convert_image_dtype(img_uint8, tf.float32)  # rescale to the 0~1 range

import matplotlib.pyplot as plt
plt.figure(figsize=(15,10))
plt.imshow(img)

In [None]:
img_input = tf.expand_dims(img, 0) # add a leading batch dimension
img_input.shape

#### 6-1-2. 사전 학습 모델

In [None]:
# Load the pre-trained detector from TensorFlow Hub - FasterRCNN + InceptionResNet V2
model = tfhub.load('https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1')

In [None]:
# Inspect which signatures (callable entry points) the loaded model exposes
model.signatures.keys()

In [None]:
# Pick the 'default' signature as the object-detection function
obj_detector = model.signatures['default']
obj_detector

#### 6-1-3. 추론

In [None]:
# Run inference on the batched sample image
result = obj_detector(img_input)
result.keys() # list the keys of the returned dictionary

In [None]:
# Length of the detection_scores output (presumably one score per detected object - TODO confirm)
len(result['detection_scores'])

In [None]:
# Visualize the object-detection results
boxes = result['detection_boxes']  # predicted boxes, normalized [ymin, xmin, ymax, xmax]
labels = result['detection_class_entities']  # predicted class names (byte strings)
scores = result['detection_scores']  # confidence of each detection

# Height and width of the sample image in pixels
img_height, img_width = img.shape[0], img.shape[1]

# Maximum number of detections to draw
obj_to_detect = 10

# Minimum confidence a detection needs in order to be drawn
score_threshold = 0.2

plt.figure(figsize=(15,10))
# Draw the image once, before the loop, so it is shown even when no
# detection passes the threshold and is not re-rendered for every box.
plt.imshow(img)
for i in range(min(obj_to_detect, boxes.shape[0])):
    if scores[i] < score_threshold:
        continue

    # Scale the normalized corners to pixel coordinates.
    ymin, xmin, ymax, xmax = boxes[i].numpy() * [img_height, img_width, img_height, img_width]

    # Closed rectangle outline around the detected object
    plt.plot([xmin, xmax, xmax, xmin, xmin], [ymin, ymin, ymax, ymax, ymin], color='yellow', linewidth=2)

    class_name = labels[i].numpy().decode('utf-8')
    infer_score = int(scores[i].numpy() * 100)
    annotation = '{}: {}%'.format(class_name, infer_score)
    # Place the caption just inside the top-left corner of the box.
    plt.text(xmin+10, ymin+20, annotation, color='white', backgroundcolor='blue', fontsize=10)

### 6-2. YOLO 객체 탐지

#### 6-2-1. Darknet YOLO 모델 추론하기

In [None]:
# Clone the Darknet GitHub repository
!git clone https://github.com/AlexeyAB/darknet

In [None]:
#GPU 활성화
%cd darknet
!sed -i 's/GPU=0/GPU=1' Makefile
!sed -i 's/CUDNN=0/CUDNN=1' Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1' Makefile

#Darknet 생성
!make

In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf

# Download the sample image below /content (as gangnam.jpg) and preview it
img_path = 'https://upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Gangnam_Seoul_January_2009.jpg/1280px-Gangnam_Seoul_January_2009.jpg'
local_path = tf.keras.utils.get_file(fname='gangnam.jpg', origin=img_path, cache_dir='/content')
img = tf.image.convert_image_dtype(
    tf.image.decode_jpeg(tf.io.read_file(local_path), channels=3),
    tf.float32,
)
plt.figure(figsize=(15,10))
plt.imshow(img)

In [None]:
# Run Darknet YOLOv4 detection on the downloaded sample image
# NOTE(review): assumes yolov4.weights already exists in the darknet directory;
# no download step for it is visible in this notebook - confirm.
!./darknet detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights /content/datasets/gangnam.jpg

In [None]:
# Read predictions.jpg from the darknet directory and display it
pred_bytes = tf.io.read_file('/content/darknet/predictions.jpg')
img = tf.image.convert_image_dtype(tf.image.decode_jpeg(pred_bytes, channels=3), tf.float32)
plt.figure(figsize=(15,10))
plt.imshow(img)

#### 6-2-2. 나만의 YOLO 모델 생성

In [None]:
#필요한 패키지 임포트
import tensorflow as tf
import numpy as np
import cv2
from google.colab.patches import cv2_imshow

# Parameter settings

# Image size
width_size = 256
height_size = 256
channel_size = 3
# Image arrays and Keras input shapes are laid out as (height, width, channels).
img_size = (height_size, width_size, channel_size)

cell_num = 3  # the image is split into a cell_num x cell_num grid
class_num = 3  # number of object classes to find

# Number of boxes drawn per cell
anchor_num = 1
# Per box: 1 objectness value + 4 box values + one slot per class
label_num = anchor_num * (5 + class_num)

epoch_num = 500  # number of training iterations

# Weights of the individual loss terms
loss_p_rate = 1.0  # objectness in cells that contain an object
loss_cod_rate = 5.0  # box coordinates and size
loss_c_rate = 1.0  # class values
loss_p_no_rate = 0.5  # objectness in cells without an object

In [None]:
# Mount Google Drive at /content/drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Draw random shapes on a blank canvas and build the matching ground-truth label
def make_img_label():
    """Generate one synthetic image and its grid label.

    Between one and three shape sprites are pasted into distinct grid cells
    at random positions and sizes.

    Returns:
        tuple: ``(img, label)`` where ``img`` has shape
        ``(height_size, width_size, channel_size)`` and ``label`` has shape
        ``(cell_num, cell_num, label_num)``. Each occupied cell stores
        ``[1, x_rate, y_rate, w_rate, h_rate]`` followed by a one-hot class.

    Raises:
        FileNotFoundError: if one of the sprite images cannot be read.
    """
    # Margin around the canvas so sprites overlapping the border can be
    # pasted without clipping; it is cropped away before returning.
    pad = 200
    img = np.zeros((height_size + 2 * pad, width_size + 2 * pad, channel_size))
    label = np.zeros((cell_num, cell_num, label_num))

    # Rows and columns are each sampled without replacement, so no two shapes
    # share a cell; the count can therefore never exceed cell_num.
    num_shape = np.random.randint(1, min(3, cell_num) + 1)
    rows = np.random.choice(range(cell_num), num_shape, replace=False)
    cols = np.random.choice(range(cell_num), num_shape, replace=False)

    # One sprite per class, stored as <class index>.png
    sprites = []
    for class_id in range(class_num):
        sprite_path = '/content/drive/MyDrive/ESAA/OB/Data/{}.png'.format(class_id)
        sprite = cv2.imread(sprite_path)
        if sprite is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('cannot read shape image: ' + sprite_path)
        sprites.append(sprite)

    for row, col in zip(rows, cols):
        shape_type = np.random.randint(0, class_num)
        x_rate = np.random.rand()
        y_rate = np.random.rand()
        w_rate = np.random.rand() * 0.3 + 0.1
        h_rate = np.random.rand() * 0.3 + 0.1

        label[row, col, :5] = [1, x_rate, y_rate, w_rate, h_rate]
        label[row, col, 5 + shape_type] = 1

        # Centre in pixels: offset inside the cell plus the cell's origin
        x = int(x_rate * width_size / cell_num + col * width_size / cell_num)
        y = int(y_rate * height_size / cell_num + row * height_size / cell_num)
        # Force even sizes so the sprite splits evenly around its centre
        w = int(w_rate * width_size / 2) * 2
        h = int(h_rate * height_size / 2) * 2

        top = y - h // 2 + pad
        left = x - w // 2 + pad
        img[top:top + h, left:left + w] = cv2.resize(sprites[shape_type], (w, h))

    img = img[pad:pad + height_size, pad:pad + width_size]

    return img, label

In [None]:
# Generate one random sample and display the image
img, label = make_img_label()
cv2_imshow(img)

In [None]:
# Draw boxes on an image from a ground-truth (or predicted) label
# A box is drawn only where the objectness value exceeds the threshold th
def show_box(img, label, th=0.3):
    """Return a copy of ``img`` with one rectangle per detected cell.

    Args:
        img: image of shape ``(height_size, width_size, 3)``.
        label: array indexed as ``label[row, col, k]`` with objectness at
            ``k = 0``, ``x_rate, y_rate, w_rate, h_rate`` at ``k = 1..4`` and
            class values from ``k = 5``.
        th: objectness threshold; cells at or below it are skipped.

    Returns:
        Image of shape ``(height_size, width_size, 3)`` with boxes drawn.
    """
    # Margin so rectangles reaching past the border can still be drawn;
    # it is cropped away before returning.
    pad = 200
    b_img = np.zeros((height_size + 2 * pad, width_size + 2 * pad, 3))
    b_img[pad:pad + height_size, pad:pad + width_size] = img

    # One line colour per class index; indices beyond the table wrap around
    # instead of leaving the colour undefined.
    class_colors = ([0, 0, 255], [255, 0, 0], [0, 255, 0])

    for i in range(cell_num):
        for j in range(cell_num):
            if not (label[i, j, 0] > th):
                continue
            x_rate = label[i, j, 1]
            y_rate = label[i, j, 2]
            w_rate = label[i, j, 3]
            h_rate = label[i, j, 4]
            shape_type = int(np.argmax(label[i, j, 5:]))
            line_color = class_colors[shape_type % len(class_colors)]

            # Same cell-relative to pixel conversion as make_img_label
            x = int(x_rate * width_size / cell_num + j * width_size / cell_num)
            y = int(y_rate * height_size / cell_num + i * height_size / cell_num)
            # 20 extra pixels so the outline sits outside the shape
            w = int(w_rate * width_size / 2) * 2 + 20
            h = int(h_rate * height_size / 2) * 2 + 20
            cv2.rectangle(
                b_img,
                (x - int(w / 2) + pad, y - int(h / 2) + pad),
                (x + int(w / 2) + pad, y + int(h / 2) + pad),
                line_color,
            )

    b_img = b_img[pad:pad + height_size, pad:pad + width_size]
    return b_img

In [None]:
# Display the sample image with its ground-truth boxes drawn on top
cv2_imshow(show_box(img, label))

In [None]:
# Transfer learning: frozen VGG16 backbone with a new detection head on top
vgg_model = tf.keras.applications.VGG16(include_top=False, input_shape=img_size)
vgg_model.trainable = False
i = tf.keras.Input(shape=img_size)
out = tf.keras.layers.Lambda((lambda x: x/255.))(i)  # scale pixel values by 1/255
out = vgg_model(out)
out = tf.keras.layers.Conv2D(256, 3, padding='same')(out)
out = tf.keras.layers.Conv2D(128, 3, padding='same')(out)
out = tf.keras.layers.Conv2D(64, 3, padding='same')(out)
out = tf.keras.layers.Flatten()(out)
out = tf.keras.layers.Dense(1024, activation='relu')(out)
# Every label value (objectness, box rates, one-hot class) lies in [0, 1].
# Sigmoid keeps predictions in that range; a ReLU output that falls below
# zero would receive no gradient and could not recover.
out = tf.keras.layers.Dense(cell_num * cell_num * label_num, activation='sigmoid')(out)
out = tf.keras.layers.Reshape((cell_num, cell_num, label_num))(out)
yolo_model = tf.keras.Model(inputs=[i], outputs=[out])
opt = tf.keras.optimizers.Adam(0.00001)

# Model summary
yolo_model.summary()

In [None]:
# Record the training progress as a video
fcc = cv2.VideoWriter_fourcc(*'DIVX')
# Positional arguments are (filename, fourcc, fps, frameSize). The fps must be
# the single value 1.0; writing it as "1, 0" shifts every later argument.
out = cv2.VideoWriter('hjk_yolo.avi', fcc, 1.0, (width_size, height_size))

try:
    for e in range(epoch_num):
        # One freshly generated sample per step, reshaped to a batch of one
        img, label = make_img_label()
        img = np.reshape(img, (1, height_size, width_size, channel_size))
        label = np.reshape(label, (1, cell_num, cell_num, label_num))

        loss_p_list = []
        loss_cod_list = []
        loss_c_list = []
        loss_p_no_list = []

        with tf.GradientTape() as tape:
            pred = yolo_model(img)

            # Visit every grid cell
            for i in range(cell_num):
                for j in range(cell_num):
                    if label[0, i, j, 0] == 1:
                        # Cell contains an object: objectness, box and class all count
                        loss_p_list.append(tf.square(label[0, i, j, 0] - pred[0, i, j, 0]))
                        for k in range(1, 5):
                            loss_cod_list.append(tf.square(label[0, i, j, k] - pred[0, i, j, k]))
                        for k in range(5, label_num):
                            loss_c_list.append(tf.square(label[0, i, j, k] - pred[0, i, j, k]))
                    else:
                        # Empty cell: only the objectness value counts
                        loss_p_no_list.append(tf.square(label[0, i, j, 0] - pred[0, i, j, 0]))

            loss_p = tf.reduce_mean(loss_p_list)
            loss_cod = tf.reduce_mean(loss_cod_list)
            loss_c = tf.reduce_mean(loss_c_list)
            loss_p_no = tf.reduce_mean(loss_p_no_list)

            # Final loss is the weighted sum of the four terms
            loss = loss_p_rate * loss_p + loss_cod_rate * loss_cod + loss_c_rate * loss_c + loss_p_no_rate * loss_p_no

        # Compute gradients of the loss and update the trainable parameters
        train_vars = yolo_model.trainable_variables
        grad = tape.gradient(loss, train_vars)
        opt.apply_gradients(zip(grad, train_vars))

        # Every 100 steps, write the current prediction as a video frame
        if e % 100 == 0:
            img = np.reshape(img, (height_size, width_size, channel_size))
            label = pred.numpy()
            label = np.reshape(label, (cell_num, cell_num, label_num))
            sample_img = np.uint8(show_box(img, label))
            out.write(sample_img)
        print(e, '완료', loss.numpy())
finally:
    # Release the writer even if training stops with an error
    out.release()

## 4.7 이미지 분할(Segmentation)

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
from tqdm.notebook import tqdm
import tensorflow_datasets as tfds
from google.colab.patches import cv2_imshow

In [None]:
# Image size
img_w = 128
img_h = 128
img_c = 3
# Keras input shapes are laid out as (height, width, channels).
img_shape = (img_h, img_w, img_c)

# Training settings
epoch_num = 5
learning_rate = 0.0001
buffer_size = 1000  # shuffle buffer size
batch_size = 16

In [None]:
# Load the dataset (and its metadata) from TensorFlow Datasets
ds_str = 'oxford_iiit_pet'
ds, info = tfds.load(name=ds_str, with_info=True)

In [None]:
# Image preprocessing function
def preprocess_image(ds):
    """Resize one dataset element and return ``(img, mask)``.

    Args:
        ds: one dataset element, read through its ``'image'`` and
            ``'segmentation_mask'`` keys.

    Returns:
        tuple: ``img`` as float32 divided by 255, and ``mask`` as int32
        with 1 subtracted from every value.
    """
    # tf.image.resize takes the target size as (height, width).
    target_size = (img_h, img_w)

    # Source image
    img = tf.image.resize(ds['image'], target_size)
    img = tf.cast(img, tf.float32) / 255.0

    # Segmentation mask (0, 1, 2 after the shift below). The mask holds class
    # ids, so it is resized with nearest-neighbour sampling; the default
    # bilinear method would blend neighbouring ids into meaningless values.
    mask = tf.image.resize(ds['segmentation_mask'], target_size, method='nearest')
    mask = tf.cast(mask, tf.int32)
    mask = mask - 1
    return img, mask

In [None]:
# Data preprocessing pipelines
def _make_pipeline(split):
    """Preprocess, shuffle, batch and prefetch one dataset split."""
    return split.map(preprocess_image).shuffle(buffer_size).batch(batch_size).prefetch(2)

train_ds = _make_pipeline(ds['train'])
test_ds = _make_pipeline(ds['test'])

print(train_ds)

In [None]:
# Take one sample batch from the training pipeline
img, mask = next(iter(train_ds))
len(img)

In [None]:
# Display the first image of the sample batch
img = np.array(img[0]) * 255.0  # undo the /255 scaling applied in preprocess_image
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # convert RGB to BGR channel order before display
cv2_imshow(img)

In [None]:
# Display the first mask of the sample batch
mask = (np.array(mask[0]/2)) * 255.0 # stretch the mask values (0, 1, 2) over the 0-255 display range
cv2_imshow(mask)

### 7-2. U-Net 모델(인코더-디코더)

In [None]:
base_model = tf.keras.applications.VGG16(input_shape=img_shape, include_top=False) # include_top=False: leave out the final classification layers

In [None]:
# Plot the model structure
tf.keras.utils.plot_model(base_model, show_shapes=True)

In [None]:
# Expose selected intermediate VGG16 layer outputs, deepest block first
skip_layer_names = [
    'block5_conv3',
    'block4_conv3',
    'block3_conv3',
    'block2_conv2',
    'block1_conv2',
]
f_model = tf.keras.Model(
    inputs=[base_model.input],
    outputs=[base_model.get_layer(name=layer_name).output for layer_name in skip_layer_names],
)

In [None]:
# Freeze the parameters (the pre-trained weights are used as-is in the encoder)
f_model.trainable = False

In [None]:
# Define the model with a U-Net structure

i = tf.keras.Input(shape=img_shape)

# Encoder feature maps, deepest first (the names spell out the expected
# height_width_channels of each map - presumably for a 128x128 input)
out_8_8_512, out_16_16_512, out_32_32_256, out_64_64_128, out_128_128_64 = f_model(i)

# Decoder: each stage upsamples by 2 with a transposed convolution and adds
# the encoder feature map paired with it
decoder_stages = (
    (512, out_16_16_512),
    (256, out_32_32_256),
    (128, out_64_64_128),
    (64, out_128_128_64),
)
out = out_8_8_512
for filters, skip in decoder_stages:
    out = tf.keras.layers.Conv2DTranspose(filters, 3, strides=2, padding='same')(out)
    out = tf.keras.layers.Add()([out, skip])

# Per-pixel output over 3 classes
out = tf.keras.layers.Conv2D(3, 3, activation='elu', padding='same')(out)
out = tf.keras.layers.Dense(3, activation='softmax')(out)

unet_model = tf.keras.Model(inputs=[i], outputs=[out])

In [None]:
# Plot the model structure
tf.keras.utils.plot_model(unet_model, show_shapes=True)

In [None]:
# Model summary
unet_model.summary()

In [None]:
# Compile and train the model
loss_f = tf.losses.SparseCategoricalCrossentropy()  # multi-class problem with 3 predicted classes
opt = tf.optimizers.Adam(learning_rate)

unet_model.compile(optimizer=opt, loss=loss_f, metrics=['accuracy'])
# train_ds is already batched by the input pipeline, so batch_size is not
# passed to fit(); Keras documents that it must not be given for datasets.
unet_model.fit(train_ds, epochs=epoch_num)

In [None]:
# Take one batch from the test pipeline
img, mask = next(iter(test_ds))

# Run the model on the batch
pred = unet_model.predict(img)

# Show the predicted segmentation of the first image (argmax over the last axis)
pred_img = np.argmax(pred[0], -1)
plt.imshow(pred_img)

In [None]:
# Show the ground-truth mask of the first image
plt.imshow(np.reshape(mask[0], (128,128)))