# 范例4. R-CNN

## 需安装 opencv 扩展版

#### pip uninstall opencv-contrib-python opencv-python
#### pip install opencv-contrib-python

In [1]:
import zipfile
import os

# Training images archive
path_to_zip_file = './images_Object_Detection/Images.zip'
directory_to_extract_to = './images_Object_Detection/'

# Extract only when the extracted "Images" folder is missing. Testing the
# parent directory would never trigger extraction, because that directory
# already exists (it is where the zip file itself lives).
# NOTE(review): assumes the archive unpacks into an "Images" sub-folder,
# which is the location later read through `path` - confirm.
if not os.path.isdir(os.path.join(directory_to_extract_to, 'Images')):
    with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
        zip_ref.extractall(directory_to_extract_to)

In [2]:
# Annotation archive
path_to_zip_file = './images_Object_Detection/Airplanes_Annotations.zip'
directory_to_extract_to = './images_Object_Detection/'

# Extract only when the extracted "Airplanes_Annotations" folder is missing.
# Testing the parent directory would never trigger extraction, because that
# directory already exists (it is where the zip file itself lives).
# NOTE(review): assumes the archive unpacks into an "Airplanes_Annotations"
# sub-folder, which is the location later read through `annot` - confirm.
if not os.path.isdir(os.path.join(directory_to_extract_to,
                                  'Airplanes_Annotations')):
    with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
        zip_ref.extractall(directory_to_extract_to)

In [3]:
# 载入套件
import os,cv2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

In [4]:
# Image and annotation directories
path = "./images_Object_Detection/Images"
annot = "./images_Object_Detection/Airplanes_Annotations"

# Show one training image: display it, draw its annotated boxes on it,
# then display it again in a new figure.
for annot_file in os.listdir(annot):
    # The image shares the annotation file's base name
    filename = annot_file.split(".")[0]+".jpg"
    print(filename)
    image_path = os.path.join(path, filename)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None
        raise FileNotFoundError("cannot read image: " + image_path)
    # OpenCV loads pixels as BGR while matplotlib expects RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    df = pd.read_csv(os.path.join(annot, annot_file))
    plt.axis('off')
    plt.imshow(img)
    # Each row holds "x1 y1 x2 y2":
    # (x1, y1) is the top-left corner, (x2, y2) the bottom-right corner
    for _, row in df.iterrows():
        # .iloc[0] selects the first column by position; plain row[0] relies
        # on a deprecated positional fallback of Series indexing
        x1, y1, x2, y2 = (int(v) for v in row.iloc[0].split(" ")[:4])
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
    plt.figure()
    plt.axis('off')
    plt.imshow(img)
    # Only the first annotation file is shown
    break

FileNotFoundError: [WinError 3] 系統找不到指定的路徑。: './images_Object_Detection/Airplanes_Annotations'

In [None]:
# Region proposal with Selective Search
# Read the image
image_path = os.path.join(path, "42850.jpg")
im = cv2.imread(image_path)
if im is None:
    # cv2.imread signals an unreadable file by returning None
    raise FileNotFoundError("cannot read image: " + image_path)

# Selective Search (fast mode); `ss` is reused by later cells
cv2.setUseOptimized(True)
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(im)
ss.switchToSelectiveSearchFast()
rects = ss.process()

# Draw every proposed rectangle (x, y, width, height) on a copy of the image
imOut = im.copy()
for x, y, w, h in rects:
    cv2.rectangle(imOut, (x, y), (x+w, y+h), (0, 255, 0), 1, cv2.LINE_AA)

plt.axis('off')
# OpenCV images are BGR; convert so matplotlib shows the true colors
plt.imshow(cv2.cvtColor(imOut, cv2.COLOR_BGR2RGB))

In [None]:
# Intersection over Union (IoU) helper
def get_iou(bb1, bb2):
    """Return the Intersection over Union of two axis-aligned boxes.

    Args:
        bb1: dict with keys 'x1', 'y1', 'x2', 'y2' where x1 < x2 and y1 < y2.
        bb2: dict with the same keys and constraints.

    Returns:
        The overlap area divided by the union area, as a float in
        [0.0, 1.0]; 0.0 when the boxes do not overlap.

    Raises:
        ValueError: if either box does not satisfy x1 < x2 and y1 < y2.
    """
    # Validate with explicit raises: `assert` statements are removed when
    # Python runs with -O, which would let degenerate boxes through.
    for label, box in (("bb1", bb1), ("bb2", bb2)):
        if not (box['x1'] < box['x2'] and box['y1'] < box['y2']):
            raise ValueError(
                label + " must satisfy x1 < x2 and y1 < y2: " + repr(box))

    # Corners of the intersection rectangle
    x_left = max(bb1['x1'], bb2['x1'])
    y_top = max(bb1['y1'], bb2['y1'])
    x_right = min(bb1['x2'], bb2['x2'])
    y_bottom = min(bb1['y2'], bb2['y2'])

    # No overlap along at least one axis
    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    bb1_area = (bb1['x2'] - bb1['x1']) * (bb1['y2'] - bb1['y1'])
    bb2_area = (bb2['x2'] - bb2['x1']) * (bb2['y2'] - bb2['y1'])

    # Union = sum of both areas minus the doubly counted intersection
    iou = intersection_area / float(bb1_area + bb2_area - intersection_area)
    # Internal sanity checks on the result
    assert iou >= 0.0
    assert iou <= 1.0
    return iou

In [None]:
# Build the training set from Selective Search proposals

# Cropped proposals (resized to 224x224) and their labels:
# 1 = positive sample, 0 = negative sample
train_images=[]
train_labels=[]

# Scan every annotation file
for file_idx, annot_file in enumerate(os.listdir(annot)):
    # Only annotation files whose name starts with "airplane" are used
    if not annot_file.startswith("airplane"):
        continue
    filename = annot_file.split(".")[0]+".jpg"
    try:
        print(file_idx, filename)

        # Read the image and its annotation file
        image = cv2.imread(os.path.join(path, filename))
        df = pd.read_csv(os.path.join(annot, annot_file))

        # Collect every annotated box; each row holds "x1 y1 x2 y2"
        gtvalues=[]
        for _, row in df.iterrows():
            # .iloc[0] selects the first column by position; plain row[0]
            # relies on a deprecated positional fallback of Series indexing
            x1, y1, x2, y2 = (int(v) for v in row.iloc[0].split(" ")[:4])
            gtvalues.append({"x1":x1,"x2":x2,"y1":y1,"y2":y2})

        # Region proposals for this image
        ss.setBaseImage(image)
        ss.switchToSelectiveSearchFast()
        ssresults = ss.process()
        imout = image.copy()

        counter = 0       # number of positive samples collected
        falsecounter = 0  # number of negative samples collected
        fflag = 0         # 1: a comparison was made after 30 positives
        bflag = 0         # 1: a comparison was made after 30 negatives

        # Examine at most the first 2000 proposals
        for result in ssresults[:2000]:
            x, y, w, h = result
            # NOTE(review): the proposal is compared with each annotated box
            # independently, so with several boxes one proposal can be
            # appended more than once - confirm this is intended.
            for gtval in gtvalues:
                iou = get_iou(gtval, {"x1":x,"x2":x+w,"y1":y,"y2":y+h})

                # Collect up to 30 positive samples (IoU > 0.70)
                if counter < 30:
                    if iou > 0.70:
                        timage = imout[y:y+h, x:x+w]
                        resized = cv2.resize(timage, (224,224),
                                             interpolation = cv2.INTER_AREA)
                        train_images.append(resized)
                        train_labels.append(1)
                        counter += 1
                else:
                    fflag = 1

                # Collect up to 30 negative samples (IoU < 0.3)
                if falsecounter < 30:
                    if iou < 0.3:
                        timage = imout[y:y+h, x:x+w]
                        resized = cv2.resize(timage, (224,224),
                                             interpolation = cv2.INTER_AREA)
                        train_images.append(resized)
                        train_labels.append(0)
                        falsecounter += 1
                else:
                    bflag = 1

            # Both quotas are exhausted: stop scanning instead of looping
            # over the remaining proposals without doing anything
            if fflag == 1 and bflag == 1:
                print("inside")
                break
    except Exception as err:
        # Best effort: report the failing image and move on to the next one
        print(err)
        print("error in "+filename)

In [None]:
from joblib import dump
# Save the collected crops and labels to disk
dump(value=train_images, filename='train_images.joblib')
dump(value=train_labels, filename='train_labels.joblib')


In [None]:
from joblib import load
# Restore the crops and labels saved earlier.
# NOTE: joblib.load unpickles its input; only load files you trust.
train_images = load(filename='train_images.joblib')
train_labels = load(filename='train_labels.joblib')

In [None]:
# Number of samples available (positive and negative crops together)
len(train_images)

In [None]:
# 定义模型
from tensorflow.keras.layers import Dense
from tensorflow.keras import Model
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16

# VGG16 with ImageNet weights, including its fully connected top layers
vggmodel = VGG16(weights='imagenet', include_top=True)

# Freeze the first 15 layers so they are not retrained
for layer in vggmodel.layers[:15]:
    print(layer)
    layer.trainable = False

# Attach a custom 2-way softmax classifier to the second-to-last layer
X = vggmodel.layers[-2].output
predictions = Dense(2, activation="softmax")(X)
model_final = Model(inputs=vggmodel.input, outputs=predictions)

# Loss function, optimizer and evaluation metric
from tensorflow.keras.optimizers import Adam
# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept
opt = Adam(learning_rate=0.0001)
model_final.compile(loss=tf.keras.losses.categorical_crossentropy,
                    optimizer=opt, metrics=["accuracy"])
model_final.summary()

In [None]:
# 定义函数，将标记 Y 转为二个变数，
from sklearn.preprocessing import LabelBinarizer

class MyLabelBinarizer(LabelBinarizer):
    """LabelBinarizer variant that yields two columns for binary targets.

    For a binary target the parent's encoding is kept as column 0 and its
    complement (1 - value) is appended as column 1. Any other target type
    is passed through to the parent unchanged.
    """

    def transform(self, y):
        """Encode `y`; binary targets gain a complementary second column."""
        encoded = super().transform(y)
        if self.y_type_ != 'binary':
            return encoded
        return np.hstack((encoded, 1-encoded))

    def inverse_transform(self, Y, threshold=None):
        """Decode `Y`; for binary targets only column 0 is decoded."""
        source = Y[:, 0] if self.y_type_ == 'binary' else Y
        return super().inverse_transform(source, threshold)

In [None]:
# 资料前置处理，切割训练及测试资料
from sklearn.model_selection import train_test_split

# The author's PC ran short of memory, so only the first 10000 samples are used
X_new, y_new = (np.array(samples[:10000])
                for samples in (train_images, train_labels))

# Encode the labels as two columns
lenc = MyLabelBinarizer()
Y = lenc.fit_transform(y_new)

# Hold out 10% of the samples for testing
splits = train_test_split(X_new, Y, test_size=0.10)
X_train, X_test, y_train, y_test = splits
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

In [None]:
# Data augmentation: random flips and rotations, for the training data only
trdata = ImageDataGenerator(horizontal_flip=True,
                            vertical_flip=True, rotation_range=90)
traindata = trdata.flow(x=X_train, y=y_train)

# Validation data is fed through unmodified: randomly transforming it would
# make val_loss noisy, and val_loss drives checkpointing and early stopping
tsdata = ImageDataGenerator()
testdata = tsdata.flow(x=X_test, y=y_test)

In [None]:
# 模型训练
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
# Callbacks: keep the model with the lowest val_loss, and stop early when
# val_loss stops improving. ModelCheckpoint already saves once per epoch by
# default, so the deprecated `period=1` argument is not needed.
checkpoint = ModelCheckpoint("ieeercnn_vgg16_1.h5", monitor='val_loss',
                             verbose=1, save_best_only=True,
                             save_weights_only=False, mode='auto')
# With patience=100 and only 20 epochs, early stopping cannot fire in this run
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=100,
                      verbose=1, mode='auto')

# Train for only 20 epochs to save time; a real project should train longer
# (the full-length setting used here previously was epochs=1000).
# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from newer Keras releases.
hist = model_final.fit(traindata, steps_per_epoch=10,
                       epochs=20, validation_data=testdata,
                       validation_steps=2,
                       callbacks=[checkpoint, early])

In [None]:
# 绘制模型训练过程的准确率
import matplotlib.pyplot as plt
# Training accuracy first, then validation accuracy, one curve each
for series in ('accuracy', 'val_accuracy'):
    plt.plot(hist.history[series])
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(["Accuracy","Validation Accuracy"])
plt.show()

In [None]:
# Try the model on one test sample
im = X_test[100]
# The crops come from cv2.imread and are BGR; convert for display only.
# The model is fed the unconverted array.
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
img = np.expand_dims(im, axis=0)  # add the batch dimension
out = model_final.predict(img)

# Report the prediction by comparing the two output scores
if out[0][0] > out[0][1]:
    print("有飞机")
else:
    print("没有飞机")

In [None]:
# Run detection on every image whose file name starts with "4"
z=0  # number of images processed
for file_name in os.listdir(path):
    if not file_name.startswith("4"):
        continue
    z += 1
    img = cv2.imread(os.path.join(path, file_name))
    # Region proposals
    ss.setBaseImage(img)
    ss.switchToSelectiveSearchFast()
    ssresults = ss.process()
    imout = img.copy()

    # Classify at most the first 2000 proposals
    for x, y, w, h in ssresults[:2000]:
        # Crop from the untouched image: cropping from `imout` would feed
        # the classifier regions that already contain drawn boxes
        timage = img[y:y+h, x:x+w]
        resized = cv2.resize(timage, (224,224), interpolation = cv2.INTER_AREA)
        out = model_final.predict(np.expand_dims(resized, axis=0))

        # Draw the box only when the first output score exceeds 0.65
        if out[0][0] > 0.65:
            cv2.rectangle(imout, (x, y), (x+w, y+h), (0, 255, 0), 1, cv2.LINE_AA)
    plt.figure()
    # OpenCV images are BGR; convert so matplotlib shows the true colors
    plt.imshow(cv2.cvtColor(imout, cv2.COLOR_BGR2RGB))