# csvからアノテーションデータロードするSSD テスト

## アノテーションファイルをファイル出力する

In [1]:
import os
import numpy as np
import json
import shutil

# Directories holding the training images and their annotation files.
_train_images_path = r'D:\work\AI_Edge_Contest\object_detect\origdata\01.zip_expansion\dtc_train_images'
_train_annotations_path = r'D:\work\AI_Edge_Contest\object_detect\origdata\01.zip_expansion\dtc_train_annotations'
# os.listdir() returns entries in arbitrary order. Sort the listings so that
# everything derived from them (for example the category order, and therefore
# the label ids) is reproducible across machines and filesystems.
train_annotations_files = sorted(os.listdir(_train_annotations_path))
train_images_files = sorted(os.listdir(_train_images_path))

In [2]:
# Tally the annotations: number of labels in every annotation file, and
# number of labels per category over all files.
per_category = {}
per_image = []
for train_annotations_file in train_annotations_files:
    annotation_path = os.path.join(_train_annotations_path, train_annotations_file)
    with open(annotation_path) as f:
        annotation = json.load(f)
    labels = annotation['labels']
    per_image.append(len(labels))
    for label in labels:
        category = label['category']
        per_category[category] = per_category.get(category, 0) + 1

# Category names and their counts as parallel tuples, in first-seen order.
category_names = tuple(per_category)
vals = tuple(per_category.values())

category_names

('Car',
 'Bicycle',
 'Pedestrian',
 'Signal',
 'Signs',
 'Truck',
 'Bus',
 'SVehicle',
 'Motorbike',
 'Train')

In [3]:
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# class BboxDataset(GetterDataset):
class BboxDataset():
    """Bounding-box dataset read from an image folder and a JSON annotation folder.

    A sample is identified by its file stem: the image is
    ``<img_dir>/<stem><img_ext>`` and its annotation is
    ``<annotation_dir>/<stem><annotation_ext>``.

    Args:
        img_dir: folder that is listed to discover the samples.
        annotation_dir: folder holding one annotation file per sample.
        categories: sequence of category names; the position of a name in
            this sequence is its label id.
        img_ext: image file extension, including the leading dot.
        annotation_ext: annotation file extension, including the leading dot.
    """

    def __init__(self, img_dir, annotation_dir, categories, img_ext='.jpg', annotation_ext='.json'):
        super(BboxDataset, self).__init__()

        # Only files carrying the image extension are samples. Strip exactly
        # that extension (so stems containing dots are kept intact) and sort,
        # because os.listdir() gives no ordering guarantee.
        self.names = sorted(
            file_name[:len(file_name) - len(img_ext)]
            for file_name in os.listdir(img_dir)
            if file_name.endswith(img_ext)
        )
        self.img_dir = img_dir
        self.annotation_dir = annotation_dir
        self.categories = categories
        self.img_ext = img_ext
        self.annotation_ext = annotation_ext
        #self.add_getter('img', self.get_image)
        #self.add_getter(('bbox', 'label'), self.get_annotation)

    def __len__(self):
        """Return the number of samples found in ``img_dir``."""
        return len(self.names)

    def get_image(self, i):
        """Load sample ``i`` and return its pixels as a NumPy array.

        The array is an independent, writable copy, so callers may draw on it.
        """
        name = self.names[i]
        img_path = os.path.join(self.img_dir, name + self.img_ext)
        # The context manager closes the file handle; np.array() forces the
        # pixel data to be read before that happens.
        with Image.open(img_path) as img:
            return np.array(img)

    def get_annotation(self, i):
        """Return ``(bbox, label, name)`` for sample ``i``.

        Returns:
            bbox: float32 array of shape (n, 4) with rows ``[y1, x1, y2, x2]``;
                shape (0, 4) when no label matches ``categories``.
            label: int32 array of shape (n,) with indices into ``categories``.
            name: file stem of the sample.

        Labels whose category is not in ``categories`` are skipped.
        """
        name = self.names[i]
        annotation_path = os.path.join(self.annotation_dir, name + self.annotation_ext)
        with open(annotation_path, encoding='utf-8') as f:
            annotation = json.load(f)
        bbox = []
        label = []

        for l in annotation['labels']:
            if l['category'] in self.categories:
                bb = l['box2d']
                bbox.append([bb['y1'], bb['x1'], bb['y2'], bb['x2']])
                label.append(self.categories.index(l['category']))
        # reshape keeps the (n, 4) layout even when there are no boxes at all.
        bbox = np.array(bbox).astype(np.float32).reshape(-1, 4)
        label = np.array(label).astype(np.int32)

        return bbox, label, name

def show_img_box(data, id):
    """Display one sample with its bounding boxes, using a different colour per class.

    Args:
        data: dataset object providing ``get_image(i)``, ``get_annotation(i)``
            and a ``categories`` sequence indexed by label id.
        id: index of the sample to show. (The name shadows the builtin but is
            kept so that keyword callers keep working.)

    Prints the sample name and shows the annotated image with matplotlib.
    """
    # One colour per label id; ids beyond the palette wrap around instead of
    # leaving the colour undefined.
    palette = (
        (255, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (100, 255, 0),
        (100, 100, 0),
        (100, 100, 100),
        (50, 100, 100),
        (50, 100, 50),
        (50, 50, 100),
        (50, 50, 50),
    )

    # Draw on a private, writable copy: the array handed out by the dataset
    # may be read-only and should not be modified as a side effect of viewing.
    img = np.array(data.get_image(id))
    bbox, label, name = data.get_annotation(id)
    for b, l in zip(bbox, label):
        col = palette[int(l) % len(palette)]
        # Boxes are stored as float [y1, x1, y2, x2]; OpenCV drawing functions
        # expect integer (x, y) points.
        top_left = (int(b[1]), int(b[0]))
        bottom_right = (int(b[3]), int(b[2]))
        cv2.rectangle(img, top_left, bottom_right, col, 5)
        cv2.putText(img, data.categories[l], top_left, cv2.FONT_HERSHEY_SIMPLEX, 2, col, 5)
    print(name)
    plt.imshow(img)
    plt.show()

In [4]:
data = BboxDataset(_train_images_path, _train_annotations_path, category_names)

# Only the first 15 samples are exported (use len(data) to export everything).
# Clamp to the dataset size so a smaller dataset does not raise IndexError.
num_samples = min(15, len(data))

# Build one frame per image and concatenate once at the end; concatenating
# inside the loop copies all previous rows on every iteration.
frames = []
for sample_no in range(num_samples):
    bbox, label, name = data.get_annotation(sample_no)
    # reshape keeps four columns even for an image without any box.
    df = pd.DataFrame(np.asarray(bbox).reshape(-1, 4), columns=['y', 'x', 'y2', 'x2'])
    df['label_id'] = label
    #df['file_name'] = os.path.join('ssd_train', name+'.jpg')
    df['file_name'] = name + '.jpg'
    frames.append(df)
anno_df = pd.concat(frames)

# Raw corner coordinates (y, x, y2, x2), one row per box.
anno_df.to_csv('anno_df_base.csv', sep=',', index=False)

print('aaaa\n', anno_df.head())
# Convert corner coordinates to top-left corner plus width/height.
anno_df['width'] = anno_df['x2'] - anno_df['x']
anno_df['height'] = anno_df['y2'] - anno_df['y']
print('bbbb\n', anno_df.head())
# Keep the columns in the order file_name, x, y, width, height, label_id,
# with integer pixel values.
anno_df = anno_df.loc[:, ['file_name', 'x', 'y', 'width', 'height', 'label_id']]
anno_df = anno_df.astype({'x': np.int64, 'y': np.int64, 'width': np.int64, 'height': np.int64})

anno_df = anno_df.reset_index(drop=True)
# Written without a header row.
anno_df.to_csv('xywh_train.csv', sep=',', header=False, index=False)
print('cccc\n', anno_df.head())

aaaa
        y      x     y2     x2  label_id        file_name
0  573.0  925.0  628.0  995.0         0  train_00000.jpg
0  620.0    0.0  691.0  165.0         1  train_00001.jpg
1  581.0  142.0  746.0  211.0         2  train_00001.jpg
2  555.0  369.0  731.0  432.0         2  train_00001.jpg
3  560.0  772.0  620.0  806.0         2  train_00001.jpg
bbbb
        y      x     y2     x2  label_id        file_name  width  height
0  573.0  925.0  628.0  995.0         0  train_00000.jpg   70.0    55.0
0  620.0    0.0  691.0  165.0         1  train_00001.jpg  165.0    71.0
1  581.0  142.0  746.0  211.0         2  train_00001.jpg   69.0   165.0
2  555.0  369.0  731.0  432.0         2  train_00001.jpg   63.0   176.0
3  560.0  772.0  620.0  806.0         2  train_00001.jpg   34.0    60.0
cccc
          file_name    x    y  width  height  label_id
0  train_00000.jpg  925  573     70      55         0
1  train_00001.jpg    0  620    165      71         1
2  train_00001.jpg  142  581     69     165   

## dtc_train_module.py

In [1]:
%%time

import sys

# Folder containing the SSD code together with its image folders.
ssd_code_dir = r'C:\Users\shingo\jupyter_notebook\tfgpu_py36_work\01_google_drive_dl\SSD_code_20190107'
sys.path.append(ssd_code_dir)
import dtc_train_module

# CSV listing the ground-truth boxes (file name, x, y, width, height).
master_file = "xywh_train_no0id.csv"
# Folder with the training images.
train_dir = ssd_code_dir + r'\ssd_train'
# Folder with the evaluation images.
test_dir = ssd_code_dir + r'\ssd_test'
# Where the model file is saved.
model_path = r'weight_ssd_best.hdf5'

# Training hyperparameters.
epochs = 5            # number of epochs
batch_size = 5        # batch size
base_lr = 1e-3        # initial learning rate
num_classes = 10 + 1  # background plus the object classes

# Train the SSD model.
dtc_train_module.train_SSD300_NAG(
    master_file,
    train_dir,
    test_dir,
    model_path,
    epochs=epochs,
    batch_size=batch_size,
    base_lr=base_lr,
    num_classes=num_classes,
)

Using TensorFlow backend.


ssd_vgg
Train Items : 10
Test  Items : 5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
 - 5s - loss: 7.9047 - val_loss: 7.7247

Epoch 00001: val_loss improved from inf to 7.72470, saving model to weight_ssd_best.hdf5
Epoch 2/5
 - 1s - loss: 6.5599 - val_loss: 6.9433

Epoch 00002: val_loss improved from 7.72470 to 6.94326, saving model to weight_ssd_best.hdf5
Epoch 3/5
 - 1s - loss: 6.6571 - val_loss: 7.6147

Epoch 00003: val_loss did not improve
Epoch 4/5
 - 1s - loss: 6.4618 - val_loss: 6.7158

Epoch 00004: val_loss improved from 6.94326 to 6.71580, saving model to weight_ssd_best.hdf5
Epoch 5/5
 - 1s - loss: 5.7466 - val_loss: 6.4849

Epoch 00005: val_loss improved from 6.71580 to 6.48491, saving model to weight_ssd_best.hdf5
Elapsed Time : 0 hr 0 min 12 sec
Wall time: 24.4 s


## dtc_predict_module.py

In [4]:
%%time

import sys
sys.path.append(r'C:\Users\shingo\jupyter_notebook\tfgpu_py36_work\01_google_drive_dl\SSD_code_20190107')
%matplotlib inline
import dtc_predict_module

predict_dir = r'ssd_test'
predicted_dir = r'predicted_images'

model_path = r'weight_ssd_best.hdf5'
# Label id -> class name. Deliberately not named `dict`: that would shadow the
# builtin for every cell executed afterwards in the same kernel.
label_names = {0.0:"other", 1.0:"Car", 2.0:"Bicycle", 3.0:"Pedestrian", 4.0:"Signal", 5.0:"Signs", 6.0:"Truck"
               , 7.0:'Bus', 8.0:'SVehicle', 9.0:'Motorbike', 10.0:'Train'}

# About 0.6 seemed to be the lowest workable threshold.
conf_threshold=0.9

# Whether to keep lowering the threshold until something is detected.
#is_conf_threshold_down=False
is_conf_threshold_down=True

# Optional: classify the detected regions with a separate (non-SSD) classification model.
import keras
class_model = None
#class_model = keras.models.load_model(r'D:\work\AI_Edge_Contest\object_detect\classes\trained_results\class_0_5_model_InceptionResNetV2+SE_epoch10_from_02_keras_py\finetuning.h5'
#                                      , compile=False)
dict_class = {0.0:"Car", 1.0:"Bicycle", 2.0:"Pedestrian", 3.0:"Signal", 4.0:"Signs", 5.0:"Truck"}
img_height = 331
img_width = 331

dtc_predict_module.dtc_predict_py_edit(predict_dir
                                       , predicted_dir
                                       , label_names
                                       , model_path
                                       , conf_threshold=conf_threshold
                                       , is_conf_threshold_down=is_conf_threshold_down
                                       , class_model=class_model
                                       , dict_class=dict_class
                                       , img_height=img_height, img_width=img_width
                                      )

ssd_vgg
<keras.engine.training.Model object at 0x000001E4AC208AC8>


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.36it/s]


Wall time: 5.15 s


<Figure size 432x288 with 0 Axes>

## dtc_train.py

In [8]:
import os
import glob
import time
#from dtc_util import get_correct_boxes
from dtc_util_edit import get_correct_boxes
from dtc_generator import Generator
#from ssd_vgg import num_classes, input_shape
from ssd_vgg import input_shape
from ssd_vgg import create_model, freeze_layers, create_prior_box
from ssd_training import MultiboxLoss
from ssd_utils import BBoxUtility
import keras
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.utils.vis_utils import plot_model

# Training hyperparameters.
epochs = 20
batch_size = 5
base_lr = 1e-3  # initial learning rate

num_classes = 10 + 1
print(num_classes)

# Optimizer: SGD with Nesterov momentum. Alternatives that were tried:
# optimizer = keras.optimizers.Adam(lr=base_lr)
# optimizer = keras.optimizers.RMSprop(lr=base_lr)
optimizer = keras.optimizers.SGD(
    lr=base_lr,
    momentum=0.9,
    decay=1e-6,
    nesterov=True,
)

# 学習率のスケジュール関数
def schedule(epoch, decay=0.90):
    """Return the learning rate for ``epoch``: ``base_lr`` multiplied by ``decay`` once per epoch.

    Reads the module-level ``base_lr``.

    NOTE(review): a caller that passes a second positional argument (for
    example a current learning rate) binds it to ``decay`` - confirm how the
    scheduler callback invokes this function.
    """
    decay_factor = decay ** epoch
    return base_lr * decay_factor

# CSV file listing the ground-truth boxes (file name, x, y, width, height)
master_file = "xywh_train.csv"
# Folder containing the training images
train_dir = "ssd_train"
# Folder containing the evaluation images
test_dir = "ssd_test"

# Build a dictionary that returns the ground-truth boxes for an image file name
correct_boxes = get_correct_boxes(master_file, train_dir, test_dir, num_classes=num_classes)

# Collect the image file paths of both folders
train_path_list = glob.glob(os.path.join(train_dir, "*.*"))
test_path_list = glob.glob(os.path.join(test_dir, "*.*"))

# Create the model and load weights into it, matching layers by name
model = create_model(num_classes=num_classes)
model.load_weights("weights_SSD300.hdf5", by_name=True)

# Freeze the layers close to the input
freeze_layers(model, depth_level=1)

model.compile(optimizer=optimizer,
              loss=MultiboxLoss(num_classes).compute_loss)
#model.summary()
plot_model(model, "model_ssd.png")

# Create the default (prior) boxes
priors = create_prior_box()

# Create the image data generator
bbox_util = BBoxUtility(num_classes, priors)
gen = Generator(correct_boxes, bbox_util,
                train_path_list, test_path_list,
                (input_shape[0], input_shape[1]),
                batch_size)

print("Train Items : {}".format(gen.train_batches))
print("Test  Items : {}".format(gen.val_batches))

# Callbacks: keep only the best weights seen so far, and decay the learning
# rate every epoch.
callbacks = [
    ModelCheckpoint("weight_ssd_best.hdf5", verbose=1,
                    save_weights_only=True,
                    save_best_only=True),
    # Keras may call the schedule as schedule(epoch, lr). Passing `schedule`
    # directly would bind the current learning rate to its `decay` parameter,
    # so forward the epoch only.
    LearningRateScheduler(lambda epoch, lr=None: schedule(epoch)),
]

## 学習開始
#start_time = time.time()
#model.fit_generator(gen.generate(True),
#                    gen.train_batches//batch_size,
#                    epochs=epochs,
#                    verbose=1,
#                    callbacks=callbacks,
#                    validation_data=gen.generate(False),
#                    validation_steps=gen.val_batches//batch_size)
#end_time = time.time()
#
## 経過時間表示
#elapsed_time = end_time - start_time
#print("Elapsed Time : {0:d} hr {1:d} min {2:d} sec".
#      format(int(elapsed_time // 3600), int((elapsed_time % 3600) // 60), int(elapsed_time % 60)))
#

11
ssd_vgg
Train Items : 10
Test  Items : 5


In [2]:
next(gen.generate(True))

(array([[[[   5.90125794,  -47.83971842,  -79.68589861],
          [  60.16581556,   -3.5259912 ,  -51.12765348],
          [  76.83894718,   11.48866748,  -39.42994065],
          ...,
          [ -40.00757249,  -73.8468916 ,  -98.22995435],
          [ -44.71237118,  -79.38092676, -102.93475305],
          [ -50.15708737,  -87.31335234, -108.37946924]],
 
         [[ -10.82055763,  -62.07382817,  -89.77382962],
          [   5.91399371,  -46.99774976,  -82.16086865],
          [  58.39476711,   -1.98010096,  -46.26481738],
          ...,
          [ -40.81984152,  -73.82992416,  -99.0422198 ],
          [ -43.91238612,  -79.41017816, -101.30553152],
          [ -46.98081475,  -85.79555265, -103.54472369]],
 
         [[  10.4622681 ,  -35.81559081,  -78.4418343 ],
          [ -14.14198153,  -59.59060039,  -93.09524993],
          [   5.25870807,  -51.79921414,  -83.64539433],
          ...,
          [ -48.70136114,  -81.71145094, -106.92374658],
          [ -44.66413926,  -80.991164

In [3]:
gen.boxes

{'train_00000.jpg': array([[0.47778926, 0.47121711, 0.51394628, 0.51644737, 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 1.        ]]),
 'train_00001.jpg': array([[0.        , 0.50986842, 0.08522727, 0.56825658, 1.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        ],
        [0.07334711, 0.47779605, 0.1089876 , 0.61348684, 0.        ,
         1.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        ],
        [0.19059917, 0.45641447, 0.2231405 , 0.60115132, 0.        ,
         1.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        ],
        [0.39876033, 0.46052632, 0.41632231, 0.50986842, 0.        ,
         1.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.   

In [4]:
train_path_list

['ssd_train\\train_00004.jpg',
 'ssd_train\\train_00007.jpg',
 'ssd_train\\train_00002.jpg',
 'ssd_train\\train_00008.jpg',
 'ssd_train\\train_00001.jpg',
 'ssd_train\\train_00005.jpg',
 'ssd_train\\train_00000.jpg',
 'ssd_train\\train_00006.jpg',
 'ssd_train\\train_00003.jpg',
 'ssd_train\\train_00009.jpg']

## dtc_predict.py

In [3]:
import os
import glob
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from scipy.misc import imread
from keras.preprocessing import image
from ssd_utils import BBoxUtility
from ssd_vgg import num_classes, input_shape
from ssd_vgg import create_model, preprocess_input
from dtc_util import load_img

# Confidence threshold above which a box counts as a detection
conf_threshold = 0.8
# Folder containing the images to predict
predict_dir = "ssd_test"
# Paths of the images to predict
img_path_list = glob.glob(os.path.join(predict_dir, "*.*"))

# Folder where the annotated result images are saved
predicted_dir = "predicted_images"
os.makedirs(predicted_dir, exist_ok=True)

file_names = []  # file names
inputs = []      # images resized to the network input size
images_h = []    # heights of the original images
images_w = []    # widths of the original images
images = []      # original-size images, used to visualise the results

# Load the images
for path in img_path_list:
    file_names.append(os.path.basename(path))
    img, height, width = load_img(path, target_size=input_shape)
    img = image.img_to_array(img)
    inputs.append(img.copy())
    images_h.append(height)
    images_w.append(width)
    temp_image = imread(path)
    images.append(temp_image.copy())

# Preprocess the network inputs
inputs = preprocess_input(np.array(inputs))

# Build the model and load the trained weights
model = create_model(num_classes)
model.load_weights("weight_ssd_best.hdf5")

# Run the prediction and decode the raw output into boxes
pred_results = model.predict(inputs, batch_size=8, verbose=0)
bbox_util = BBoxUtility(num_classes)
bbox_results = bbox_util.detection_out(pred_results)

import pandas as pd
# Rows are collected in a list and turned into a DataFrame once at the end;
# appending to a DataFrame row by row copies the whole frame each time.
pred_columns = ['file_names', 'conf', 'label_name', 'x', 'y', 'w', 'h']
pred_rows = []

for file_no, file_name in enumerate(file_names):
    print(file_name)

    # Draw the original image
    plt.imshow(images[file_no] / 255.)

    # Columns of the decoded result, in the order they are read here
    detections = bbox_results[file_no]
    bbox_label = detections[:, 0]
    bbox_conf = detections[:, 1]
    bbox_xmin = detections[:, 2]
    bbox_ymin = detections[:, 3]
    bbox_xmax = detections[:, 4]
    bbox_ymax = detections[:, 5]

    # Keep only the boxes whose confidence exceeds the threshold
    top_indices = [i for i, conf in enumerate(bbox_conf) if conf > conf_threshold]
    if not top_indices:
        # Nothing to save for this image, but the figure must still be
        # cleared; otherwise this image stays on it while the next is drawn.
        plt.clf()
        continue

    img_h = images_h[file_no]
    img_w = images_w[file_no]
    currentAxis = plt.gca()

    for top_index in top_indices:
        # Scale the predicted box by the original image size (pixels)
        x = int(bbox_xmin[top_index]*img_w)
        y = int(bbox_ymin[top_index]*img_h)
        w = int((bbox_xmax[top_index]-bbox_xmin[top_index])*img_w)
        h = int((bbox_ymax[top_index]-bbox_ymin[top_index])*img_h)
        box = (x, y), w, h

        # Draw the predicted box
        conf = float(bbox_conf[top_index])
        # NOTE(review): label_name is always empty although bbox_label is
        # available above - confirm whether a class name should be filled in.
        label_name = ""
        display_txt = '{:0.2f}, {}'.format(conf, label_name)
        currentAxis.add_patch(plt.Rectangle(*box, fill=False, edgecolor="red", linewidth=2))
        currentAxis.text(x, y, display_txt, bbox={'facecolor': "red", 'alpha': 0.2})

        # Record the detection
        pred_rows.append([file_name, conf, label_name, x, y, w, h])

    # Save the annotated image
    plt.savefig(os.path.join(predicted_dir, file_name))
    plt.clf()

pred_df = pd.DataFrame(pred_rows, columns=pred_columns)
pred_df.to_csv('pred.csv', sep='\t', index=False)

Using TensorFlow backend.
`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.


ssd_vgg
train_00010.jpg
train_00011.jpg
train_00012.jpg
train_00013.jpg
train_00014.jpg
