In [1]:
import os
import cv2
import json
import numpy as np
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
from bert4keras.backend import keras, K

Using TensorFlow backend.


In [2]:
# Directory layout of the MSCOCO annotation data used by this notebook:
# per-image region JSON files live under regionfiles/, extracted features under features/.
_annotation_root = './data/MSCOCO/annotation/'

coco_train_data_path = _annotation_root + 'regionfiles/train2014/'
coco_val_data_path = _annotation_root + 'regionfiles/val2014/'

save_train_file = _annotation_root + 'features/train2014/'
save_val_file = _annotation_root + 'features/val2014/'

In [4]:
# Image model: ResNet101 loaded with ImageNet weights, without the top
# classification layers, and with global average pooling on the output.
_resnet = keras.applications.resnet

preprocessing_image = keras.preprocessing.image
preprocess_input = _resnet.preprocess_input
image_model = _resnet.ResNet101(
    weights='imagenet',
    include_top=False,
    pooling='avg',
)

In [6]:
def generate_object_data(folder, train=True, limit=10):
    """Read COCO region annotations and extract whole-image features.

    For every region-annotation JSON file found in ``folder`` the matching
    image is run through ``image_model`` and an ``.npy`` file is written
    containing a list of the form::

        [
         {'image_features': [...]},            # pooled features of the full image
         {'caption': str, 'key_words': str},   # one entry per annotated region
         {'caption': str, 'key_words': str},
         ...
        ]

    ``key_words`` is the space-terminated concatenation of the region's object
    names; it is the empty string when a region lists no objects.

    Args:
        folder: Directory holding the per-image region JSON files.
        train: If True use the ``train2014`` image/feature locations,
            otherwise the ``val2014`` ones.
        limit: Maximum number of annotation files to process. Defaults to 10,
            the size that used to be hard-coded for smoke testing; pass
            ``None`` to process every file.

    Raises:
        FileNotFoundError: If the image belonging to an annotation file
            cannot be read.
    """
    split = 'train2014' if train else 'val2014'
    image_dir = 'data/coco2014/%s' % split
    save_dir = './data/MSCOCO/annotation/features/%s/' % split
    os.makedirs(save_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so that the subset
    # selected by ``limit`` is reproducible, and ignore non-JSON entries.
    files = sorted(f for f in os.listdir(folder) if f.endswith('.json'))
    if limit is not None:
        files = files[:limit]

    for file in tqdm(files):
        stem = os.path.splitext(file)[0]

        with open(os.path.join(folder, file)) as fp:
            image_data = json.load(fp)

        img_path = '%s/%s.jpg' % (image_dir, stem)
        bgr = cv2.imread(img_path)
        if bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('Cannot read image: %s' % img_path)

        # Features of the whole image. Keras expects RGB while OpenCV loads
        # BGR, so the channel order is converted once here.
        img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        x = preprocess_input(np.expand_dims(img, axis=0))
        image_features = image_model.predict(x)
        res = [{'image_features': image_features.tolist()[-1]}]

        for image in image_data.values():
            res.append({
                'caption': image['phrase'],
                # Always present, even when the region has no objects.
                'key_words': ''.join(ob['name'] + ' ' for ob in image['objects']),
            })

        # The list of dicts is stored as a pickled object array, so it has to
        # be read back with np.load(..., allow_pickle=True).
        np.save(os.path.join(save_dir, stem + '.npy'), res)

In [8]:
# Extract features for the training split.
generate_object_data(folder=coco_train_data_path, train=True)

100%|██████████| 10/10 [00:02<00:00,  4.65it/s]


In [10]:
folder = './data/MSCOCO/annotation/features/train2014/'

# os.listdir returns entries in arbitrary order; sort and keep only .npy files
# so that the inspected sample is the same on every run.
files = sorted(f for f in os.listdir(folder) if f.endswith('.npy'))

# NOTE: allow_pickle=True unpickles arbitrary Python objects; only use it on
# feature files produced by this notebook, never on untrusted input.
data = np.load(os.path.join(folder, files[0]), allow_pickle=True)

In [11]:
# Display the records loaded from the first feature file.
data

array([{'image_features': [0.439405232667923, 0.8299930095672607, 0.17301341891288757, 0.1563645750284195, 0.2562093436717987, 0.15439428389072418, 0.039770759642124176, 0.07815071940422058, 0.42750877141952515, 0.42212575674057007, 0.2722374498844147, 0.0075561353005468845, 0.15585225820541382, 0.13951820135116577, 0.23674224317073822, 0.12040898203849792, 0.6502857804298401, 0.3489741086959839, 0.1828569918870926, 0.276386559009552, 1.0544675588607788, 0.07200933992862701, 0.06585022062063217, 1.2738630771636963, 0.8698516488075256, 0.24980232119560242, 0.3102017045021057, 1.0942505598068237, 0.5040007829666138, 0.29941678047180176, 0.18385104835033417, 0.16076500713825226, 0.06606091558933258, 0.2817089259624481, 0.016203174367547035, 0.2445075958967209, 0.0913417711853981, 0.35005512833595276, 0.44966837763786316, 0.07017365843057632, 0.7029258608818054, 0.14192979037761688, 0.06596033275127411, 0.46041613817214966, 0.7040941715240479, 0.4828716814517975, 1.445276141166687, 0.12767

In [12]:
# Number of records stored in the loaded feature file.
len(data)

6