In [1]:
from glob import glob
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
import time

### About data type functions

In [2]:
def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    """Returns a float_list from a float / double."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [3]:
def image_example(imageString, label, name):
    imageShape = tf.image.decode_jpeg(imageString).shape

    feature = {
        'image_raw': _bytes_feature(imageString), #이미지는 0~255의 3차원값들
        'landmark_id': _int64_feature(label), #랜드마크 분류값
        'id':_bytes_feature(name) #이미지 이름
    }

    return tf.train.Example(features=tf.train.Features(feature=feature))

### About file names

In [4]:
def city_name(n):
    cityFolder = glob("./train/*")
    return cityFolder[n].split('/')[-1]

In [5]:
def file_name_by_city(n):
    cityFolder = glob("./train/*")
    folderNames = glob(cityFolder[n] +'/*')
    print('folder_num : ',len(folderNames))
    return folderNames

In [6]:
def get_image_location(folderNames):
    imageLabels = {}
    for foidx in range(len(folderNames)):
        fileNames = glob(folderNames[foidx]+'/*')
        for i in fileNames:
            imageLabels[i] = pictureCodeDic[i.split('/')[-1].split('.')[0]]
    print("file_num : ", len(imageLabels))
    return imageLabels

In [7]:
#write

In [60]:
def convert_func(imageLabels, maxiter = 1e+6):
    start = time.time()
    it = 0
    for filename, label in tqdm(imageLabels.items()):
        if(it>=maxiter):
            break
        imagename = filename.split('/')[-1].split('.')[0]
        recordFile = './trainset_/train_'+imagename+'.tfrecords' #file name

        with tf.io.TFRecordWriter(recordFile) as writer:
            imageString = open(filename, 'rb').read()
            tfExample = image_example(imageString, label, imagename.encode())
            writer.write(tfExample.SerializeToString())
        it+=1
    print(time.time() -start, '(s)')

In [9]:
#picture name to code
pictureCode = pd.read_csv("train.csv")
pictureCodeDic = {}
for i in tqdm(range(len(pictureCode))):
    pictureCodeDic[pictureCode.iloc[i]['id']] = pictureCode.iloc[i]['landmark_id']

100%|██████████| 88102/88102 [00:21<00:00, 4165.02it/s]


In [10]:
city_name(3)

'부산시'

In [11]:
folderNames = file_name_by_city(3)

folder_num :  127


In [12]:
imageLabels = get_image_location(folderNames)

file_num :  10519


In [61]:
convert_func(imageLabels)

 10%|▉         | 1000/10519 [00:22<03:38, 43.62it/s]

22.92837929725647 (s)





### 1000개만

In [67]:
uniqueLabels = set(list(imageLabels.values())[:1000])

In [68]:
labelDic = {}

In [69]:
for idx, u in enumerate(uniqueLabels):
    labelDic[u] = idx

In [72]:
newLabels = {}

In [73]:
for key, value in list(imageLabels.items())[:1000]:
    newLabels[key] = labelDic[value]

In [75]:
convert_func(newLabels)

100%|██████████| 1000/1000 [00:05<00:00, 168.31it/s]

5.94474458694458 (s)



