In [163]:
%matplotlib inline
import pickle
import os
import png
import numpy as np
import pandas as pd
import shutil
from collections import OrderedDict

In [2]:
rootpath = '/data/cifar100/cifar-100-python/'
targetpath = '/data/cifar100/'
os.listdir(rootpath)

['test', 'meta', 'file.txt~', 'train']

In [186]:
is_write_image = True
dfs = dict()

with open(os.path.join(rootpath,'meta'), 'rb') as f:
    labels = pickle.load(f, encoding="ASCII")
    
for datafile in ('train', 'test'):
    path = os.path.join(rootpath, datafile)
    
    with open(path, 'rb') as f:
        file = pickle.load(f, encoding='bytes')
    
    filenames = [f.decode('ASCII') for f in file[b'filenames']]
    fine_labels = file[b'fine_labels']
    coarse_labels = file[b'coarse_labels']
    data = file[b'data']
    
    output_dir = os.path.join(targetpath, datafile)
    os.makedirs(output_dir, exist_ok=True)
    
    df = pd.DataFrame(OrderedDict({'filenames': filenames,
                               'coarse_labels': list(map(lambda i: labels['coarse_label_names'][i], coarse_labels)),
                               'fine_labels': list(map(lambda i: labels['fine_label_names'][i], fine_labels))}))
    dfs[datafile] = df
    
    #Write the image files
    if is_write_image:
        for i, filename in enumerate(filenames):
            d = data[i].reshape((3,32,32)).transpose((1,2,0))
            with open(os.path.join(output_dir, filename), 'wb') as f:
                png.from_array(d, mode="RGB").save(f)


In [189]:
len(os.listdir(targetpath+'train')) , len(os.listdir(targetpath+'test'))

(49999, 9999)

In [190]:
dfs['train'].shape, dfs['test'].shape

((50000, 3), (10000, 3))

In [191]:
is_combine = True
output_dir = os.path.join(targetpath, 'train_full')

if is_combine:
    os.makedirs(output_dir, exist_ok=True)
    for phase in ('train', 'test'):
        phase_path = os.path.join(targetpath, phase)
        for file in os.listdir(phase_path):
            shutil.move(os.path.join(phase_path, file), os.path.join(output_dir, file))
    
    df = pd.concat([dfs['train'], dfs['test']])
    df.to_csv(f'{targetpath}train_full.csv', compression=None, index=False)
else:
    dfs['train'].to_csv(f'{targetpath}train.csv', compression=None, index=False)
    dfs['test'].to_csv(f'{targetpath}test.csv', compression=None, index=False)

# Extract two class

In [227]:
df_apple_pear = df.loc[(df['fine_labels'] == 'apple') | (df['fine_labels'] == 'pear')]
df_apple_pear.to_csv(f'{targetpath}apple_pear.csv', compression=None, index=False)
df_apple_pear.head()

Unnamed: 0,filenames,coarse_labels,fine_labels
2,mcintosh_s_000643.png,fruit_and_vegetables,apple
176,macoun_s_000065.png,fruit_and_vegetables,apple
270,bartlett_pear_s_000454.png,fruit_and_vegetables,pear
283,bosc_s_000204.png,fruit_and_vegetables,pear
304,red_delicious_s_000552.png,fruit_and_vegetables,apple


In [228]:
filename_apple_pear = df_apple_pear['filenames'].tolist()

src_path = os.path.join(targetpath, 'train_full')
des_path = os.path.join(targetpath, 'apple_pear')

os.makedirs(des_path, exist_ok=True)

for f in filename_apple_pear:
    shutil.copy(os.path.join(src_path, f), des_path)