GIST Playground

In [None]:
import os
import glob
import gist
import tqdm
from tqdm.notebook import tqdm

import numpy as np
import matplotlib.image as mpimg

## Clustering

In [None]:
# read data
# Expected layout: <data_dir>/<char>/<group>/<image file>.
# Builds images[char][group] -> list of arrays returned by mpimg.imread.
data_dir = 'data/clustering'
images = {}

for file in glob.glob(data_dir + '/*/*/*'):
    # Take char and group from the two directories directly above the file
    # instead of splitting on '/' into a fixed number of parts: this keeps
    # working when data_dir has a different depth or when glob returns
    # OS-specific separators.
    group_dir = os.path.dirname(file)
    group = os.path.basename(group_dir)
    char = os.path.basename(os.path.dirname(group_dir))
    img = mpimg.imread(file)
    images.setdefault(char, {}).setdefault(group, []).append(img)
    
    
def estimate_features(images, extractor):
    """Average the extractor output over every group of images.

    Args:
        images: Nested mapping ``{char: {variant: [image, ...]}}``.
        extractor: Callable applied to each image; its results must support
            ``+`` with each other and division by an integer, and the
            averaged result must be iterable.

    Returns:
        A nested dict ``{char: {variant: [value, ...]}}`` where each list is
        the element-wise mean of the extractor outputs for that variant.
    """

    def mean_vector(samples):
        # Element-wise mean of the extracted vectors, as a plain list.
        vectors = [extractor(sample) for sample in samples]
        return list(sum(vectors) / len(vectors))

    return {
        char: {
            variant: mean_vector(samples)
            for variant, samples in variants.items()
        }
        for char, variants in images.items()
    }

In [None]:
def gist_extractor(image):
    """Compute a truncated GIST descriptor for one image.

    Args:
        image: 2-D (single-channel) or 3-D (multi-channel) image array, e.g.
            as returned by ``matplotlib.image.imread``.

    Returns:
        The first ``nblocks * sum(ops)`` (= 80) entries of the descriptor
        returned by ``gist.extract``.
    """
    image = np.asarray(image)
    if image.ndim == 2:
        # Single-channel input: append a channel axis and replicate it three
        # times. The new axis must be 2; axis=3 is out of range for the 3-D
        # result and raises AxisError on current NumPy.
        image = np.repeat(image[:, :, np.newaxis], 3, axis=2)

    if (np.issubdtype(image.dtype, np.floating)
            and image.size and image.max() <= 1.0):
        # matplotlib returns PNG pixels as floats in [0, 1]; casting those
        # straight to uint8 would truncate almost every pixel to 0, so scale
        # to the 0-255 range first.
        image = np.rint(image * 255.0)

    nblocks = 4
    ops = (8, 8, 4)
    descriptor = gist.extract(
        image.astype('uint8'),
        nblocks=nblocks,
        orientations_per_scale=ops)
    # NOTE(review): only the leading nblocks * sum(ops) values are kept;
    # presumably intentional - confirm against the gist package's layout.
    return descriptor[:nblocks * sum(ops)]
    

In [None]:
# Mean GIST descriptor for every character/group in `images`.
gists = estimate_features(images, gist_extractor)
# Trailing expression: the string form of the nested dict is the cell's output.
str(gists)

## Clean

In [None]:
# read data
# Expected layout in both directories: <dir>/<char>/<name>.png.
# Rebinds `images` to a flat mapping: images[char] -> list of image arrays.
data_dir = 'data/shufadict/clean'
ext_data_dir = 'data/hanwen360/clean'
images = {}

for file in glob.glob(data_dir + '/*/*.png') + glob.glob(ext_data_dir + '/*/*.png'):
    # The character is the name of the file's parent directory. os.path is
    # used instead of splitting on '/' so that OS-specific separators
    # returned by glob are handled as well.
    char = os.path.basename(os.path.dirname(file))
    img = mpimg.imread(file)
    images.setdefault(char, []).append(img)

In [None]:
# Per-image GIST descriptors, each stored as the string form of a list,
# keyed by character. One progress bar per character, labelled with the
# character's index.
img_gists = {
    char: [
        str(list(gist_extractor(picture)))
        for picture in tqdm(char_images, desc=str(index))
    ]
    for index, (char, char_images) in enumerate(images.items())
}

In [None]:
# json is not imported in the notebook's import cell; without this import
# json.dump below raises NameError.
import json

# Write the per-image descriptors to disk as JSON.
# NOTE(review): absolute, user-specific output path - adjust before running
# on another machine.
with open('/Users/kx/Desktop/gist.json', 'w') as f:
    json.dump(img_gists, f)