## Export materiall from images

In [4]:
## export layers to dir named after file
import os
for file in sorted(os.listdir('images')):
    base, ext = os.path.splitext(file)
    if ext == '.svg' and base != 'test' and base != 'stack':
        print(file)
        os.system("./export_svg.sh %s" % file)
        print(os.listdir('images/m_%s' % base))

1_PS.svg
['A-3-1.png', 'A-2-1-2-1.png']
2_PSB.svg
['A-1.png', 'A-3-1.png', 'A-2-1-2-2.png']
3_PSB1.svg
['A-1.png', 'A-2-2-1.png', 'A-3-2.png']
4_PSD.svg
['A-1.png', 'A-3-2-1.png', 'A-2-1-1.png']
5_S.svg
['A-2-1-1.png']
6_SB.svg
['A-1.png', 'A-2-1-2.png']


In [15]:
def group_layers(prefix='m_', target='material', image_dir='images'):
    """
    Collect layers in separate dirs into material
    Extend name if necessary
    """
    import glob
    import shutil
    if os.path.exists(target):
        files = glob.glob(target+'/*')
        for file in files:
            os.remove(file)
    else:
        os.system('mkdir '+target)

    for d_ in sorted(os.listdir(image_dir)):
        d = image_dir + '/' + d_
        if os.path.isdir(d) and d_.startswith(prefix):
            print(d)
            for m in os.listdir(d):
                print('  %s' % m)
                if m in os.listdir(target):
                    base, ext = os.path.splitext(m)
                    num = len(glob.glob('%s/%s*.png' % (target,
                                                        base)))
                    new_m = '%s_(%s)%s' % (base, num, ext)
                else:
                    new_m = m
                shutil.move(d+'/'+m, target+'/'+new_m)
            os.rmdir(d)

In [16]:
group_layers()

images/m_1_PS
  A-3-1.png
  A-2-1-2-1.png
images/m_2_PSB
  A-1.png
  A-3-1.png
  A-2-1-2-2.png
images/m_3_PSB1
  A-1.png
  A-2-2-1.png
  A-3-2.png
images/m_4_PSD
  A-1.png
  A-3-2-1.png
  A-2-1-1.png
images/m_5_S
  A-2-1-1.png
images/m_6_SB
  A-1.png
  A-2-1-2.png


In [17]:
# Weird!
# every time use svg-objects-export.py to export a .svg, or use parse in xml to read a .svg, the objects in this image will move  to the right for a random length, but it won't affect the next read or parse, and it won't affect the appearance of exported .png files
# also, use parse, export or even open the .svg in the browser, the layers will be stacked into one layer on the very top
# seems a problem with minidom or inkscape

## Image reader

In [98]:
from xml.dom.minidom import parse
import os
import numpy as np
import glob

In [99]:
# rewrite this
# if one layer: id with svg
# if multiple layers: the first <g> with id

In [107]:
def getLayerNames(file):
    doc = parse(file)
    svg_list = doc.getElementsByTagName('svg')
    assert(len(svg_list) == 1)
    svg = svg_list[0]
    layers = [g for g in svg.childNodes \
              if g.nodeType == 1 and \
                 g.tagName == 'g' and \
                 g.hasAttribute('id')]
    if layers:
        return [l.getAttribute('id') for l in layers]
    else:
        if svg.hasAttribute('id'):
            # if single layer case, id belongs to <svg>
            assert(len([g for g in svg.childNodes \
                          if g.nodeType == 1 and \
                             g.tagName == 'g']) == 1)
            return [svg.getAttribute('id')]
        else:
            raise ValueError('No valid id name found!')

def name2code(name):
    """
    input: A-1-2-3-4
    output: [1,2,3,4]
    """
    return [int(d) for d in name.split('-')[1:]]
    
def checkLayerNames(names):
    ### styling check
    # names of layers must start with 'A-'
    for name in names:
        assert(name.startswith('A-')), "%s does not start with A-" % name
    
    # names of layers should match \d-\d
    import re
    for name in names:
        assert(re.match(r'A(-\d)+', name)), "%s does not match the pattern!" % name
    
    ### sanity check
    cat_codes = [name2code(s)[0] for s in names]
    
    # if background in, it must be the most bottom layer
    if 1 in cat_codes:
        assert(cat_codes.index(1) == 0), 'background should be the most bottom!'
        
    # if decoration in, it must be the toppest layer
    if 4 in cat_codes:
        assert(cat_codes.index(4) == len(cat_codes) - 1), 'decoration should be the most top!'
    
    if len(cat_codes) == 1:
        # if only one layer, must be surrounding layer
        assert(cat_codes[0] == 2), 'it must be the surrounding layer if there is only one layer'
    else:
        # if multiple layers, check the cat order
        # background 1 - surroundings 2 - person 3 - decoration 4    
        for code1, code2 in zip(cat_codes[:-1], cat_codes[1:]):
            assert(code2 > code1), \
               'layer %s should not be below layer %s!' % (code2, code1)

In [108]:
for fileName in sorted(glob.glob('images/*.svg')):
    # base, ext = os.path.splitext(fileName)
    # if ext == '.svg' and base != 'test' and base != 'stack':
    if 'test' in fileName or 'stack' in fileName:
        continue
    print(fileName)
    # print(get_id_list(fileName))
    names = getLayerNames(fileName)
    checkLayerNames(names)
    print(names)

images/1_PS.svg
['A-2-1-2-1', 'A-3-1']
images/2_PSB.svg
['A-1', 'A-2-1-2-2', 'A-3-1']
images/3_PSB1.svg
['A-1', 'A-2-2-1', 'A-3-2-3']
images/4_PSD.svg
['A-1', 'A-2-1-1', 'A-3-2-1']
images/5_S.svg
['A-2-1-1']
images/6_SB.svg
['A-1', 'A-2-1-2-3']


In [103]:
# Caveats
# The first <image> element in a <g> is the shadow, namely the extra .png file in the dir

## Image embedding

In [119]:
from itertools import count

def recurReplace(nested, id_list, value=1):
    """
    Replace an element in a nested list recursively
    """
    # number of indexes should be lower than the depth
    assert(len(id_list) <= getDepth(nested))
    
    # print(nested, '--', id_list)
    if len(id_list) > 1:  
        recurReplace(nested[id_list[0]], id_list[1:])
    else:
        nested[id_list[0]] = value
        
def getDepth(li):
    """
    Get the depth of a nested list
    """
    for level in count():
        if not li:
            return level
        li = [e for l in li if isinstance(l, list) for e in l]

def isPureList(l):
    """
    Check if a list is not a nested one
    """
    for e in l:
        if isinstance(e, list):
            return False
    return True

def extractLeaf(feat, depth, concat=[], level=0):
    """
    Extract the leaf lists in the same level in a nested list
    """
    level += 1
    # print('  ', level, depth, concat)
    for i, e in enumerate(feat):
        if isinstance(e, list):
            if isPureList(e):
                # lowest level only
                if level == depth - 1:
                    concat.extend(e)
                    feat[i] = sum(e)
            else:
                feat[i], concat = extractLeaf(e, depth, concat, level)
    return feat, concat

def oneHotStruct(feat):
    """
    Given the nested features,
        encode each category level into one-hot
    """
    concats = []
    while not isPureList(feat):
        feat, concat = extractLeaf(feat,
                                   getDepth(feat),
                                   concat=[],
                                   level=0)
        # print(feat, concat)
        ## use append to differ between sources
        # concats.append(concat)
        concats.extend(concat)
    # concats.append(feat)    
    concats.extend(concat)
    return concats[::-1]

def code2indslist(code):
    """
    Turn a code into the reference indexes of a nested list
        Eg. [2,1,2,1] -> [0,1,0]
        the first number is the ident code of the layer type
    """
    return [d-1 for d in code[1:]]

def image2feature(layer_names):
    
    features = []
    
    # number of layers
    features.append(len(layer_names))
    
    # convert to digit codes first
    codes = [name2code(name) for name in layer_names]
    
    # four layer type, binary
    cat_feat = [0] * 4
    for code in codes:
        cat_feat[code[0] - 1] = 1
    features.append(cat_feat)
    
    # sub-categories: one-hot for each level 
    for code in codes:
        if code[0] == 1 or code[0] == 4:
            # background or decoration, no need to encode
            assert(len(code) == 1)
        
        elif code[0] == 2:
            # surrounding
            # initilize features based on grouping table
            feat = [[0,[0,0,0]],[0,[0,0,0],[0,0]]]
            recurReplace(feat, code2indslist(code))
            features.append(oneHotStruct(feat))
        
        elif code[0] == 3:
            # person
            feat = [0,[0,0,0],[0,0,0,0]]
            recurReplace(feat, code2indslist(code))
            features.append(oneHotStruct(feat))           
        
    return features

In [120]:
print(image2feature(['A-2-1-2-1', 'A-3-1', 'A-4']))

[3, [0, 1, 1, 1], [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]


### test text descriptions

In [21]:
['A man is lying on the sofa.',
 'A man is sitting next to a computer.',
 'A man is presenting a chart.',
 'A woman is standing next to a bucket.',
 'A deer in a sock',
 'A christmas tree with presents']

['A man is lying on the sofa.',
 'A man is sitting next to a computer.',
 'A man is presenting a chart.',
 'A woman is standing next to a bucket.',
 'A deer in a sock',
 'A christmas tree with presents']

## Renderer

In [42]:
def str2num_size(size):
    assert type(size) is str
    return int(float(size.strip('pt')))
    
def get_size(file):
    """
    Get the width and height of an image
    
    .svg:
        use xml's parse
        By check the 'viewBox' attribute in the 'svg' tag
    
    .png:
        use PIL
    """
    
    basename, ext = os.path.splitext(file)
    if ext == '.svg':   
        from xml.dom.minidom import parse
        doc = parse(file)
        # search svg element
        image_list = doc.getElementsByTagName('svg')
        assert(image_list), 'no svg element found!'
        assert(len(image_list) == 1), file
        img = image_list[0]
        assert(img.hasAttribute('viewBox'))
        width, height = img.getAttribute('viewBox').split()[2:]
        width, height = (str2num_size(width), str2num_size(height))
        # try search image element, then g element
#     try:
#         assert image_list
#     except AssertionError:
#         image_list = doc.getElementsByTagName('svg')
#         assert image_list
#         assert image_list[0].hasAttribute('width')
#     assert(len(image_list) == 1), svg_file
#     ## if multiple images, size is the maximum size in either direction        
    elif ext == '.png':
        from PIL import Image
        width, height = Image.open(file).size

    return (width, height)

In [43]:
def stack_svgs(file_list, opt_file=None, canvas_size=None):
 
    import cairosvg
    from svgutils.compose import Figure, Image #,SVG
    
    if not opt_file:
        opt_file = 'stack.svg'
        
    if canvas_size:
        canvas_w, canvas_h = canvas_size
    else:
        canvas_w, canvas_h = 0, 0
        for file in file_list:
            width, height = get_size(file)
            if width > canvas_w: canvas_w = width
            if height > canvas_h: canvas_h = height
    print('Canvas size:', (canvas_w, canvas_h))
    
    # if svg, convert to png first
    file_list_png = []
    for file in file_list:
        basename, ext = os.path.splitext(file)
        if ext == '.svg':
            png_file = basename + '.png'
            cairosvg.svg2png(url=file,
                             write_to=png_file)
            file_list_png.append(png_file)
        elif ext == '.png':
            file_list_png.append(file)
        else:
            raise ValueError('File type not availale!')
    
    image_list = []
    # 
    for file in file_list_png:
        print('File:', file)
        width, height = get_size(file)
        img = Image(width, height, file)
        img.move(int((canvas_w-width)/2),
                 int((canvas_h-height)/2))
        image_list.append(img)
    
        
    Figure(canvas_w, canvas_h, *image_list).save(opt_file)

In [46]:
stack_svgs(['svg-objects-export/A-1.png', 
            'svg-objects-export/A-2-1-2-1.png', 
            'svg-objects-export/A-3-1.png'])

Canvas size: (1114, 786)
File: svg-objects-export/A-1.png
File: svg-objects-export/A-2-1-2-1.png
File: svg-objects-export/A-3-1.png


In [45]:
stack_svgs(['material/背景4.svg', 
            'material/123.svg', 
            'material/生活方式3.svg'])

Canvas size: (1145, 816)
File: material/背景4.png
File: material/123.png
File: material/生活方式3.png




In [13]:
## write a tool to stack two svgs:
### put <g> altogether, then for each elememt, add attribute recursively

In [354]:
## ValueError: material/124.svg
## issue of cairosvg converter

## Test

In [132]:
doc = parse('SB.svg')
# ele = [c for c in doc.childNodes if c.nodeType == 1]
# assert(len(ele) == 1)
# [n.getAttribute('id') for n in ele[0].childNodes if n.nodeType==1 and n.tagName=='g']
# [g.getAttribute('id') for g in doc.getElementsByTagName('g') if g.hasAttribute('id')]
# id_list = [g.getAttribute('id') for g in doc.getElementsByTagName('g') if g.hasAttribute('id')]
# assert(id_list)

# get the tagname: element.tagName

In [76]:
# the order may be reversed, that's weird
# at least. background must be at the bottom
# but what if there are only person and surronding

# it's reversed
# because the first object is at the bottom

In [74]:
doc.getElementsByTagName('image')

[<DOM Element: image at 0x112681cc0>]

In [56]:
[g.getAttribute('id') for g in doc.getElementsByTagName('*') if g.hasAttribute('id')]

['_x31__x5F_1_x5F_1']