## Image encoder

In [1]:
from tools.image_process import getLayerNames, checkLayerNames, image2feature
import glob
import numpy as np

# Encode every SVG illustration under images/ as one feature vector.
# sorted() orders the paths as strings, so e.g. images/10.svg is visited before
# images/2.svg.
img_embed = []
layer_names = []
for fileName in sorted(glob.glob('images/*.svg')):
    # Any path containing 'test' or 'stack' is skipped.
    is_excluded = any(marker in fileName for marker in ('test', 'stack'))
    if not is_excluded:
        print(fileName)
        names = getLayerNames(fileName)
        # Called for its checks only; the return value is discarded.
        checkLayerNames(names)
        print(names)
        # Keep the raw layer names as well: a later cell pairs them with the text.
        layer_names.append(names)
        img_embed.append(image2feature(names))

# Stack the per-image features into a single array and report its shape.
img_embed = np.array(img_embed)
print(img_embed.shape)

images/1.svg
['A2122', 'A3212']
images/10.svg
['A1', 'A2121_1_', 'A323']
images/11.svg
['A1', 'A2121_1_', 'A313']
images/12.svg
['A2121', 'A3211']
images/13.svg
['A2122', 'A3213']
images/14.svg
['A1', 'A323', 'A2222']
images/15.svg
['A1', 'A325']
images/16.svg
['A2221', 'A322']
images/17.svg
['A2121', 'A3213']
images/18.svg
['A1', 'A2121_1_', 'A3212']
images/19.svg
['A2121_1_', 'A3213']
images/2.svg
['A4', 'A2121', 'A324']
images/20.svg
['A2113', 'A311']
images/3.svg
['A4', 'A2122', 'A312']
images/4.svg
['A2211', 'A311']
images/5.svg
['A2212', 'A3211']
images/6.svg
['A2112', 'A311']
images/7.svg
['A2121', 'A324']
images/8.svg
['A311', 'A2121']
images/9.svg
['A2121']
(20, 38)


In [2]:
# Caveat:
# The first <image> element in a <g> is the shadow, i.e. the extra .png file in the directory.

In [3]:
# Test corpus: a handful of hand-written descriptions.
# NOTE(review): the third entry ends with a stray " we", which looks like a typo;
# it is kept verbatim here. Confirm before removing.
test_text = [
    'A man is lying on the sofa.',
    'A man is sitting next to a computer.',
    'A man is presenting a chart. The light is on. we',
    'A woman is standing next to a bucket.',
    "It's a deer in a sock",
    'A christmas tree with presents',
]

## text encoder

### reader and vectorizer

In [4]:
# text reader
import glob
from tools.text_process import LemmaTokenizer
# Read every description under text/ and tokenize it.
# The paths are sorted as strings (1, 10, 11, ..., 2, 20, 3, ...), the same way
# the image cell sorts its SVG paths, so that index i of `sentences` lines up
# with index i of the image lists.
tokenizer = LemmaTokenizer()
sentences = []
for fileName in sorted(glob.glob('text/*.txt')):
    print(fileName)
    with open(fileName, 'r') as handle:
        raw_text = handle.read()
    # One tokenizer result per file is appended.
    sentences.append(tokenizer(raw_text))

## vectorizer
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

def _identity_tokenizer(tokens):
    """Return the already-tokenized document unchanged."""
    return tokens


# The documents handed to the vectorizer are already tokenized, so tokenization
# is a pass-through and lowercasing is disabled. Features are unigrams and
# bigrams with sublinear tf scaling and no row normalization.
vectorizer = TfidfVectorizer(
    tokenizer=_identity_tokenizer,
    lowercase=False,
    stop_words=[],
    ngram_range=(1, 2),
    sublinear_tf=True,
    norm=None,
)
text_embed = vectorizer.fit_transform(sentences)
print(type(text_embed))
print(text_embed.shape)
# Use text_embed.toarray() to inspect the dense matrix when debugging.
print(len(vectorizer.vocabulary_))

text/1.txt
text/10.txt
text/11.txt
text/12.txt
text/13.txt
text/14.txt
text/15.txt
text/16.txt
text/17.txt
text/18.txt
text/19.txt
text/2.txt
text/20.txt
text/3.txt
text/4.txt
text/5.txt
text/6.txt
text/7.txt
text/8.txt
text/9.txt
<class 'scipy.sparse.csr.csr_matrix'>
(20, 103)
103




### Similarity between category keywords and the description
**TODO: suppress categories that are not present in the image?**

In [12]:
from tools.text_process import keywordSimi
from tools.image_process import image2SimiFeature
import numpy as np

In [13]:
# Compute one keyword-similarity feature vector per (image, description) pair.
#
# zip() stops silently at the shorter input. The image cell filters out paths
# containing 'test' or 'stack' while the text cell reads every file, so a length
# mismatch would mean images are paired with the wrong descriptions. Fail loudly.
assert len(layer_names) == len(sentences), (
    'image/text count mismatch: %d images vs %d descriptions'
    % (len(layer_names), len(sentences))
)

feature_simis = []
for layers, sentence in zip(layer_names, sentences):
    print(sentence)
    feature_simis.append(image2SimiFeature(layers, sentence))

# Stack into one array; its row count should match img_embed's, printed below.
feature_simis = np.array(feature_simis)
print(feature_simis.shape)
print(img_embed.shape)

['man.n.01', 'look.v.01', 'chart.n.01', 'back.n.01']
['man.n.01', 'walk.v.01', 'past.n.01', 'web.n.01', 'icon.n.01']
['man.n.01', 'woman.n.01', 'be.v.01', 'interact.v.01']
['man.n.01', 'lean.v.01', 'set.n.01', 'chart.n.01']
['woman.n.01', 'stand.v.01', 'following.s.02', 'poster.n.01', 'planet.n.01']
['man.n.01', 'pull.v.01', 'sword.n.01', 'rock.n.01']
['woman.n.01', 'be.v.01', 'show.v.01', 'love.v.01']
['man.n.01', 'be.v.01', 'sit.v.01', 'wood.n.01']
['man.n.01', 'stand.v.01', 'following.s.02', 'icon.n.01']
['man.n.01', 'stand.v.01', 'front.n.01', 'chart.n.01', 'show.v.01', 'muscle.n.01']
['woman.n.01', 'stand.v.01', 'front.n.01', 'chart.n.01']
['man.n.01', 'show.v.01', 'chart.n.01']
['man.n.01', 'drink.v.01', 'coffee.n.01', 'airport.n.01', 'window.n.01']
['man.n.01', 'woman.n.01', 'play.n.01', 'circle.n.01', 'compass.n.01']
['girl.n.01', 'be.v.01', 'lean.v.01', 'car.n.01']
['girl.n.01', 'be.v.01', 'stand.v.01', 'edge.n.01', 'city.n.01']
['man.n.01', 'be.v.01', 'clean.v.01', 'computer.

#### all

In [30]:
# Import the submodule explicitly. On older SciPy releases a bare `import scipy`
# does not load `scipy.sparse`, so `scipy.sparse.hstack` below would only work
# if some other import had already loaded it.
import scipy.sparse

# Report the shape and container type of each feature group before joining.
print('--- text')
print(text_embed.shape)
print(type(text_embed))
print('--- image')
print(img_embed.shape)
print(type(img_embed))
print('--- joint')
print(feature_simis.shape)
print(type(feature_simis))
print('---')
# Concatenate column-wise: all three inputs must have the same number of rows.
embed_all = scipy.sparse.hstack([text_embed,
                                 img_embed,
                                 feature_simis])
print(embed_all.shape)

--- text
(20, 103)
<class 'scipy.sparse.csr.csr_matrix'>
--- image
(20, 38)
<class 'numpy.ndarray'>
--- joint
(20, 33)
<class 'numpy.ndarray'>
---
(20, 174)


## Test

In [132]:
# `parse` is not imported in any visible cell, so this cell fails on a fresh
# kernel. The `<DOM Element: ...>` repr in the recorded output is that of
# xml.dom.minidom, so import it from there.
from xml.dom.minidom import parse

# Parse the sample SVG into a DOM document used by the exploration cells below.
doc = parse('SB.svg')
# Exploration notes kept for reference:
# ele = [c for c in doc.childNodes if c.nodeType == 1]
# assert(len(ele) == 1)
# [n.getAttribute('id') for n in ele[0].childNodes if n.nodeType==1 and n.tagName=='g']
# [g.getAttribute('id') for g in doc.getElementsByTagName('g') if g.hasAttribute('id')]
# id_list = [g.getAttribute('id') for g in doc.getElementsByTagName('g') if g.hasAttribute('id')]
# assert(id_list)

# get the tagname: element.tagName

In [76]:
# The layer order may be reversed, which is odd.
# At the very least, the background must be at the bottom,
# but what if there are only a person and surroundings?

# It is reversed,
# because the first object is at the bottom.

In [74]:
# List every <image> element in the parsed document.
doc.getElementsByTagName('image')

[<DOM Element: image at 0x112681cc0>]

In [56]:
# Collect the id attribute of every element (any tag) that declares one.
[node.getAttribute('id') for node in doc.getElementsByTagName('*') if node.hasAttribute('id')]

['_x31__x5F_1_x5F_1']