In [1]:
import xml.etree.ElementTree as ET
import glob

In [2]:
import time
import numpy as np

In [3]:
# Upper bound on how many annotation files get parsed: the parsing loop
# further down stops once this many files have been read.
totImages = 100

In [4]:
# Paths of every entry in the VOC2012 annotation directory.
# glob returns matches in whatever order the file system lists them, and only
# the first totImages paths are parsed later on, so the list is sorted to make
# that subset identical on every machine and every run.
fn = sorted(glob.glob("VOC2012/Annotations/*"))

In [5]:
# images: image file name -> list of object class names annotated in that
#         image (one entry per annotated object, so names may repeat).
# cat:    every distinct object class name seen while parsing.
images = {}
cat = set()

In [6]:
start = time.time()

cnt = 0

for f in fn:
    cnt = cnt + 1
    if cnt > totImages:
        break
    
    tree = ET.parse(f)
    root = tree.getroot()

    curFile = ""
    for child in root:
        if child.tag == 'filename':
            curFile = child.text
    images[curFile] = []
    
    for child in root:
        if child.tag == 'object':
            for child2 in child:
                if child2.tag == 'name':
                    images[curFile].append(child2.text)
                    cat.add(child2.text)
                    
print 'Done parsing in ', time.time() - start, " seconds"

Done parsing in  0.0902011394501  seconds


In [7]:
# Turn the category set into a list so that every category has a fixed
# position; the feature-vector cell compares labels against cat[j] to pick
# the vector slot to increment. The order is whatever order the set
# iterates in.
cat = list(cat)
print cat

['sheep', 'horse', 'bicycle', 'bottle', 'cow', 'sofa', 'dog', 'bus', 'cat', 'person', 'train', 'diningtable', 'aeroplane', 'car', 'pottedplant', 'tvmonitor', 'chair', 'bird', 'boat', 'motorbike']


In [8]:
# Number of distinct object categories found; the feature vectors built
# further down have exactly this many entries.
len(cat)

20

In [9]:
catimg = dict()
for curCat in cat:
    catimg[curCat] = []

for img in images:
    for curCat in set(images[img]):
        catimg[curCat].append(img)
        
print catimg['sheep'][:5]

['2007_000925.jpg', '2007_000676.jpg', '2007_000175.jpg', '2007_001416.jpg']


In [10]:
featvec = images.copy()

cnt = 0

for img in images:
    cnt = cnt + 1
    if cnt <= 5:
        print img, images[img]
    
    features = featvec[img]
    
    featvec[img] = [0 for i in range(len(cat))]
    for f in features:
        for j in range(len(cat)):
            if cat[j] == f:
                featvec[img][j] = featvec[img][j] + 1
                break
                
    featvec[img] = np.array(featvec[img])
                
    if cnt <= 5:
        print img, featvec[img]
        print ""

2007_001154.jpg ['sofa', 'pottedplant']
2007_001154.jpg [0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0]

2007_000645.jpg ['bird', 'bird']
2007_000645.jpg [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0]

2007_000061.jpg ['boat', 'boat']
2007_000061.jpg [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0]

2007_000847.jpg ['diningtable', 'person']
2007_000847.jpg [0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0]

2007_000129.jpg ['bicycle', 'bicycle', 'bicycle', 'person', 'person', 'person']
2007_000129.jpg [0 0 3 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0]



In [11]:
# Precompute two lookup tables used by the algorithms below:
#   nrm[img]        -> np.linalg.norm of that image's feature vector
#   nrmdist[(a, b)] -> np.linalg.norm of the difference of two feature
#                      vectors, stored for every ordered pair (so both
#                      (a, b) and (b, a), as well as (a, a), are present).
nrm = featvec.copy()
for img, vec in featvec.items():
    nrm[img] = np.linalg.norm(vec)

nrmdist = {}
for a in featvec:
    va = featvec[a]
    for b, vb in featvec.items():
        nrmdist[(a, b)] = np.linalg.norm(va - vb)

In [12]:
# Problem dimensions.
n = len(images)  # number of images parsed
m = len(cat)     # number of object categories
# l and k are passed to every algorithm below as its first two arguments.
# NOTE(review): judging by the recorded outputs, l looks like the size of the
# selected image set and k like a per-category subset size -- confirm against
# the algorithm modules.
l, k = 8, 5

In [13]:
# Run localsearch() from the project's localsearch module on the precomputed
# tables and show the cost it returns.
from localsearch import localsearch

# The call returns three values, unpacked here as (lsS, lsCost, lsEvals).
# NOTE(review): the trailing 0.2 is an unexplained positional argument that
# the other algorithms do not take -- check localsearch's signature for its
# meaning.
lsS, lsCost, lsEvals = localsearch(l, k, featvec, nrm, nrmdist, catimg, 0.2)

print lsCost

Local search value after initialization =  13.1938834438
Intermediate cost at step  0  =  14.4299159368
Intermediate cost at step  1  =  14.9362433585
Intermediate cost at step  2  =  15.5423959332
Intermediate cost at step  3  =  16.6293655124
Intermediate cost at step  4  =  17.0423959332
Intermediate cost at step  5  =  17.5293655124
Intermediate cost at step  6  =  18.1435928673
Intermediate cost at step  7  =  18.5294247297
Intermediate cost at step  8  =  18.742122601
Intermediate cost at step  9  =  18.742122601
Local Search gives cost =  18.742122601
18.742122601


In [14]:
# Run greedysum() from the project's greedysum module with the same l, k and
# lookup tables, and show the cost it returns.
from greedysum import greedysum

# The call returns three values, unpacked here as (gsS, gsCost, gsEvals).
gsS, gsCost, gsEvals = greedysum(l, k, featvec, nrm, nrmdist, catimg)

print gsCost

Greedy Sum gives cost =  18.97978888
18.97978888


In [15]:
# Run greedymerge() from the project's greedymerge module with the same l, k
# and lookup tables, and show the cost it returns.
from greedymerge import greedymerge

# The call returns three values, unpacked here as (gmS, gmCost, gmEvals).
# NOTE(review): the recorded run reports "Size of S is 67" although l = 8, and
# a much larger cost than the other methods -- confirm whether greedymerge
# honours l before comparing its cost with theirs.
gmS, gmCost, gmEvals = greedymerge(l, k, featvec, nrm, nrmdist, catimg)

print gmCost

Greedy Merge gives cost =  43.5473737149
Size of S is  67
43.5473737149


In [16]:
# Run novel() from the project's novel module with the same l, k and lookup
# tables, and report the wall-clock time of the call.
from novel import novel

start = time.time()
# The call returns three values, unpacked here as (myS, myBstS, cost).
myS, myBstS, cost = novel(l, k, featvec, nrm, nrmdist, catimg)
print 'Ran in ', time.time() - start

# sanity check (disabled): for every category c, myBstS[c] should be a subset
# of myS and contain exactly k elements
# print ''
# print len(myS)
# for c in cat:
#     print set(myBstS[c]).issubset(myS), len(myBstS[c]) == k

We obtained objective value  18.97978888  for set  ['2007_000648.jpg', '2007_001027.jpg', '2007_000727.jpg', '2007_001423.jpg', '2007_000793.jpg', '2007_001430.jpg', '2007_001416.jpg', '2007_000661.jpg']
Ran in  1.0007121563


In [17]:
# Run greedyclustering() from the project's greedyclustering module with the
# same l, k and lookup tables, and report the wall-clock time of the call.
from greedyclustering import greedyclustering

start = time.time()
# The return value is kept as a single object in greedyCL.
greedyCL = greedyclustering(l, k, featvec, nrm, nrmdist, catimg)
print 'Ran in ', time.time() - start

Converged after  7  iterations
Our cluster set has  8  elements
They are  ['2007_000243.jpg', '2007_001526.jpg', '2007_000061.jpg', '2007_001423.jpg', '2007_001458.jpg', '2007_000804.jpg', '2007_000464.jpg', '2007_000720.jpg']

Obtained cost  13.8209337549  for set  ['2007_000243.jpg', '2007_001526.jpg', '2007_000061.jpg', '2007_001423.jpg', '2007_001458.jpg', '2007_000804.jpg', '2007_000464.jpg', '2007_000720.jpg']
Ran in  0.108950138092
