Skip to content

Commit

Permalink
clustering example for flickr
Browse files Browse the repository at this point in the history
  • Loading branch information
jhofman committed Jan 31, 2011
1 parent 6f2ae8e commit 0aa3ca6
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 3 deletions.
89 changes: 89 additions & 0 deletions code/image_data/cluster_flickr.py
@@ -0,0 +1,89 @@
#!/usr/bin/env python

import glob
from math import sqrt, ceil

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.cluster.vq as spvq

from imgtools import *

def plot_montage(images, ndx, ncol=None):
    """Tile the selected images into one figure, filling rows left to right.

    Each image is converted to float32 and scaled by its own maximum before
    being placed with ``figimage`` at a pixel offset computed from its grid
    position. Offsets and the final figure size are derived from the tile
    shape, so the layout is only exact when all selected images share the
    same height and width.

    Parameters
    ----------
    images : sequence of array-like
        Image arrays; axis 0 is used as the height and axis 1 as the width.
    ndx : sequence of int
        Indices into ``images`` selecting the tiles to draw, in order.
    ncol : int, optional
        Number of tiles per row. Defaults to ``int(sqrt(len(ndx)))``,
        but never less than 1.

    Returns
    -------
    The matplotlib figure holding the montage. When ``ndx`` is empty the
    figure is returned with nothing drawn on it.
    """
    N = len(ndx)

    f = plt.figure(dpi=100)

    if N == 0:
        # nothing to tile; avoids a zero column count and undefined tile size
        return f

    if not ncol:
        ncol = max(1, int(sqrt(N)))

    # count the trailing partial row too, otherwise it is cropped off
    nrow = int(ceil(N / float(ncol)))

    height = width = 0
    for i, j in enumerate(ndx):
        I = np.asarray(images[j], dtype='float32')

        # scale to [0, 1]; an all-black image is left as zeros instead of
        # dividing by zero
        peak = I.max()
        if peak > 0:
            I = I / peak

        height = I.shape[0]
        width = I.shape[1]

        row, col = divmod(i, ncol)
        f.figimage(I, xo=width*col, yo=height*row, origin='lower')

    # figure size is in inches: pixels / dpi, using true division so the
    # size is not truncated to whole inches
    dpi = float(f.get_dpi())
    f.set_figheight(nrow*height/dpi)
    f.set_figwidth(ncol*width/dpi)

    return f

if __name__=='__main__':
    # Cluster the images in `directory` by their RGB histogram features
    # using k-means, then save one montage image per cluster.

    directory = 'flickr_vivid'
    bins = 10
    K = 3

    pattern = '%s/*.jpg' % directory
    print(pattern)
    fnames = glob.glob(pattern)

    N = len(fnames)
    print("reading %d image files from %s" % (N, directory))

    # progress is reported about every 10% of the files; clamp to 1 so that
    # fewer than 10 files does not cause a modulo-by-zero
    step = max(1, N // 10)

    # collect per-image feature rows and stack them once after the loop
    # instead of re-copying the whole matrix on every iteration
    features = []
    images = []
    for i, fname in enumerate(fnames):
        try:
            I = mpimg.imread(fname)

            if I.ndim != 3 or I.shape[2] < 3:
                # rgb_features indexes three colour channels on axis 2
                print("skipping non-RGB image %s" % fname)
            else:
                x = rgb_features(I, bins)
                # appended together so row r of X always describes images[r]
                features.append(x)
                images.append(I)

        except IOError:
            print("error reading %s" % fname)

        if i % step == 0:
            print("%d/%d images read" % (i, N))

    if len(images) < K:
        raise SystemExit("need at least %d readable images in %s, found %d"
                         % (K, directory, len(images)))

    # rescale every feature column to unit variance before clustering
    X = spvq.whiten(np.vstack(features))

    # fixed seed so that the clustering is reproducible between runs
    np.random.seed(20110201)

    centers, err = spvq.kmeans(X, K)
    print(err)

    assignments, err = spvq.vq(X, centers)

    for k in range(K):
        members = np.where(assignments == k)[0]

        if len(members) == 0:
            # a cluster may end up with no images assigned to it
            print("cluster %d is empty, skipping" % k)
            continue

        f = plot_montage(images, members)

        fname = '%s_cluster_%d.png' % (directory, k)
        print("saving %s" % fname)

        f.savefig(fname)

        # explicitly close the figure; dropping the reference alone does not
        # release it from pyplot's figure registry
        plt.close(f)
4 changes: 2 additions & 2 deletions code/image_data/download_flickr.py
Expand Up @@ -20,9 +20,9 @@ def photo_url(photo, size='s'):
tags = sys.argv[1]
n = int(sys.argv[2])
else:
# default to picture of chairs
# default to 500 pictures tagged with 'vivid'
tags = 'vivid'
n = 100
n = 500

# grab the top-n most interesting photos tagged with 'tags'
query = '''select * from flickr.photos.search(%d) where
Expand Down
21 changes: 20 additions & 1 deletion code/image_data/imgtools.py
Expand Up @@ -26,6 +26,7 @@ def rgb_hist(I, ax, bins=256):
plt.axis('tight')
ax.set_aspect(1./ax.get_data_ratio())


def imshow_hist(I, bins=256):

f = plt.figure()
Expand All @@ -40,6 +41,22 @@ def imshow_hist(I, bins=256):

return f


def rgb_features(I, bins=10, value_range=None):
    """Return the concatenated per-channel intensity histograms of an image.

    The first three planes along axis 2 of ``I`` (red, green, blue) are each
    flattened and histogrammed with the same ``bins`` specification, and the
    three count vectors are joined into a single feature vector.

    Parameters
    ----------
    I : array
        Image array indexed as ``I[:, :, channel]`` with at least three
        channels on the last axis.
    bins : int or sequence, optional
        Passed unchanged to ``numpy.histogram`` for every channel.
    value_range : (float, float), optional
        Lower and upper histogram limits, passed as ``range`` to
        ``numpy.histogram``. With the default ``None`` each channel is binned
        between its own minimum and maximum; pass a fixed range (for example
        ``(0, 256)`` for 8-bit data) to make features comparable across
        images.

    Returns
    -------
    1-D float array of red, then green, then blue counts.
    """
    # imported locally so the function does not rely on the NumPy aliases
    # that used to be exposed in the top-level scipy namespace
    import numpy as np

    per_channel = []

    # run over red, green, and blue channels
    for c in range(3):
        # the returned edges are deliberately discarded: rebinding `bins` to
        # them would force later channels onto the first channel's edges
        counts, _edges = np.histogram(I[:, :, c].ravel(), bins=bins,
                                      range=value_range)
        per_channel.append(counts)

    return np.concatenate(per_channel).astype(float)


if __name__=='__main__':

if len(sys.argv) == 3:
Expand All @@ -65,4 +82,6 @@ def imshow_hist(I, bins=256):

# save figure
base, ext = os.path.splitext(fname)
plt.savefig('%s_%d.png' % (base, bins))
fname = '%s_%d.png' % (base, bins)
print "saving" , fname
plt.savefig(fname)

0 comments on commit 0aa3ca6

Please sign in to comment.