### CS102: Unstructured Data - Images

In [None]:
from PIL import Image
from StringIO import StringIO
from IPython.display import display
import matplotlib.pyplot as plt
%matplotlib inline
from scipy.spatial import distance

In [None]:
# For compatibility of file access and directory listings
# across multiple platforms
import os
# IB is True when the INSTABASE_URI environment variable is set
# (taken here to mean the notebook is running on Instabase).
IB = 'INSTABASE_URI' in os.environ
# When IB is set, route file access through ib.open; otherwise keep
# the builtin open under the same name.
if IB:
    open = ib.open
else:
    open = open
def ib_listdir(path):
    """Return the names of all entries listed under `path` via ib.list_dir.

    ib.list_dir is called repeatedly: each response's 'nodes' contribute
    their 'name' values, and while a response reports 'has_more' the next
    call is made with that response's 'next_page_token'. The second element
    of the tuple returned by ib.list_dir is ignored.
    """
    names = []
    token = u''
    more_pages = True
    while more_pages:
        listing, _ = ib.list_dir(path, start_page_token=token)
        names.extend(entry['name'] for entry in listing['nodes'])
        more_pages = listing.get('has_more', False)
        if more_pages:
            # Continue from where this page ended
            token = listing.get('next_page_token', '')
    return names
# Pick the directory-listing function that matches the IB flag:
# the paged ib_listdir when IB is set, os.listdir otherwise.
if IB:
    listdir = ib_listdir
else:
    listdir = os.listdir

#### Set up RGB triples for basic colors (easy to add more)

In [None]:
# Basic color name -> (R, G, B) triple, each channel in 0..255.
# Add further entries here to recognize more colors.
colordict = {
    'red':    (255, 0, 0),
    'purple': (128, 0, 128),
    'blue':   (0, 0, 255),
    'green':  (0, 255, 0),
    'yellow': (255, 255, 0),
    'orange': (255, 165, 0),
    'pink':   (255, 192, 203),
    'white':  (255, 255, 255),
    'gray':   (128, 128, 128),
    'black':  (0, 0, 0),
}

#### The image dataset is a directory of PNG or JPG files. The directory 'flags' contains 206 country flags. Filter on the file (country) name to reduce the dataset size.

In [None]:
# Directory holding the image files
directory = 'flags'
# Only file names containing this substring are kept
filterstring = 'nia' # Use '' for no filter
allfiles = listdir(directory)
# Keep the names that contain the filter string, in listing order
files = []
for f in allfiles:
    if filterstring in f:
        files.append(f)
# Show which files were selected
for filename in files:
    print(filename)

### Find dominant color in images

In [None]:
# For each selected file: show the image, then show a swatch of the color
# that covers the most pixels.
for filename in files:
    # Show filename and image
    print(filename)
    # NOTE(review): the file is read with the default open mode; binary image
    # data may need mode 'rb' on some platforms -- confirm that ib.open
    # accepts a mode argument before changing this.
    data = open(directory + '/' + filename).read()
    image = Image.open(StringIO(data))
    display(image)
    # getcolors() reports colors in the image's own mode: palette ('P') images
    # yield palette indexes and grayscale images yield single ints instead of
    # tuples. Convert those so every color below is an (R, G, B, A) tuple;
    # RGB and RGBA images are used unchanged.
    if image.mode in ('RGB', 'RGBA'):
        pixels = image
    else:
        pixels = image.convert('RGBA')
    # Find dominant color
    # getcolors() parameter (2500) specifies up to 2500 different colors in image;
    #   function returns 'None' if image has more than 2500 different colors
    colors = pixels.getcolors(2500)
    if colors is None:
        print('Too many colors')
        continue
    # Each entry is (pixel count, color); max() returns the first entry
    # with the highest count.
    domcolor = max(colors, key=lambda entry: entry[0])[1]
    print('Dominant color:')
    # Normalize channel values from 0..255 to 0..1 for imshow function
    normcolor = [x / 255.0 for x in domcolor]
    # A 1x1 "image" whose single pixel is the dominant color
    plt.imshow([[normcolor]])
    plt.show()

### Find closest basic color to dominant color

In [None]:
# For each selected file: show the image, find the color covering the most
# pixels, and report the basic color (from colordict) nearest to it.
for filename in files:
    # Show filename and image
    print(filename)
    # NOTE(review): the file is read with the default open mode; binary image
    # data may need mode 'rb' on some platforms -- confirm that ib.open
    # accepts a mode argument before changing this.
    data = open(directory + '/' + filename).read()
    image = Image.open(StringIO(data))
    display(image)
    # getcolors() reports colors in the image's own mode: palette ('P') images
    # yield palette indexes and grayscale images yield single ints instead of
    # tuples. Convert those so every color below is an (R, G, B, A) tuple;
    # RGB and RGBA images are used unchanged.
    if image.mode in ('RGB', 'RGBA'):
        pixels = image
    else:
        pixels = image.convert('RGBA')
    # Find dominant color (getcolors returns None above 2500 distinct colors)
    colors = pixels.getcolors(2500)
    if colors is None:
        print('Too many colors')
        continue
    # Each entry is (pixel count, color); max() returns the first entry
    # with the highest count.
    domcolor = max(colors, key=lambda entry: entry[0])[1]
    # Closest basic color
    # Remove alpha value if present (RGBA to RGB)
    rgb = domcolor[:3]
    # Smallest Euclidean distance in RGB space; on a tie min() keeps the
    # first basic color encountered.
    closest = min(colordict,
                  key=lambda name: distance.euclidean(rgb, colordict[name]))
    print('Closest basic color: %s \n' % closest)

### Find weighted average distance from each basic color

In [None]:
# For each selected file: show the image, then print, for every basic color,
# the pixel-count-weighted average Euclidean distance between the image's
# colors and that basic color.
for filename in files:
    # Show filename and image
    print(filename)
    # NOTE(review): the file is read with the default open mode; binary image
    # data may need mode 'rb' on some platforms -- confirm that ib.open
    # accepts a mode argument before changing this.
    data = open(directory + '/' + filename).read()
    image = Image.open(StringIO(data))
    display(image)
    # getcolors() reports colors in the image's own mode: palette ('P') images
    # yield palette indexes and grayscale images yield single ints instead of
    # tuples. Convert those so every color below is an (R, G, B, A) tuple;
    # RGB and RGBA images are used unchanged.
    if image.mode in ('RGB', 'RGBA'):
        pixels = image
    else:
        pixels = image.convert('RGBA')
    # Collect (pixel count, color) pairs (None above 2500 distinct colors)
    colors = pixels.getcolors(2500)
    if colors is None:
        print('Too many colors')
        continue
    # Remove alpha value if present (RGBA to RGB); done once per image
    # rather than once per basic color.
    samples = [(count, color[:3]) for count, color in colors]
    # Total number of pixels counted, the weight denominator
    num = sum(count for count, _ in samples)
    for b in colordict:
        target = colordict[b]
        total = sum(count * distance.euclidean(rgb, target)
                    for count, rgb in samples)
        wavg = total / num
        print('%s %s' % (b, wavg))
    print('\n')