The Intelligence of Dogs

In [None]:
from collections import namedtuple
import pathlib
from pyquery import PyQuery as pq
from urllib import parse

# Wikipedia en domain; wiki paths are joined onto this base URL by pq_wiki().
WIKI = 'https://en.wikipedia.org'

# Intelligence tier rank thresholds, in ascending order. new_breed() takes the
# first threshold that is >= the rank: ranks up to 10 get tier 6, ranks up to
# 31 get tier 5, and so on down to tier 1 for ranks up to 138.
TIER_THRESH = [10, 31, 62, 104, 127, 138]

# Breed of the dog: rank, page heading (name), tier, wiki path, image src, and
# the file names used for the raw download and the processed image.
Breed = namedtuple('Breed', 'rank, name, tier, wiki, img, raw, proc')

# Returns PyQuery object for wiki path p.
def pq_wiki(p):
    """Load the page at wiki path ``p`` (joined onto WIKI) as a PyQuery document."""
    url = parse.urljoin(WIKI, p)
    return pq(url=url)

# Returns a Breed for the given rank, scraped from the breed's wiki page.
def new_breed(rank, wiki):
    """Build a Breed for ``rank`` by scraping the page at wiki path ``wiki``.

    The name is the page's first heading and the image is the src of the first
    infobox image. The raw and processed file names share a stem made of the
    zero-padded rank and the name with spaces replaced by underscores.
    """
    page = pq_wiki(wiki)
    name = page('#firstHeading').text()
    img = page('.infobox-image>a>img')[0].attrib['src']

    # The raw file keeps the source image's extension; the processed one is JPEG.
    stem = '{:0>3d}_{}'.format(rank, name.replace(' ', '_'))
    raw = '{}{}'.format(stem, pathlib.Path(img).suffix)
    proc = '{}.jpg'.format(stem)

    # The first threshold that covers the rank decides the tier; 0 if none does.
    tier = next(
        (len(TIER_THRESH) - pos
         for pos, limit in enumerate(TIER_THRESH)
         if rank <= limit),
        0,
    )
    return Breed(rank=rank, name=name, tier=tier, wiki=wiki, img=img, raw=raw, proc=proc)

Read/Write breeds and raw images

In [None]:
import requests
import simplejson

def write_breeds(breeds):
    """Serialize ``breeds`` to ``breeds.json`` in the working directory.

    Args:
        breeds: list of Breed tuples. read_breeds() expects every entry to come
            back as a mapping of Breed field names, which relies on simplejson
            writing namedtuples as JSON objects.
    """
    # Serialize before opening the file so a serialization error does not
    # leave a truncated breeds.json behind.
    payload = simplejson.dumps(breeds, indent=4*' ')
    # The context manager closes the file even if the write fails.
    with open('breeds.json', 'w') as f:
        f.write(payload)

def read_breeds():
    """Load ``breeds.json`` from the working directory as a list of Breed tuples.

    Each JSON entry is expanded into Breed keyword arguments, so it must be an
    object whose keys are exactly the Breed field names.
    """
    # The original never closed the file handle; the context manager does.
    with open('breeds.json', 'r') as f:
        return [Breed(**d) for d in simplejson.loads(f.read())]

def download_raw(breeds):
    """Download each breed's image into ``raw/<breed.raw>``.

    Args:
        breeds: iterable of Breed tuples; ``breed.img`` is prefixed with
            'https:' to form the download URL and ``breed.raw`` is the target
            file name.

    Raises:
        requests.HTTPError: if a download answers with an error status.
    """
    # Make sure the target directory exists on a fresh checkout.
    pathlib.Path('raw').mkdir(exist_ok=True)
    for breed in breeds:
        # Fetch first and check the status, so a failed request neither leaves
        # an empty file behind nor saves an error page as an image.
        resp = requests.get('https:{}'.format(breed.img))
        resp.raise_for_status()
        with open('raw/{}'.format(breed.raw), "wb") as f:
            f.write(resp.content)

In [None]:
# Fetch and parse the ranking page.
doc = pq_wiki('/wiki/The_Intelligence_of_Dogs')

# A link's rank is its 1-based position in the ordered list.
# For some weird reason ranks 17 and 130 (Collie, Mastiff) are a type of dog
# rather than a breed, so they are skipped.
skip = frozenset((17, 130))
breeds = [
    new_breed(rank, link.attrib['href'])
    for rank, link in enumerate(doc('ol>li>a'), start=1)
    if rank not in skip
]

# Download raw images.
download_raw(breeds)

In [None]:
# Persist the scraped breeds to breeds.json.
write_breeds(breeds)
# Just testing the read: reload the breeds from breeds.json.
breeds = read_breeds()

Add frame to the images

In [None]:
from collections import Counter
import cv2 as cv
import numpy as np
import random
import pathlib
import shutil
from sklearn.cluster import KMeans

# Returns BGR color.
def color(color):
    """Convert a '#RRGGBB' string into a ``[blue, green, red]`` list of ints.

    The first character is dropped without being checked.
    """
    digits = color[1:]
    # Walk the two-digit channel pairs back to front: blue, green, red.
    return [int(digits[start:start + 2], 16) for start in (4, 2, 0)]

# Scanner Darkly ribbon colors (BGR), keyed by tier.
PALETTE = {
    1: color('#912318'),  # red
    2: color('#EE5D02'),  # orange
    3: color('#E5ED3C'),  # yellow
    4: color('#80A82E'),  # green
    5: color('#496BBC'),  # blue
    6: color('#3B3559'),  # violet
}
# Edge length, in pixels, of the square output canvas.
SIZE = 500
# Thickness, in pixels, of the frame ribbons.
BORDER = 32

# Read the raw image and scale to size.
def read_breed_raw(raw, size):
    """Read ``raw/<raw>`` and scale it so its longer side is about ``size`` px.

    Args:
        raw: file name inside the ``raw`` directory.
        size: target length in pixels for the longer image side.

    Returns:
        The image as loaded by OpenCV in color mode, resized with Lanczos
        interpolation while keeping the aspect ratio.

    Raises:
        OSError: if OpenCV cannot read the file.
    """
    path = 'raw/{}'.format(raw)
    image = cv.imread(path, cv.IMREAD_COLOR)
    if image is None:
        # cv.imread reports a missing or undecodable file by returning None.
        raise OSError('could not read image: {}'.format(path))
    # Scale by the spatial dimensions only; the channel axis is not a size.
    height, width = image.shape[:2]
    scale = size / max(height, width)
    # Keep both sides at least one pixel so cv.resize never gets a zero size.
    dim = (max(1, int(width * scale)), max(1, int(height * scale)))
    return cv.resize(image, dim, interpolation=cv.INTER_LANCZOS4)

# Add Scanner Darkly like effect.
def scanner_darkly(image, blur, n_clusters, ribbon, random_state=None):
    """Render ``image`` with a Scanner Darkly-like look on a framed canvas.

    The image is histogram-equalized on its Y channel, blurred, and its colors
    are quantized with k-means in LAB space. The result is pasted centered on
    a SIZE x SIZE canvas that is filled with the least common quantized color
    and crossed by one horizontal and one vertical ribbon.

    Args:
        image: BGR image no larger than SIZE in height or width. The Y-channel
            equalization expects 8-bit data, as loaded by cv.imread.
        blur: Gaussian kernel size, e.g. ``(5, 5)``.
        n_clusters: number of k-means clusters (quantized colors) requested.
        ribbon: BGR color of the two ribbons.
        random_state: optional seed forwarded to KMeans so the output can be
            reproduced; the default ``None`` keeps the unseeded behavior.

    Returns:
        The SIZE x SIZE x 3 BGR canvas.

    Raises:
        ValueError: if the image does not fit on the canvas.
    """
    (h, w) = image.shape[:2]
    if h > SIZE or w > SIZE:
        raise ValueError(
            'image of {}x{} does not fit on a {}x{} canvas'.format(w, h, SIZE, SIZE))

    # Equalize histogram for Y.
    image = cv.cvtColor(image, cv.COLOR_BGR2YUV)
    image[:, :, 0] = cv.equalizeHist(image[:, :, 0])
    image = cv.cvtColor(image, cv.COLOR_YUV2BGR)

    # Smooth image.
    # NOTE: the third positional argument of GaussianBlur is sigmaX, not the
    # border type, so the original call blurred with sigmaX == cv.BORDER_WRAP.
    # That value is kept, now spelled out, so the output does not change.
    image = cv.GaussianBlur(image, blur, sigmaX=cv.BORDER_WRAP)

    # Quantize colors: one k-means sample per pixel, in LAB space.
    lab = cv.cvtColor(image, cv.COLOR_BGR2LAB)
    pixels = lab.reshape((h * w, 3))
    clt = KMeans(n_clusters=n_clusters, random_state=random_state)
    labels = clt.fit_predict(pixels)
    centers = clt.cluster_centers_.astype('uint8')
    quant = centers[labels]

    # Add least common color as the background. most_common() is sorted by
    # descending count, so the last entry is the rarest label. Taking [-1]
    # instead of [n_clusters-1] also works when fewer clusters than requested
    # end up with pixels assigned to them.
    rarest = Counter(labels).most_common()[-1][0]
    bg = np.full((SIZE, SIZE, 3), centers[rarest], dtype='uint8')
    bg = cv.cvtColor(bg, cv.COLOR_LAB2BGR)

    # Add the frame: a horizontal and a vertical ribbon, BORDER pixels thick.
    # NOTE(review): the row offset comes from the width and the column offset
    # from the height, as in the original - confirm this swap is intentional.
    yoff = round(w/3)
    xoff = round(h/3)
    bg[yoff:yoff+BORDER, 0:SIZE] = ribbon
    bg[0:SIZE, xoff:xoff+BORDER] = ribbon

    # Write quantized image, centered on the canvas.
    quant = quant.reshape((h, w, 3))
    quant = cv.cvtColor(quant, cv.COLOR_LAB2BGR)

    yoff = round((SIZE-h)/2)
    xoff = round((SIZE-w)/2)
    bg[yoff:yoff+h, xoff:xoff+w] = quant

    return bg

# Process all breeds.
def proc_breeds(breeds, num):
    """Process a random sample of ``num`` breeds into a fresh ``proc`` directory.

    Each sampled breed's raw image is read at size 420, rendered with
    scanner_darkly using the ribbon color of the breed's tier, and written to
    ``proc/<breed.proc>``.

    Args:
        breeds: sequence of Breed tuples.
        num: how many breeds to sample; pass ``len(breeds)`` to process all.
    """
    # Start from an empty output directory. On a first run 'proc' does not
    # exist yet, which made the unconditional rmtree raise FileNotFoundError.
    proc_dir = pathlib.Path('proc')
    if proc_dir.exists():
        shutil.rmtree('proc')
    proc_dir.mkdir()
    for breed in random.sample(breeds, num):
        image = read_breed_raw(breed.raw, 420)
        proc = scanner_darkly(image, (5, 5), 12, PALETTE[breed.tier])
        cv.imwrite('proc/{}'.format(breed.proc), proc)

In [None]:
# Process every breed: the sample size equals the number of breeds.
proc_breeds(breeds, len(breeds))

Add Logo

In [None]:
def create_logo():
    """Create the logo assets from the first thumbnail of the 'Paw' wiki page.

    Writes three files into the ``logo`` directory:
      * ``raw.jpg``   - the downloaded image,
      * ``proc.jpg``  - the image scaled to 420 px on its longer side and
                        rendered by scanner_darkly with a black ribbon,
      * ``meta.json`` - name, wiki path and image src.

    Raises:
        requests.HTTPError: if the image download answers with an error status.
        OSError: if OpenCV cannot read the downloaded image.
    """
    doc = pq_wiki('/wiki/Paw')
    img = doc('.thumbinner>a>img')[0].attrib['src']

    # Make sure the target directory exists on a fresh checkout.
    pathlib.Path('logo').mkdir(exist_ok=True)

    # Check the status before writing so an error page is never saved as an image.
    resp = requests.get('https:{}'.format(img))
    resp.raise_for_status()
    with open('logo/raw.jpg', "wb") as f:
        f.write(resp.content)

    image = cv.imread('logo/raw.jpg', cv.IMREAD_COLOR)
    if image is None:
        # cv.imread reports an undecodable file by returning None.
        raise OSError('could not read image: logo/raw.jpg')
    # Scale by the spatial dimensions only; the channel axis is not a size.
    scale = 420/max(image.shape[:2])
    dim = (int(image.shape[1] * scale), int(image.shape[0] * scale))
    image = cv.resize(image, dim, interpolation=cv.INTER_LANCZOS4)
    proc = scanner_darkly(image, (5, 5), 12, [0, 0, 0])
    cv.imwrite('logo/proc.jpg', proc)

    logo_json = {'name': 'The Intelligence of Dogs', 'wiki': '/wiki/The_Intelligence_of_Dogs', 'img': img}
    with open('logo/meta.json', "w") as f:
        f.write(simplejson.dumps(logo_json, indent=4*' '))

In [None]:
# Download the paw image and write the files under logo/.
create_logo()

Add Background

In [None]:
def create_bg():
    """Create the background assets from the 'A Scanner Darkly (film)' infobox.

    Writes three files into the ``bg`` directory:
      * ``raw.jpg``   - the downloaded infobox image,
      * ``proc.jpg``  - the image scaled to 420 px on its longer side and
                        rendered by scanner_darkly with a white ribbon,
      * ``meta.json`` - name, wiki path and image src.

    Raises:
        requests.HTTPError: if the image download answers with an error status.
        OSError: if OpenCV cannot read the downloaded image.
    """
    doc = pq_wiki('/wiki/A_Scanner_Darkly_(film)')
    img = doc('.infobox-image>a>img')[0].attrib['src']

    # Make sure the target directory exists on a fresh checkout.
    pathlib.Path('bg').mkdir(exist_ok=True)

    # Check the status before writing so an error page is never saved as an image.
    resp = requests.get('https:{}'.format(img))
    resp.raise_for_status()
    with open('bg/raw.jpg', "wb") as f:
        f.write(resp.content)

    image = cv.imread('bg/raw.jpg', cv.IMREAD_COLOR)
    if image is None:
        # cv.imread reports an undecodable file by returning None.
        raise OSError('could not read image: bg/raw.jpg')
    # Scale by the spatial dimensions only; the channel axis is not a size.
    scale = 420/max(image.shape[:2])
    dim = (int(image.shape[1] * scale), int(image.shape[0] * scale))
    image = cv.resize(image, dim, interpolation=cv.INTER_LANCZOS4)
    proc = scanner_darkly(image, (5, 5), 12, [255, 255, 255])
    cv.imwrite('bg/proc.jpg', proc)

    # Named for what it holds; the original reused the name 'logo_json' here.
    bg_json = {'name': 'A Scanner Darkly (film)', 'wiki': '/wiki/A_Scanner_Darkly_(film)', 'img': img}
    with open('bg/meta.json', "w") as f:
        f.write(simplejson.dumps(bg_json, indent=4*' '))

In [None]:
# Download the film infobox image and write the files under bg/.
create_bg()

Debug

In [None]:
from matplotlib import pyplot as plt
import pathlib

# Contact sheet of the processed images on a 30 x 5 grid, in sorted path order.
f = plt.figure(figsize=(30,180))
for i, p in enumerate(sorted(pathlib.Path('proc').iterdir())):
    image = cv.imread(str(p), cv.IMREAD_COLOR)
    ax = f.add_subplot(30, 5, i+1)
    # OpenCV loads BGR; convert to RGB for display.
    ax.imshow(cv.cvtColor(image, cv.COLOR_BGR2RGB))
    ax.set_title(str(p))
    ax.axis('off')
plt.show()

In [None]:
import json

def pdesc(rank):
    """Print a Markdown description of the breed with the given rank.

    Prints the breed name, then a fenced block with the breed's fields as JSON,
    leaving out the local file names ('raw' and 'proc').

    The breed is looked up by its ``rank`` field rather than by list position:
    ranks 17 and 130 are skipped when ``breeds`` is built, so ``breeds[rank-1]``
    returned the wrong breed for every rank after 17.

    Args:
        rank: rank of the breed to describe.

    Raises:
        IndexError: if no breed in ``breeds`` has that rank (same exception
            type the positional lookup raised for out-of-range ranks).
    """
    match = next((b for b in breeds if b.rank == rank), None)
    if match is None:
        raise IndexError('no breed with rank {}'.format(rank))
    # Copy the fields into a plain dict so the file names can be dropped.
    j = dict(match._asdict())
    del j['raw']
    del j['proc']
    print(j['name'])
    print('```\n{}\n```'.format(simplejson.dumps(j, indent=4*' ')))