# Session 13: Building a recommendation system

We now build a set of websites to view all of the visual data and metadata
in a collection of images.

## Build an exploratory website

In this part of the tutorial, we will build an exploratory set of webpages
locally on your computer that you can navigate to understand a corpus of
interest.

To start, we once again need to load in a few Python modules.

In [None]:
import numpy as np
import scipy as sp
import pandas as pd

import os
from os.path import join

Next, we have written a number of Python functions (reusable blocks of code) that
create the website. Rather than hiding this in another file, here is the code in
its entirety; there is no need to understand exactly what every line is doing at
this point. Run each block of code to load the functions into Python.

In [None]:
def rm_ext(s):
    """Return the path `s` with its final file extension removed."""
    stem, _ext = os.path.splitext(s)
    return stem

def get_ext(s):
    """Return the final extension of `s`, leading dot included ("" if none)."""
    _stem, ext = os.path.splitext(s)
    return ext

In [None]:
def clean_html():
    """Create the output folders if needed and delete stale item pages.

    Ensures that ``../html`` and ``../html/pages`` exist (relative to the
    current working directory), then removes every file directly inside
    ``../html/pages`` whose extension is ``.html`` so that pages written by
    an earlier run do not linger.
    """
    pages_dir = join("..", "html", "pages")

    # makedirs creates the intermediate "../html" folder too; exist_ok makes
    # the call a no-op when the folders are already there.
    os.makedirs(pages_dir, exist_ok=True)

    for name in os.listdir(pages_dir):
        # get_ext always includes the leading dot, so ".html" is the only
        # spelling that can ever match.
        if get_ext(name) == ".html":
            os.remove(join(pages_dir, name))

In [None]:
def load_data(cn):
    """Load the saved array for corpus `cn`.

    Reads ``../data/<cn>_vgg19_fc2.npy`` (relative to the current working
    directory) with ``np.load`` and returns the result.
    """
    features_path = join("..", "data", cn + "_vgg19_fc2.npy")
    return np.load(features_path)

In [None]:
def write_header(f, cn, index=False):
    """Write the opening HTML and the navigation bar shared by every page.

    Args:
        f: Writable text file object; only ``f.write`` is used.
        cn: Corpus name; shown capitalized in the page heading.
        index: True when writing the index page. When False, every relative
            link (icon, stylesheets, "Back to Index") is prefixed with
            ``../``.
    """
    loc = "" if index else "../"

    f.write("<html>\n")
    f.write('  <link rel="icon" href="{0:s}img/favicon.ico">\n'.format(loc))
    f.write('  <title>Distant Viewing Tutorial</title>\n\n')
    f.write('  <link rel="stylesheet" type="text/css" href="{0:s}css/bootstrap.min.css">'.format(loc))
    f.write('  <link href="https://fonts.googleapis.com/css?family=Rubik+27px" rel="stylesheet">')
    f.write('  <link rel="stylesheet" type="text/css" href="{0:s}css/dv.css">\n\n'.format(loc))

    f.write("<body>\n")
    # The class list is split over two writes; the trailing space keeps
    # "px-md-4" and "mb-3" from fusing into one meaningless class name.
    f.write('  <div class="d-flex flex-column flex-md-row align-items-center p-3 px-md-4 ')
    f.write('mb-3 bg-white border-bottom box-shadow">\n')
    # Same for the heading text: keep a space between the title and the dash.
    f.write('    <h4 class="my-0 mr-md-auto font-weight-normal">Distant Viewing Tutorial Explorer ')
    f.write('&mdash; {0:s}</h4>\n'.format(cn.capitalize()))
    f.write('    <a class="btn btn-outline-primary" href="{0:s}index.html">Back to Index</a>\n'.format(loc))
    f.write('  </div>\n')
    f.write('\n')

In [None]:
def corpus_to_html(corpus):
    """Render the corpus metadata as an HTML table that links to item pages.

    The ``title`` column is replaced by a link to ``pages/<name>.html``,
    where ``<name>`` is the file's base name without its extension; the link
    text is the full ``filename`` value without its extension. The
    ``filename`` column itself is dropped from the table.

    Args:
        corpus: DataFrame with a ``filename`` column. It is not modified.

    Returns:
        The table as an HTML string (cell contents are not escaped, so the
        links are emitted as markup).
    """
    # None means "no limit"; the older -1 spelling is deprecated and rejected
    # by recent pandas. Presumably the limit is lifted so that the long link
    # markup is not shortened in the output.
    pd.set_option('display.max_colwidth', None)

    links = []
    for path in corpus['filename']:
        page = rm_ext(os.path.split(path)[1])
        links.append("<a href='pages/{0:s}.html'>{1:s}</a>".format(page, rm_ext(path)))

    tc = corpus.copy()
    # Whole-column assignment works for any row index and keeps an existing
    # "title" column in its original position.
    tc['title'] = links

    tc = tc.drop(['filename'], axis=1)
    return tc.to_html(index=False, escape=False, justify='center')

In [None]:
def create_index(cn, corpus):
    """Write ``../html/index.html``: the shared header plus the corpus table.

    Args:
        cn: Corpus name, passed on to the page header.
        corpus: DataFrame of metadata rendered by ``corpus_to_html``.
    """
    # The with-block closes the file even if rendering the table raises.
    with open(join('..', 'html', 'index.html'), 'w') as f:
        write_header(f, cn=cn, index=True)
        f.write('  <div style="padding:20px; max-width:1000px">\n')

        f.write(corpus_to_html(corpus))

        f.write('  </div>\n')
        f.write("</body>\n")
        # write_header opens <html>; close it so the document is balanced.
        f.write("</html>\n")

In [None]:
def get_infobox(corpus, item):
    """Build the HTML metadata lines for one row of the corpus.

    Returns a list with one ``<p>`` string per column of the row at
    position `item`, in column order, skipping the ``filename`` column. Each
    entry shows the capitalized column name in bold followed by the value.
    """
    row = corpus.iloc[item].to_dict()
    return [
        "<p><b>" + str(field).capitalize() + ":</b> " + str(value) + "</p>"
        for field, value in row.items()
        if field != "filename"
    ]

In [None]:
def save_metadata(f, cn, corpus, X, item):
    """Write the metadata box and the previous/next links for one item page.

    Args:
        f: Writable text file object for the page being built.
        cn: Corpus name. Not used by this function.
        corpus: DataFrame with a ``filename`` column, one row per item.
        X: Array whose first dimension gives the number of items.
        item: Position of the current item within `corpus` and `X`.
    """
    infobox = get_infobox(corpus, item)
    filenames = corpus['filename']

    f.write("<div style='width: 1000px;'>\n")
    f.write("\n".join(infobox))

    has_prev = item > 0
    has_next = item + 1 < X.shape[0]

    # Open and close the paragraph here rather than inside the two branches,
    # so the first and last pages also get balanced <p>...</p> markup.
    if has_prev or has_next:
        f.write("<p align='center'>")
        if has_prev:
            # Positional lookup, matching get_infobox's use of iloc.
            link = rm_ext(os.path.split(filenames.iloc[item - 1])[-1])
            f.write("<a href='{0:s}.html'>&#60;&#60; previous image</a> &nbsp;&nbsp;&nbsp;&nbsp;\n".format(link))
        if has_next:
            link = rm_ext(os.path.split(filenames.iloc[item + 1])[-1])
            f.write("&nbsp;&nbsp;&nbsp;&nbsp; <a href='{0:s}.html'>next image &#62;&#62;</a>".format(link))
        f.write("</p>\n")

    f.write("</div>\n")

In [None]:
def save_similar_img(f, cn, corpus, X, item, n_similar=12):
    """Write the "Similar Images" strip for one item page.

    Similarity is the L1 distance (sum of absolute differences along axis 1)
    between row `item` of `X` and every other row; the closest rows are shown
    as thumbnails that link to their own pages.

    Args:
        f: Writable text file object for the page being built.
        cn: Corpus name; used as the image sub-folder in the ``src`` path.
        corpus: DataFrame with a ``filename`` column, one row per row of `X`.
        X: 2-D array with one row per item.
        item: Position of the current item (expected to be non-negative).
        n_similar: Maximum number of thumbnails to write.
    """
    dists = np.sum(np.abs(X - X[item, :]), 1)
    order = np.argsort(dists.flatten(), kind="stable")
    # Drop the query image explicitly instead of assuming it sorts first:
    # an exact duplicate ties at distance 0 and may come ahead of it.
    idx = order[order != item][:n_similar]

    f.write("<div style='clear:both; width: 1000px; padding-top: 30px'>\n")
    f.write("<h4>Similar Images:</h4>\n")
    f.write("<div class='similar'>\n")

    # idx holds positions, so select rows positionally.
    for img_path in corpus['filename'].iloc[idx].tolist():
        hpath = rm_ext(os.path.split(img_path)[1])
        f.write('<a href="{0:s}.html"><img src="../../images/{1:s}/{2:s}" style="max-width: 150px; padding:5px"></a>\n'.format(hpath, cn, img_path))

    f.write("</div>\n")
    f.write("</div>\n")

In [None]:
def create_image_pages(cn, corpus, X):
    """Write one HTML page per item into ``../html/pages``.

    Each page is named after the image's base file name (extension replaced
    by ``.html``) and contains the shared header, the image itself, its
    metadata box with previous/next links, and the similar-images strip.

    Args:
        cn: Corpus name; used in the header and in image ``src`` paths.
        corpus: DataFrame with a ``filename`` column, one row per row of `X`.
        X: Array with one row per item; its first dimension sets the number
            of pages written.
    """
    filenames = corpus['filename']

    for item in range(X.shape[0]):
        # item is a position, so look the row up positionally.
        img_path = filenames.iloc[item]
        url = os.path.split(img_path)[1]
        page_path = join('..', 'html', 'pages', rm_ext(url) + ".html")

        # The with-block closes the page even if one of the writers raises.
        with open(page_path, 'w') as f:
            write_header(f, cn, index=False)

            f.write("<div style='padding:25px'>\n")

            # Main image
            f.write("<div style='float: left; width: 610px;'>\n")
            f.write('<img src="../../images/{0:s}/{1:s}" style="max-width: 600px; max-height: 500px;">\n'.format(cn, img_path))
            f.write("</div>\n\n")

            # Main information box
            save_metadata(f, cn, corpus, X, item)

            # Similar
            save_similar_img(f, cn, corpus, X, item)

            # Close the padding wrapper opened above, then the document.
            f.write("</div>\n")
            f.write("</body>\n")
            f.write("</html>\n")

Now, you simply need to select one of the three corpora to work with and run the
code below. Set the variable `cn` to either "wikiart", "bewitched", or "fsa_owi"
and run the code block. It will take a minute or two to finish running.

In [None]:
# Corpus to explore: "wikiart", "bewitched", or "fsa_owi".
cn = "wikiart"

# Start from a clean output folder before anything is written.
clean_html()

# Metadata table (<cn>.csv) and the saved array (<cn>_vgg19_fc2.npy).
metadata_path = join("..", "data", cn + ".csv")
corpus = pd.read_csv(metadata_path)
X = load_data(cn)

# Index page first, then one page per item.
create_index(cn, corpus)
create_image_pages(cn, corpus, X)

You should find a folder called `html`. Open that folder and double click on the
file `index.html`, opening it in a web browser (Chrome or Firefox preferred; Safari
should work too). Do not open it in Jupyter.

You will see a list of all of the available images from the corpus you selected.
Click on one and you'll get to an item page for that image. From there you can
see the image itself, available metadata, select the previous or next image in the
corpus, and view similar images from the VGG19 similarity measurement.