Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
226 lines (186 sloc) 7.89 KB
#!/usr/bin/python
# genCollectionInterface.py
# generate a web interface from the result of a search on the internet archive
# dumped as a csv file, organized into categories from the category list. The
# categories are searched for in the subject field of the csv dump
# all un-categorized data is saved in 'otherCategory.html'
#
# 1) extract header, body, and etc from the template
# 2) output header
# 3) start body
# 4) for each entry in the csv file
# parse out the fields, save in list, categorize
# build the appropriate inteface element, save in categories lists
# 5) buildIntefaceElement(meta)
# get the cover using the OLID map and the OL Cover API
# create link to bookreader with URL of book using cover as "button"
# make the title, display under cover
# make a tooltip string using the metadata
# 6) generate the body using the interface elements above, one page for each category
#
#
#
import os
import sys
import csv
import re
def usage():
print 'usage: genweb.py [options] <input csv> <input categories file> [<output.html>]'
print ' options:'
print ' -attached-storage: generate links for attached storage solution'
# creates the interface elements (html, links, tooltip stuff, cover img link, etc) given
# a list of the metadata elements
def buildInterfaceElement(outfile,m):
# build the output for the div containg book cover, title, link
global attStorage # attached storage link switch
# open the div template
divstr = open('divtmpl.html.tmpl','r').read();
# replace the DOM element id with a generated one, use the book id?
divstr = re.sub('\$idstr', m[4],divstr)
# replace the cover img src with the one for this book
# this will use the mapping from iacl to open library id's from the
# original book list
olid =mapIaclIDToOLID(m[4])
if olid != '':
if attStorage == 0:
coverurl = getCoverUrl(olid)
else:
coverurl = getCoverUrl(m[4])
else: coverurl = ''
divstr = re.sub('\$coverstr',coverurl,divstr)
#replace the iacl lookup id with the one for this book in the link
if attStorage == 0:
#switch which of the following divstr assignment is commented to select what type of link
# interface to create
# 1. link to download djvu
# divstr = re.sub('\$iaclid',"http://archive.us.org/stream/" + m[4] + "/" + m[4] + ".djvu",divstr)
# 2. link to the embedded bookreader
divstr = re.sub('\$iaclid', "http://www.us.archive.org/GnuBook/GnuBookEmbed.php?id=" + m[4], divstr)
else:
# 3. link to already downloaded content on attached storage
divstr = re.sub('\$iaclid',"file:///media/LIB/djvu/" + m[4] + ".djvu",divstr)
#replace title
# tear the titles down to max 8 words
# split first at ':', then max 8 words of what is left of ':'
l = (m[1].split(':',1)[0]).split(' ',8)[0:7]
title = ''
for w in l:
title = title + w + ' '
divstr = re.sub('\$title',title,divstr)
outfile.write(divstr)
# build the tooltip list for each output file
# format of Tip:
# new Tip('<dom_id_of_element>', '<b>Full Title:<\/b> Adventures of Huckleberry Finn (Tom Sawyer\'s comrade)...<br \/><br \/><b>Author:<\/b> Twain, Mark, 1835-1910<br \/><br \/><b>Details:<\/b> Spec. Coll. copy 1: Green cloth over boards, blocked in gold and black, in case. Ex libris Olive Percival. Gift of Jo Swerling<br \/><br \/><b>Subjects:<\/b> Adventure and adventurers, Mississippi River -- Juvenile literature,Missouri -- Juvenile literature, Adventures of Huckleberry Finn');
def buildTooltip(ttfile,m):
# build the output string - probably wind up as tool tip
outstr = "new Tip('" + re.sub('\'','\\\'', m[4]) + "', '<b>Full Title:<\/b> " + re.sub('\'','\\\'', m[1])
outstr = outstr + '<br \/><br \/><b>Author:<\/b> ' + re.sub('\'','\\\'', m[0])
outstr = outstr + '<br \/><br \/><b>Details:<\/b> ' + re.sub('\'','\\\'', m[5] )
outstr = outstr + '<br \/><br \/><b>Subjects:<\/b> ' + re.sub('\'','\\\'', m[2] )
# outstr = outstr + 'date: ' + re.sub('\'','\\\'', m[3].split('-',1)[0]) + '<br>\n'
# outstr = outstr + 'file: ' + re.sub('\'','\\\'', m[4]) + '<br>\n'
outstr = outstr + "');\n"
ttfile.write(outstr)
# returns true if book is in the booklist file
def isInBookList(id):
bl = open('iaclBookList.txt','r')
for line in bl.readlines():
if line.find(id) != -1:
return 1
return 0
#returns the Open Library ID using the mappings in the book list file
def mapIaclIDToOLID(id):
bl = open('iaclBookList.txt','r')
for line in bl.readlines():
if line.find(id) != -1:
return(line.split(' ',1)[0].lstrip('/b'))
return('')
# returns the cover url, argument is img file basename
def getCoverUrl(name):
global attStorage;
if attStorage == 0:
return "http://covers.openlibrary.org/b/OLID/" + name + "-M.jpg"
else:
return "file:///media/LIB/covers/" + name + ".jpg"
# MAIN
i = 0
attStorage = 0
infile=''
outfile=''
catfile=''
meta=[] # list of csv meta data
for arg in sys.argv:
if arg[0] == '-':
if arg == "-attached-storage":
attStorage = 1;
else:
print 'unrecognized option: ' + arg + ' IGNORED'
continue;
if i == 1:
infile=arg
if i == 2:
catfile=arg
i = i+1
print i
print attStorage
if i < 3:
usage()
sys.exit(1)
print 'opening input csv for read: ' + infile
reader=csv.reader(open(infile, 'r'))
title = author = description = subject = date = ''
#indexes for the various fields
aidx=1 # author
tidx=7 # title
sidx=6 # subject, will extract category from here
didx=2 # date
fidx=5 # file - identifier field used to derive the book link
descidx=3 # description
for row in reader:
for f in row:
author=row[aidx]
title=row[tidx]
subject=row[sidx]
date=row[didx]
file=row[fidx]
desc=row[descidx]
if isInBookList(file):
meta.append([author, title, subject, date, file, desc])
cats = open(catfile,'r').readlines() # get the categories list
for m in meta:
# filter subject categories
# search subject m[2] for a match in categories
outcat='' # category string
for line in cats:
if re.search(line.rstrip(),m[2],re.I) != None:
print "found a category match for " + line
print "subject is: " + m[2]
print "-------------------------------------------------"
# if there's a match output to that category
outcat = line.rstrip() + '.html'
outcatf = open( outcat, 'a')
ttfname = line.rstrip() + 'Tooltip.js'
ttf = open( ttfname, 'a')
buildInterfaceElement(outcatf,m);
buildTooltip(ttf,m)
if (outcat == ''): # went through cat filter without a match
outcatf = open('other.html', 'a') # dump to the catchall file
ttf = open( 'otherTooltip.js', 'a')
buildInterfaceElement(outcatf,m);
buildTooltip(ttf,m)
# now concat templates head, <cat>.html, <cat_tooltip>.js and foot
cathtml = open('categories.html','w')
cathtml.write(re.sub('\$header','Categories',open('headtmpl.html.tmpl','r').read()))
for line in cats:
if os.path.exists(line.strip() + '.html') != True:
continue
# write the category html file
open(line.strip() + 'Category.html', 'w').write( re.sub('\$header',line.strip(),open('headtmpl.html.tmpl','r').read()) + open(line.strip() + '.html','r').read() + "<script type='text/javascript' language='javascript'>" + open(line.strip() + 'Tooltip.js','r').read() + open('foottmpl.html.tmpl','r').read() )
# write the link to the category in the categories.html
cathtml.write('<a href="' + line.strip() + 'Category.html' + '">' + line.strip() + "</a><br>\n")
if os.path.exists('other.html') == True:
open('otherCategory.html', 'w').write(open('headtmpl.html.tmpl','r').read() + open('other.html','r').read() + "<script type='text/javascript' language='javascript'>" + open('otherTooltip.js','r').read() + open('foottmpl.html.tmpl','r').read() )
cathtml.write('<a href="otherCategory.html">Other Category</a><br>\n')
# complete the categories.html
cathtml.write(open('foottmpl.html.tmpl','r').read())
sys.exit()