# ECC058 - DHLab

# Worked example: URLImage Class and IIIF


#### Prof. Stephen White
#### stephen.white@unive.it


#### Resources:
https://iiif.io/api/image/2.0/#identifier

**BaseURL/Region/Size/Rotation/ImageFormat**

# Display Image from Inscription Image URL for Query Results
1. Load query results page
2. Find a single result
    - Create BeautifulSoup from result string
    - Get a list of all result 'table' elements of class 'treffertabelle'
    - Load a single result into a variable
3. Find and load the IIIF image page URL link, if it exists
    - Find the IIIF image page link to the inscription image
    - If it exists, load its href property into a variable as a full URL
    - Load URL results
4. Find and load URL for IIIF json manifest
    - Create BeautifulSoup from result string
    - Get the URL link to manifest.json from the anchor inside the element with id 'copy_edh_uri'
    - Load manifest.json into a dictionary variable
    - Using the manifest, find and load into a variable the IIIF image URL
5. Display Image
    - Create URLImage object
    - Load Image
    - Show Image

In [None]:
#development commands
%reload_ext autoreload
%autoreload 2


#load needed libraries
import urllib.request
from bs4 import BeautifulSoup
import re,json
import pandas as pd
import EpiProject.urlimage as urlimage

In [None]:
# Build the search URL for one page of query results.

# Paging: the query string ends in "start=", so the offset is appended last.
offset = 0

# Language switch for English output.
# NOTE: defined here but not appended to the query URL built below.
paramLang = "&lang=en"

# Scheme and host of the site; also reused to make relative links absolute.
urlBase = "https://edh-www.adw.uni-heidelberg.de"

# Path plus query parameters of the extended search (anzahl=100 -> page size 100).
urlQueryBase = "/inschrift/erweiterteSuche?hd_nr=&tm_nr=&beleg=c&land=&fo_antik=&fo_modern=&fundstelle=&region=&compFundjahr=eq&fundjahr=&aufbewahrung=&inschriftgattung=&sprache=L&inschrifttraeger=&compHoehe=eq&hoehe=&compBreite=eq&breite=&compTiefe=eq&tiefe=&bh=&palSchreibtechnik=&dat_tag=&dat_monat=&dat_jahr_a=&dat_jahr_e=&hist_periode=&religion=&literatur=&kommentar=&p_name=&p_praenomen=&p_nomen=&p_cognomen=&p_supernomen=&p_tribus=&p_origo=&p_geschlecht=&p_status=&compJahre=eq&p_lJahre=&compMonate=eq&p_lMonate=&compTage=eq&p_lTage=&compStunden=eq&p_lStunden=&atext1=&bool=AND&atext2=&beleg89=ja&nurMitFoto=ja&sort=hd_nr&anzahl=100&addFeldMaterial=ja&addFeldDTyp=ja&addFeldIGat=ja&start="

# Assemble the full query URL: host + search path/query + paging offset.
url = f"{urlBase}{urlQueryBase}{offset}"
#print(url)

In [None]:
#1. Load query results page
# The context manager closes the HTTP response even if read() raises,
# so the connection is never leaked.
with urllib.request.urlopen(url) as f:
    htmlDocString = f.read()  # raw bytes of the results page
# take a quick look at the html
#print(htmlDocString)


In [None]:
#2. Find a single result

# Parse the downloaded page bytes with the stdlib-backed 'html.parser'.
htmlSoup = BeautifulSoup(htmlDocString, features='html.parser')
#print(htmlSoup.prettify())

# Collect every <table> element carrying the CSS class 'treffertabelle';
# each one holds a single query result.
tableRefferList = htmlSoup.select('table.treffertabelle')
#print(type(tableRefferList)) #<class 'list'>
#print(len(tableRefferList))  #100

# While developing, scrape only one result: the 3rd table (index 2).
# Later the index will be replaced by a loop over tableRefferList.
devResultIndex = 2
curTable = tableRefferList[devResultIndex]
#print (curTable.prettify())

In [None]:
#3. Find iiif image URL link if exist

#      - Find links to the inscription image page:
#        every anchor in the current result whose href contains 'iiif'
iiifLinksList = curTable.select('a[href*=iiif]')
#print(iiifLinksList)

#      - If one exists, build the full URL from the first link's href
#        (the href is appended to the site base URL); otherwise keep None
#        to mark the case where the result has no IIIF link
urlIIIFPage = urlBase + iiifLinksList[0].get('href') if iiifLinksList else None

print(urlIIIFPage)

#    - Load URL results
imgPageHtml = None   #set to None to signal other code if not loaded

if urlIIIFPage is not None:
    # The request is sent with a browser-like User-agent header.
    requestHeaders = {'User-agent' : 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5'}
    req = urllib.request.Request(urlIIIFPage, headers = requestHeaders)
    # The context manager closes the response even if read() raises.
    with urllib.request.urlopen(req) as f:
        imgPageHtml = f.read()

#print(imgPageHtml)


In [None]:
#4. Find and load URL for IIIF manifest

# imgPageHtml is None when the result had no IIIF link; fail with a clear
# message instead of an opaque TypeError from the parser.
if imgPageHtml is None:
    raise RuntimeError('No IIIF image page was loaded for this result; cannot look up the manifest.')

#    - Create BeautifulSoup from result string
imgPageSoup = BeautifulSoup(imgPageHtml,'html.parser')
#print(imgPageSoup.prettify())

#    - Get url link to manifest.json: the first anchor found inside the
#      element with id 'copy_edh_uri' (None when there is no such anchor)
manJsonLinks = imgPageSoup.select("#copy_edh_uri a")
urlManJson = manJsonLinks[0].get('href') if manJsonLinks else None
#print(urlManJson)

#    - Load manifest.json into a dictionary variable
dictImgManifest = None   #set to None to signal other code if not loaded

if urlManJson is not None:
    # The request is sent with a browser-like User-agent header.
    requestHeaders = {'User-agent' : 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5'}
    req = urllib.request.Request(urlManJson, headers = requestHeaders)
    # The context manager closes the response even if read() raises.
    with urllib.request.urlopen(req) as f:
        jsonBytes = f.read()
    dictImgManifest = json.loads(jsonBytes)

#print(dictImgManifest)
#print(json.dumps(dictImgManifest, indent=2))

# Without a manifest the lookup below would fail with
# "'NoneType' object is not subscriptable"; report the real cause instead.
if dictImgManifest is None:
    raise RuntimeError('No IIIF manifest link was found on the image page; cannot determine the image URL.')

#    - Using the manifest, find and load into a variable the IIIF image url:
#      first sequence -> first canvas -> first image -> resource '@id'
urlImgIIIF = dictImgManifest['sequences'][0]['canvases'][0]['images'][0]['resource']['@id']

#print(urlImgIIIF)

In [None]:
def splitIIIFImageURL(imageURL):
    """Split an image URL of the form BaseURL/Region/Size/Rotation/ImageFormat.

    The four trailing '/'-separated segments are peeled off from the right;
    everything before them is returned as the base URL.

    Returns:
        A tuple (baseURL, region, size, rotation, imageFormatName).

    Raises:
        ValueError: if imageURL contains fewer than four '/' separators, so
            the four trailing segments cannot all be present.
    """
    parts = imageURL.rsplit('/', 4)
    if len(parts) != 5:
        raise ValueError(f'Expected BaseURL/Region/Size/Rotation/ImageFormat, got: {imageURL!r}')
    return tuple(parts)


# Decompose the image URL taken from the manifest into its components.
iiifBaseURL, iiifOrigRegion, iiifOrigSize, iiifOrigRot, iiifImgFormatName = splitIIIFImageURL(urlImgIIIF)

print('iiifImgFormatName = ',iiifImgFormatName)
print('iiifOrigRot = ',iiifOrigRot)
print('iiifOrigSize = ',iiifOrigSize)
#print('iiifOrigRegion = ',iiifOrigRegion)
#print('iiifBaseURL',iiifBaseURL)

# Rebuild the URL with the size segment replaced by 'pct:10', keeping the
# original region, rotation and image format name.
iiif10pctURL = '/'.join((iiifBaseURL, iiifOrigRegion, 'pct:10', iiifOrigRot, iiifImgFormatName))
#print('iiif10pctURL = ',iiif10pctURL)


In [None]:
#5. Display Image

# Wrap the reduced-size image URL in a URLImage object.
myIMG = urlimage.URLImage(iiif10pctURL)

# Try to load the image; loadImage() returns a truthy value on success.
imageLoaded = myIMG.loadImage()
if not imageLoaded:
    print('something failed')
else:
    # Report the size of the loaded image, then show it.
    print(myIMG.getImage().size)
    myIMG.show()



# Play Around Area - Scratch Code