# Class Demos from 20251024 - Oct 24, 2025

Demoing various things, including:

- working with files (using Path from `pathlib`)
- XPath & functionality in python using ET (Element Tree)
- MODS and searching through MODS (encoded in XML)
- working with LOC 'API' to extract collection content

In [1]:
from pathlib import Path
import xml.etree.ElementTree as ET

In [4]:
ead_file = Path('../data/xml/day_20221004_205435_UTC__ead.xml')

In [5]:
ead_file.is_file()

True

In [6]:
tree = ET.parse(ead_file)
root = tree.getroot()

print(root[:250])

[<Element '{http://ead3.archivists.org/schema/}control' at 0x1074cd1c0>, <Element '{http://ead3.archivists.org/schema/}archdesc' at 0x1074ce660>]


In [7]:
ns = {
    'ead': 'http://ead3.archivists.org/schema/'
}

In [9]:
title = tree.find('ead:control/ead:filedesc/ead:titlestmt', ns)
print(title.tag, title.text)

{http://ead3.archivists.org/schema/}titlestmt None


In [None]:
for element in tree.findall('.//ead:titlestmt', ns):
    print(element.tag, element.text)

{http://ead3.archivists.org/schema/}titlestmt None


In [15]:
for element in tree.findall('.//ead:titleproper', ns):
    print(element.tag, element.text)

{http://ead3.archivists.org/schema/}titleproper Finding Aid for the William R. Day Collection day 
{http://ead3.archivists.org/schema/}titleproper William R. Day Collection


In [None]:
for element in tree.findall('.//ead:c01', ns):
    #get the 
    print(f'Series id: {element.attrib["id"]}\nSeries name: {element.text}')

Series id: aspace_ref1
Series name: None
Series id: aspace_ref18
Series name: None
Series id: aspace_ref22
Series name: None
Series id: aspace_ref26
Series name: None
Series id: aspace_ref28
Series name: None


In [None]:
for element in tree.findall('.//ead:c01', ns):
    #get the id
    print(f'Series id: {element.attrib["id"]}b tag: {element.tag}\n')

    scope = element.find('.//ead:scopecontent/ead:p', ns)
    print(f'Scope note:\n{scope.text}\n')

Series id: aspace_ref1 tag: {http://ead3.archivists.org/schema/}c01

Scope note:
The Correspondence and Papers series contains correspondence and papers from William Day and various family members.

Series id: aspace_ref18 tag: {http://ead3.archivists.org/schema/}c01

Scope note:
The Manuscripts series contains work by William Day and his son, Stephen Day. It also has a dissertation about William Day by Joseph McLean, and a folder of miscellaneous materials.

Series id: aspace_ref22 tag: {http://ead3.archivists.org/schema/}c01

Scope note:
The Newspaper series includes issues of the University of Michigan newspaper 

Series id: aspace_ref26 tag: {http://ead3.archivists.org/schema/}c01

Scope note:
The Publications series brings together the published materials within the William R. Day Collection, including some of his speeches.

Series id: aspace_ref28 tag: {http://ead3.archivists.org/schema/}c01

Scope note:
The Ephemera series contains a carte de visite and a packet of seeds.



## Extract: List the things

Harvest? Extract? :shrug:

In [32]:
import json
import requests
import csv
from pathlib import Path

In [23]:
# set up for the API requests
endpoint = 'https://www.loc.gov/free-to-use/'
parameters = { 'fo':'json' }
collection = 'libraries'

In [24]:
collection_list = requests.get(endpoint + collection, params=parameters)

collection_list.url

'https://www.loc.gov/free-to-use/libraries?fo=json'

In [26]:
collection_list.status_code

200

In [27]:
collection_list.json()

{'breadcrumbs': [{'Library of Congress': 'https://www.loc.gov'},
  {'Library of Congress Free to Use and Reuse': 'https://www.loc.gov/free-to-use/'},
  {'Free to Use and Reuse: Libraries': 'https://www.loc.gov/free-to-use/libraries/'}],
 'content': {'active': True,
  'description': ['<p>People create libraries in all shapes and sizes to serve their local communities. This set of free to use images represents buildings in almost all 50 states of the United States.</p>\n<p>Browse more\xa0<a href="/free-to-use/">content that is free to use and reuse</a>. </p>\n'],
  'disable_navigation': True,
  'image_url': ['/static/portals/free-to-use/public-domain/libraries/libraries-1.jpg'],
  'link': 'https://www.loc.gov/free-to-use/libraries/',
  'markup': '{% load filters %}\n<section class=\'public-domain-abstract\'>\n    <p>{{content.abstract|safe}}</p>\n</section>\n<style>\n    .public-domain-set > li {\n        padding-bottom: 10px;\n    }\n</style>\n<section class="public-domain"><a id="reuse

In [28]:
collection_json = collection_list.json()

for item in collection_json['content']['set']['items']:
    print(item)

{'image': '/static/portals/free-to-use/public-domain/libraries/libraries-1.jpg', 'link': '/resource/cph.3f05183/', 'title': 'For greater knowledge, on more subjects, use your library more often. Illinois WPA Arts Project, 1936-1941. Prints & Photographs Division'}
{'image': '/static/portals/free-to-use/public-domain/libraries/libraries-2.jpg', 'link': '/resource/highsm.20336/', 'title': 'Noyes Library for Young Children. Kensington, Maryland. Photo by Carol M. Highsmith,  2011. Prints & Photographs Division'}
{'image': '/static/portals/free-to-use/public-domain/libraries/libraries-3.jpg', 'link': '/resource/fsa.8d24709/', 'title': 'Bethune-Cookman College. Students in the library reading room, Daytona Beach, Florida. Gordon Parks, 1943. Prints & Photographs Division'}
{'image': '/static/portals/free-to-use/public-domain/libraries/libraries-4.jpg', 'link': '/resource/highsm.36052/', 'title': 'Public library in Antonito,  Colorado, near the New Mexico border. Photo by Carol M. Highsmith,

In [30]:
len(collection_json['content']['set']['items'])

62

In [34]:
collection_set_list = Path('collection_set_list.csv')

with open(collection_set_list, 'w', encoding='utf-8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=['image','link','title'])
    writer.writeheader()

    for item in collection_json['content']['set']['items']:
        item['title'] = item['title'].rstrip()
        writer.writerow(item)

## Get the Metadata

aka, "show me the things!" :tada:

In [35]:
item_data_sample = requests.get('https://loc.gov/resource/cph.3f05183/?fo=json')

item_data_sample.status_code

200

In [38]:
item_data_sample.json()

{'articles_and_essays': None,
 'cite_this': {'apa': 'Federal Art Project, S. <cite>For Greater Knowledge on More Subjects Use Your Library More Often</cite>. Illinois Chicago, None. [Chicago: illinois wpa art project, between 1936 and 1941] [Photograph] Retrieved from the Library of Congress, https://www.loc.gov/item/98508155/.',
  'chicago': 'Federal Art Project, Sponsor. <cite>For Greater Knowledge on More Subjects Use Your Library More Often</cite>. Illinois Chicago, None. [Chicago: illinois wpa art project, between 1936 and 1941] Photograph. https://www.loc.gov/item/98508155/.',
  'mla': 'Federal Art Project, Sponsor. <cite>For Greater Knowledge on More Subjects Use Your Library More Often</cite>. [Chicago: illinois wpa art project, between 1936 and 1941] Photograph. Retrieved from the Library of Congress, &lt;www.loc.gov/item/98508155/&gt;.'},
 'item': {'_version_': 1754109026683060224,
  'access_restricted': False,
  'aka': ['https://www.loc.gov/pictures/item/98508155/',
   'http