# Class Demos Oct 3, 2025

Things we did in class on Friday, Oct 3. 

In [1]:
import csv 

In [2]:
!cat ../collection-site-materials/metadata-template-cb.csv

objectid,title,creator,date,description,subject,location,latitude,longitude,source,identifier,type,format,language,rights,rightsstatement,display_template,object_location,image_small,image_thumb,image_alt_text,object_transcript


In [3]:
with open('../collection-site-materials/metadata-template-cb.csv') as f:
    data = csv.reader(f)
    for record in data:
        print(record)

['objectid', 'title', 'creator', 'date', 'description', 'subject', 'location', 'latitude', 'longitude', 'source', 'identifier', 'type', 'format', 'language', 'rights', 'rightsstatement', 'display_template', 'object_location', 'image_small', 'image_thumb', 'image_alt_text', 'object_transcript']


In [4]:
csv_file = '../collection-site-materials/metadata-template-cb.csv'

with open(csv_file) as f:
    data = csv.DictReader(f)
    
    for record in data.fieldnames:
        print(record)

objectid
title
creator
date
description
subject
location
latitude
longitude
source
identifier
type
format
language
rights
rightsstatement
display_template
object_location
image_small
image_thumb
image_alt_text
object_transcript


## Looking at JSON

Yay!

In [5]:
import json

In [6]:
stringofJsondata = '{"Results":2,"isPrinted":true, "books": []}'

jsonasPython = json.loads(stringofJsondata)

print(jsonasPython)

{'Results': 2, 'isPrinted': True, 'books': []}


In [7]:
jsonasPython

{'Results': 2, 'isPrinted': True, 'books': []}

In [8]:
for key in jsonasPython:
    print(key)

Results
isPrinted
books


In [9]:
jsonasPython["Results"]

2

In [10]:
jsonasPython

{'Results': 2, 'isPrinted': True, 'books': []}

In [11]:
newstringofJsondata = json.dumps(jsonasPython, indent = 2)

print(newstringofJsondata)

{
  "Results": 2,
  "isPrinted": true,
  "books": []
}


In [12]:
with open('test-json-file.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(jsonasPython, indent=2))
    print('wrote file')

wrote file


# Using Requests to get data from LOC

In [13]:
import json
import requests 

In [14]:
baseurl = 'http://www.loc.gov'
endpoint = '/search'

In [15]:
print('Query url:', baseurl + endpoint)

Query url: http://www.loc.gov/search


In [16]:
parameters = {
    'fo' : 'json',
    'q' : 'kittens'
}

In [17]:
r = requests.get(baseurl + endpoint, params=parameters)

print(r.status_code)

200


In [18]:
r.headers['content-type']

'application/json'

In [19]:
r.status_code

200

In [20]:
r.text[:250]

'{"breadcrumbs": [{"Library of Congress": "https://www.loc.gov"}, {"Search": "https://www.loc.gov/search/?fo=json&q=kittens"}], "expert_resources": null, "facet_trail": [{"facet": "searchTerms", "field": "searchTerms", "superset": "https://www.loc.gov'

In [21]:
for header in r.headers:
    print(header)

Date
Content-Type
Content-Length
Connection
CF-RAY
Access-Control-Allow-Origin
Referrer-Policy
Strict-Transport-Security
X-Content-Type-Options
X-Robots-Tag
X-Frame-Options
ETag
Expires
Content-Security-Policy
Age
X-Grace
X-Nearside-Cache
X-Nearside-Cache-Hits
Cache-Control
Accept-Ranges
cf-cache-status
Vary
Server


In [22]:
for key in r.json():
    print(key)

breadcrumbs
expert_resources
facet_trail
facet_views
facets
form_facets
notice
options
pagination
results
search
timestamp
views


In [23]:
for item in r.json()['results']:
    print(item['title'])

Fancy Nancy : oodles of kittens
[Kittens]
Cat and kittens
Kitten on the keys
[Mother cat and kittens]
Feeding his kittens on cornbread
The sick kitten
Boys with kitten
[Boys with kitten]
Girl with kitten
The tortoiseshell kitten, or, The adventures of Puss for three months.
The cat and her four kittens
The cat and her four kittens
Kitten on the keys
No “Kitten” Around: Cats in the Library of Congress Collection
The three jolly kittens - at the feast
Two jolly kittens at a feast
The three jolly kittens - after the feast
S.3780 - 115th Congress (2017-2018): KITTEN Act of 2018
S.708 - 116th Congress (2019-2020): KITTEN Act of 2019
Scrap and his kitten
[Girl with sleeping kitten]
Calvin Coolidge [with kitten]
[Domestic cat nursing kittens]
[Kitten's party (child study)]


In [24]:
data = json.loads(r.text)

for record in data:
    print(record)

breadcrumbs
expert_resources
facet_trail
facet_views
facets
form_facets
notice
options
pagination
results
search
timestamp
views


In [25]:
kittensList = data['results']

for kitten in kittensList:
    print(kitten)

{'access_restricted': False, 'aka': ['http://lccn.loc.gov/2017951332', 'http://firstsearch.oclc.org/WebZ/DECRead?standardNoType=1&standardNo=0062269879&sessionid=0&srcdbname=worldcat&key=521ed68bb5b0157a1afddd629df06a7f68e02fb2dbfb5ede2fb6d60b5bf8eaed&ectype=MOREINFO', 'http://www.loc.gov/item/2017951332/'], 'campaigns': [], 'contributor': ["o'connor, jane", 'preiss-glasser, robin'], 'date': '2018', 'dates': ['2018'], 'description': ['After Nancy and Bree find kittens out in the rain, Nancy decides to keep one, putting her posh pup, Frenchy, on edge.'], 'digitized': True, 'extract_timestamp': '2023-06-02T15:48:18.498Z', 'group': ['catalog-split-01', 'catalog', 'main-catalog-split-01', 'main-catalog'], 'hassegments': False, 'id': 'http://lccn.loc.gov/2017951332', 'image_url': [], 'index': 1, 'item': {'call_number': ['PZ7.O222 Fgk 2018'], 'contributors': ["O'Connor, Jane, author.", 'Preiss-Glasser, Robin, illustrator.'], 'created_published': ['New York, NY : Harper, an imprint of HarperC

Note in the above that the items in this list are not numbered,
so in order to pull out the first one, you need to provide an index.
To do this with the original data object, you can use a notation like the following,
which could be a way to investigate the information about individual items:

In [28]:
data['results'][0]['title']

'Fancy Nancy : oodles of kittens'

To see what individual metadata fields are available for each object,
you can do the following.

_Remember that in JSON, the data structure allows for nested lists (aka, arrays or dictionaries),
so many of these do not correspond to a single value. For example, `creators` often has more than one value._

In [29]:
for field in data['results'][0]:
    print(field)

access_restricted
aka
campaigns
contributor
date
dates
description
digitized
extract_timestamp
group
hassegments
id
image_url
index
item
language
number
number_lccn
number_oclc
number_source_modified
original_format
other_title
partof
resources
shelf_id
site
subject
timestamp
title
type
url
