## HTTP and Python demos

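A demo notebook showing how to send HTTP requests to the Library of Congress (loc.gov) with Python's `requests` library and inspect the responses.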

In [1]:
import requests

## Defining a URL / URI



In [2]:
# Base address of the Library of Congress website; later cells append an item path and query parameters to it.
url = 'https://www.loc.gov'
# Base address used further down, together with an item's control number, to build its LCCN permalink.
lccn_url = 'https://lccn.loc.gov'

In [3]:
# Send a GET request for the LOC home page and keep the Response object in `r`.
# The timeout makes the call raise an exception instead of waiting forever if the server never answers.
r = requests.get(url, timeout=30)

In [4]:
# The URL the response was served from.
r.url 

'https://www.loc.gov/'

In [5]:
# The numeric HTTP status code of the response.
r.status_code

200

In [6]:
# The response headers, displayed as a mapping of header name to value.
r.headers 

{'Date': 'Mon, 07 Oct 2024 15:06:49 GMT', 'Content-Type': 'text/html; charset=UTF-8', 'Content-Length': '175996', 'Connection': 'keep-alive', 'access-control-allow-origin': '*', 'referrer-policy': 'no-referrer-when-downgrade', 'strict-transport-security': 'max-age=3600; preload', 'x-content-type-options': 'nosniff', 'x-frame-options': 'sameorigin', 'etag': '"2ed93bba2690410821c95e37a63cc1a0"', 'expires': 'Mon, 07 Oct 2024 15:48:12 GMT', 'content-security-policy': "block-all-mixed-content;         default-src https://loc.gov/ https://*.loc.gov/ ;         media-src https://loc.gov/ https://*.loc.gov/              https://*.readspeaker.com/             https://*.arcgis.com/ https://*.arcgisonline.com/  https://webapps-cdn.esri.com/             blob:;         worker-src https://loc.gov/ https://*.loc.gov/              blob:;         font-src https://loc.gov/ https://*.loc.gov/              https://*.arcgis.com/ https://*.arcgisonline.com/  https://webapps-cdn.esri.com/             https://

In [8]:
# Print every response header on its own line as "name : value".
for name, value in r.headers.items():
    print(name, ':', value)

Date : Mon, 07 Oct 2024 15:06:49 GMT
Content-Type : text/html; charset=UTF-8
Content-Length : 175996
Connection : keep-alive
access-control-allow-origin : *
referrer-policy : no-referrer-when-downgrade
strict-transport-security : max-age=3600; preload
x-content-type-options : nosniff
x-frame-options : sameorigin
etag : "2ed93bba2690410821c95e37a63cc1a0"
expires : Mon, 07 Oct 2024 15:48:12 GMT
content-security-policy : block-all-mixed-content;         default-src https://loc.gov/ https://*.loc.gov/ ;         media-src https://loc.gov/ https://*.loc.gov/              https://*.readspeaker.com/             https://*.arcgis.com/ https://*.arcgisonline.com/  https://webapps-cdn.esri.com/             blob:;         worker-src https://loc.gov/ https://*.loc.gov/              blob:;         font-src https://loc.gov/ https://*.loc.gov/              https://*.arcgis.com/ https://*.arcgisonline.com/  https://webapps-cdn.esri.com/             https://ssl.p.jwpcdn.com/             data:;         im

## Looking at the response


In [9]:
# The response body as a string; for this request it is the HTML of the home page.
r.text 

'<!DOCTYPE html>\n\n\n<html lang="en" class="no-js" prefix="lc: http://loc.gov/#">\n<head>\n\n<meta name="description"\n      content="The Library of Congress is the nation\'s oldest federal cultural institution, and it serves as the research arm of Congress. It is also the largest library in the world, with more than 162 million items. The collections include books, sound recordings, motion pictures, photographs, maps, and manuscripts." />\n\n<meta name="dc.identifier"\n      content="http://www.loc.gov/" />\n\n<meta rel="canonical"\n      href="http://loc.gov/" />\n\n    \n<meta charset="utf-8">\n<meta name="viewport" content="width=device-width,initial-scale=1"/>\n<meta http-equiv="X-UA-Compatible" content="IE=edge">\n<meta name="version" content="$Revision$"/>\n<meta name="msvalidate.01" content="5C89FB9D99590AB2F55BD95C3A59BD81"/>\n<link title="schema(DC)" rel="schema.dc" href="http://purl.org/dc/elements/1.1/"/>\n<meta name="dc.language" content="eng" />\n<meta name="dc.source" c

## Modifying our request with parameters

In [10]:
# Query-string parameters to send with the next request; they end up in the URL as ?fo=json.
parameters = {'fo':'json'}

In [11]:
# Repeat the home-page request, this time passing the query parameters via `params`.
# The timeout makes the call raise an exception instead of waiting forever if the server never answers.
r = requests.get(url, params=parameters, timeout=30)

In [12]:
# Show the URL that was requested (now carrying the query string) followed by the status code.
print(f'{r.url} {r.status_code}')

https://www.loc.gov/?fo=json 200


In [13]:
# With fo=json in the request, the body is now JSON text rather than HTML.
r.text

'{"_featured_events": [{"category": "Concert", "day": 30, "link": "/item/event-398220/", "month": "Jan", "time": "8:00 p.m. - 10:00 p.m.", "title": "Dunedin Consort"}, {"category": "Concert", "day": 31, "link": "/item/event-398223/", "month": "Jan", "time": "8:00 p.m. - 10:00 p.m.", "title": "Asko|Sch\\u00f6nberg Ensemble Concert I"}, {"category": "Lecture", "day": 5, "link": "/item/event-397800/", "month": "Feb", "time": "11:00 a.m. - Noon", "title": "Gallery Talk: Rosa Parks, Before the Bus"}], "_featured_exhibits": [{"dates": "Through September 2020", "link": "/exhibitions/rosa-parks-in-her-own-words/about-this-exhibition/", "thumb": "/static/home/images/your-library/exhibitions/Rosa=Parks-Terek-Portrait-130x84.jpg", "title": "Rosa Parks: In Her Own Words"}, {"dates": "Through September 2020", "link": "/exhibitions/women-fight-for-the-vote/about-this-exhibition/", "thumb": "/static/home/images/your-library/exhibitions/exhibitions-women-suffrage.jpg", "title": "Shall Not Be Denied: W

In [18]:
# Add a `q` parameter (the term sent to the search endpoint) alongside the JSON format parameter.
parameters = {'q':'kittens','fo':'json'}

In [16]:
# Address of the search endpoint on loc.gov.
search_url = 'https://www.loc.gov/search'

In [19]:
# Run the search using the `parameters` dict, then show the resulting URL and status code.
# The timeout makes the call raise an exception instead of waiting forever if the server never answers.
r = requests.get(search_url, params=parameters, timeout=30)
print(r.url, r.status_code)

https://www.loc.gov/search?q=kittens&fo=json 200


## Request an item and its metadata from LOC

In [22]:
# Reminder of the base address that the item path is appended to in the next cells.
url

'https://www.loc.gov'

In [23]:
# Path of a single item, relative to the base `url`.
item_info = '/resource/ds.06560/'

In [24]:
# Request the item page by joining the base address and the item path, then show the URL used.
# The timeout makes the call raise an exception instead of waiting forever if the server never answers.
r = requests.get(url + item_info, timeout=30)
print(r.url)

https://www.loc.gov/resource/ds.06560/


In [25]:
# Reset the parameters so that only the JSON format is requested (the earlier search term is dropped).
parameters = {'fo':'json'}

In [26]:
# Request the same item again, this time asking for JSON, and show the URL and status code.
# The timeout makes the call raise an exception instead of waiting forever if the server never answers.
r = requests.get(url + item_info, params=parameters, timeout=30)
print(r.url, r.status_code)

https://www.loc.gov/resource/ds.06560/?fo=json 200


## Look at the JSON response & find metadata

In [27]:
# The raw JSON text of the item response, still a single string at this point.
r.text 

'{"articles_and_essays": [{"score": 6.800294, "title": "Postcards: Greetings from the Prints & Photographs Division", "url": "https://guides.loc.gov/postcards"}], "cite_this": {"apa": "Gilbert, C. <cite>Main public library building - Detroit</cite>. Detroit Michigan, None. [Brooklyn, n.y.: the albertype co., between 1921 and 1930?] [Photograph] Retrieved from the Library of Congress, https://www.loc.gov/item/2014647618/.", "chicago": "Gilbert, Cass, Architect. <cite>Main public library building - Detroit</cite>. Detroit Michigan, None. [Brooklyn, n.y.: the albertype co., between 1921 and 1930?] Photograph. https://www.loc.gov/item/2014647618/.", "mla": "Gilbert, Cass, Architect. <cite>Main public library building - Detroit</cite>. [Brooklyn, n.y.: the albertype co., between 1921 and 1930?] Photograph. Retrieved from the Library of Congress, &lt;www.loc.gov/item/2014647618/&gt;."}, "item": {"_version_": 1768319195339030528, "access_restricted": false, "aka": ["https://www.loc.gov/pictur

In [28]:
# Parse the JSON body into Python objects so individual fields can be looked up by key.
item_metadata = r.json()

In [29]:
# Show the parsed metadata; it now prints as a Python dict (e.g. False where the JSON text had false).
print(item_metadata)

{'articles_and_essays': [{'score': 6.800294, 'title': 'Postcards: Greetings from the Prints & Photographs Division', 'url': 'https://guides.loc.gov/postcards'}], 'cite_this': {'apa': 'Gilbert, C. <cite>Main public library building - Detroit</cite>. Detroit Michigan, None. [Brooklyn, n.y.: the albertype co., between 1921 and 1930?] [Photograph] Retrieved from the Library of Congress, https://www.loc.gov/item/2014647618/.', 'chicago': 'Gilbert, Cass, Architect. <cite>Main public library building - Detroit</cite>. Detroit Michigan, None. [Brooklyn, n.y.: the albertype co., between 1921 and 1930?] Photograph. https://www.loc.gov/item/2014647618/.', 'mla': 'Gilbert, Cass, Architect. <cite>Main public library building - Detroit</cite>. [Brooklyn, n.y.: the albertype co., between 1921 and 1930?] Photograph. Retrieved from the Library of Congress, &lt;www.loc.gov/item/2014647618/&gt;.'}, 'item': {'_version_': 1768319195339030528, 'access_restricted': False, 'aka': ['https://www.loc.gov/picture

In [30]:
# List the top-level keys of the parsed metadata, one per line.
for key in item_metadata.keys():
    print(key)

articles_and_essays
cite_this
item
more_like_this
options
page
pagination
related
related_items
reproductions
resource
resources
thesaurus_entry
timestamp
type
unrestricted
views


In [31]:
# Drill into the 'item' entry, which holds fields such as the call number, contributors and dates.
item_metadata['item']

{'_version_': 1768319195339030528,
 'access_restricted': False,
 'aka': ['https://www.loc.gov/pictures/item/2014647618/',
  'http://www.loc.gov/item/2014647618/',
  'http://www.loc.gov/pictures/item/2014647618/',
  'https://www.loc.gov/pictures/collection/cph/item/2014647618/',
  'http://www.loc.gov/pictures/collection/cph/item/2014647618/',
  'https://hdl.loc.gov/loc.pnp/ds.06560',
  'https://hdl.loc.gov/loc.pnp/ds.06560',
  'http://www.loc.gov/resource/ds.06560/',
  'http://lccn.loc.gov/2014647618'],
 'call_number': 'PCRD14, no. 171 [P&P]',
 'campaigns': [],
 'contributor_names': ['Gilbert, Cass, 1859-1934, architect'],
 'contributors': [{'gilbert, cass': 'https://www.loc.gov/search/?fa=contributor:gilbert,+cass&fo=json'}],
 'control_number': '',
 'created': '2023-06-06T06:30:58Z',
 'created_published': ['[Brooklyn, N.Y.] : [The Albertype Co.], [between 1921 and 1930?]'],
 'created_published_date': '[between 1921 and 1930?]',
 'date': '1921',
 'dates': [{'1921': 'https://www.loc.gov/

In [32]:
# Pull out one field from the item entry: its Library of Congress control number.
item_metadata['item']['library_of_congress_control_number']

'2014647618'

In [33]:
# Keep the control number in a variable so it can be reused to build URLs, and display it.
item_record = item_metadata['item']
item_lccn = item_record['library_of_congress_control_number']
print(item_lccn)

2014647618


## Reuse that metadata to construct the LCCN URL... the permalink!

In [37]:
# Build the permalink from the LCCN base address and the control number, then request it.
# The printed URL is where the request finally ended up, which can differ from the address requested.
# The timeout makes the call raise an exception instead of waiting forever if the server never answers.
r = requests.get(lccn_url + '/' + item_lccn, timeout=30)
print(r.url, r.status_code)

https://catalog.loc.gov/vwebv/search?searchCode=LCCN&searchArg=2014647618&searchType=1&permalink=y 200


In [39]:
# Request the same permalink with '/dc' appended; the body that comes back is an XML record.
# The timeout makes the call raise an exception instead of waiting forever if the server never answers.
r = requests.get(lccn_url + '/' + item_lccn + '/dc', timeout=30)
print(r.url, r.status_code)
print(r.text)

https://lccn.loc.gov/2014647618/dc 200
<?xml version="1.0" encoding="UTF-8"?><srw_dc:dc xmlns:srw_dc="info:srw/schema/1/dc-schema" xmlns:zs="http://docs.oasis-open.org/ns/search-ws/sruResponse" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:srw/schema/1/dc-schema http://www.loc.gov/standards/sru/resources/dc-schema.xsd">
  <title xmlns="http://purl.org/dc/elements/1.1/">Main public library building - Detroit [graphic].</title>
  <creator xmlns="http://purl.org/dc/elements/1.1/">Gilbert, Cass, 1859-1934, architect.</creator>
  <type xmlns="http://purl.org/dc/elements/1.1/">still image</type>
  <type xmlns="http://purl.org/dc/elements/1.1/">Advertisements 1920-1930. gmgpc</type>
  <type xmlns="http://purl.org/dc/elements/1.1/">Postcards 1920-1930. gmgpc</type>
  <type xmlns="http://purl.org/dc/elements/1.1/">Architectural photographs 1920-1930. gmgpc</type>
  <type xmlns="http://purl.org/dc/elements/1.1/">Collotypes Hand-colored 1920-1930. gmgpc</type>
  <