In [1]:
import pandas as pd
import json
import random
from pathlib import Path
import os

In [2]:
#  fakesection: set up logging for requests # 
import requests
import logging
import http.client

# https://stackoverflow.com/questions/16337511/
http.client.HTTPConnection.debuglevel = 1
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True

In [7]:
#  fakesection: import Wood's NARA metadata
parentDirectory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
df = pd.read_csv(os.path.join(parentDirectory, 'import', '2018-05-16-NARA-master-manifest/all.csv'))

# trim spaces and remove redundant column
df.rename(str.strip, axis='columns',inplace=True)
df.drop(columns=['Box or Volume Number.1'], inplace=True)

In [8]:
df.head()

Unnamed: 0,Ship Name,Record Group,Entry Number,Box or Volume Number,Digital Directory,Start Date,End Date,Assets,Number of Images,Number of Pages,NARA URL,Geographic Focus
0,ABARENDA,24.0,118,vol01of29,vol01of29 (1899-04-01_1899-09-30),04/01/1899,09/30/1899,1,193,386,Shipped March 2017,Chinese Waters
1,ABARENDA,24.0,118,vol02of29,vol02of29 (1899-10-01_1900-03-31),10/01/1899,03/31/1900,1,193,386,Shipped March 2017,Chinese Waters
2,ABARENDA,24.0,118,vol03of39,vol03of39 (1900-04-01_1900-09-30),04/01/1900,09/30/1900,1,191,382,Shipped March 2017,Chinese Waters
3,ABARENDA,24.0,118,vol04of29,vol04of29 (1900-10-01_1901-04-12),10/01/1900,04/12/1901,1,205,410,Shipped March 2017,Chinese Waters
4,ABARENDA,24.0,118,vol05of29,vol05of29 (1901-04-01_1901-11-28),04/01/1901,11/28/1901,1,275,550,Shipped March 2017,Chinese Waters


In [9]:
# take sample from each record group
NARA_record_group_dict = dict([(23, 'USCS'), # Records of the Coast and Geodetic Survey
                               (24, 'Navy'), # Records of the Bureau of Naval Personnel
                               (26, 'CG'), # Records of the U.S. Coast Guard
                               (261, 'RAC') # Records of Former Russian Agencies
                              ])
sample = pd.concat([df.loc[df['Record Group'] == gp].sample(5, random_state=1)\
                    for gp in NARA_record_group_dict])

# drop entries without a valid NARA URL
ndf = sample[~sample['NARA URL'].str.contains(" ")]
# TODO pair all entries with valid NARA URLs <ccg, 2019-06-02>

In [10]:
ndf

Unnamed: 0,Ship Name,Record Group,Entry Number,Box or Volume Number,Digital Directory,Start Date,End Date,Assets,Number of Images,Number of Pages,NARA URL,Geographic Focus
1744,HASSLER,23.0,102,Volume 71 and 74,hassler-1876,01/01/1876,09/24/1876,2,281,561,https://catalog.archives.gov/id/24332142,Arctic
1349,DALE (DD-353),24.0,118-A1,b2721,Dale-DD-353-1943-03,03/01/1943,03/31/1943,1,67,67,https://catalog.archives.gov/id/24357119,Arctic
1850,IDAHO (BB-42),24.0,118-A1,Box 4796,idaho-bb-42-1944-05,05/01/1944,05/31/1944,1,101,101,http://catalog.archives.gov/id/17298664,Arctic
1081,CHELAN,26.0,159A,Box 541,chelan-1935-10,10/01/1935,10/31/1935,1,42,83,https://catalog.archives.gov/id/23678516,Arctic
3114,STORIS,26.0,330-O,"b911, b912",storis-1954,01/01/1954,12/31/1954,2,594,1188,http://catalog.archives/gov/id/38547956,Arctic
3121,STORIS,26.0,330-V,b822,storis-1957 logbooks,09/06/1957,09/06/1957,1,20,40,http://catalog.archives/gov/id/38547962,Arctic
2098,MANNING,26.0,159-A,"Boxes 1455, 1456 and 1457",manning-1914,01/01/1914,12/31/1914,3,477,941,https://catalog.archives.gov/id/24407615,Arctic


In [11]:
#  fakesection: download each image under a given nara_id # 

def download_nara_entry(entry): # entry is assumed to be a *DataFrame*

    # access NARA API
    nara_id = entry['NARA URL'].iloc[0].split("/")[-1]
    api_base = 'https://catalog.archives.gov/api/v1/'
    api_url = '{0}?naIds={1}'.format(api_base, nara_id)
    res = requests.get(api_url)

    # metadata from Wood's all.csv (which is redundant, given NARA's metadata)
    # base_url = 'https://catalog.archives.gov/'
    # record_group = "rg-0{0}".format(int(entry['Record Group'].iloc[0]))
    # num_images = int(entry['Number of Images'].iloc[0])
    # digital_directory = ['Digital Directory'].iloc[0]

    # parse NARA API output for metadata
    entry_img_array = res.json().get('opaResponse').get('results').get('result')[0]\
                      .get('objects').get('object')
    digital_directory = entry_img_array[0].get('file').get('@path').split("/")[-2]

    # create local directories if needed
    parentDirectory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    paths = dict([(k,os.path.join(parentDirectory, 'import', k))\
                  for k in ['data', 'metadata']])
    for k, val in paths.items():
        p = Path(val)
        if (p.exists() == False and p.is_dir() == False):
            os.mkdir(val)

    # write NARA API output to file for reference
    api_output = "{0}/nara_id_{1}.json".format(paths['metadata'], digital_directory, nara_id)
    if res.status_code == 200:
        with open(api_output, 'wb') as f:
            f.write(res.content)

    # download all images for this unique nara_id
    for img_info in entry_img_array: 

        # test for mimetype "image/jpeg"
        # we don't want "application/pdf"
        if img_info.get('file').get('@mime') == "image/jpeg":

            img_name = img_info.get('file').get('@name')
            img_url = img_info.get('file').get('@url')
            img_res = requests.get(img_url)

            # create subdirectory if needed
            img_path = '{0}/nara_id_{1}'\
                       .format(paths['data'], nara_id)
            img_p = Path(img_path)
            if (img_p.exists() == False and img_p.is_dir() == False):
                os.mkdir(img_path)

            # write a single image to file
            local_img_name = "{0}/{1}".format(img_path, img_name)
            if img_res.status_code == 200:
                with open(local_img_name, 'wb') as img_f:
                    img_f.write(img_res.content)

return None

In [12]:
#  fakesection: tests # 

# test function for a single entry
download_nara_entry(ndf.sample(1, random_state=0))

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443
DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /api/v1/?naIds=24407615 HTTP/1.1" 200 None


send: b'GET /api/v1/?naIds=24407615 HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'
reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: application/json;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:43:18 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: tran

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0001.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0001.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa04.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa04
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:43:59 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0002.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0002.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:44:04 GMT
header: hnweb: pw03
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0003.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0003.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:44:15 GMT
header: hnweb: pw02
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0004.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0004.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa04.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa04
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:44:21 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0005.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0005.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa03.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa03
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:44:26 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0006.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0006.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa03.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa03
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:44:32 GMT
header: hnweb: pw03
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443
DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0007.JPG HTTP/1.1" 200 None


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0007.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'
reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa03.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa03
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:44:40 GMT
header: hnweb: pw03
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-S

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0008.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0008.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa03.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa03
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:44:45 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0009.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0009.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa04.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa04
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:44:50 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443
DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0010.JPG HTTP/1.1" 200 None


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0010.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'
reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa03.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa03
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:44:55 GMT
header: hnweb: pw03
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-S

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0011.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0011.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:45:04 GMT
header: hnweb: pw02
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0012.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0012.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa03.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa03
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:45:14 GMT
header: hnweb: pw02
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0013.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0013.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa02.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa02
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:45:31 GMT
header: hnweb: pw03
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0014.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0014.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa02.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa02
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:45:41 GMT
header: hnweb: pw03
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0015.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0015.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:46:04 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0016.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0016.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa04.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa04
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:46:17 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0017.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0017.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa02.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa02
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:46:31 GMT
header: hnweb: pw04
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0018.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0018.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:46:46 GMT
header: hnweb: pw02
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0019.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0019.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:46:56 GMT
header: hnweb: pw04
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0020.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0020.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa02.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa02
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:47:10 GMT
header: hnweb: pw02
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0021.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0021.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:47:19 GMT
header: hnweb: pw02
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0022.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0022.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:47:30 GMT
header: hnweb: pw02
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0023.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0023.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa04.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa04
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:47:43 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0024.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0024.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa04.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa04
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:47:54 GMT
header: hnweb: pw02
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0025.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0025.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:48:05 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0026.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0026.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:48:19 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0027.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0027.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa04.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa04
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:48:32 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0028.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0028.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa04.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa04
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:48:44 GMT
header: hnweb: pw02
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0029.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0029.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa01.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa01
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:48:54 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0030.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0030.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa02.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa02
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:49:05 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0031.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0031.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa02.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa02
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:49:15 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0032.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0032.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa02.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa02
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:49:24 GMT
header: hnweb: pw02
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0033.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0033.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa04.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa04
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:49:37 GMT
header: hnweb: pw04
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0034.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0034.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa02.aws.narasearch.us:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa02
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:49:50 GMT
header: hnweb: pw01
header: Pragma: no-cache
header: Server: Apache/2.4.6 (Red Hat)
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): catalog.archives.gov:443


send: b'GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0035.JPG HTTP/1.1\r\nHost: catalog.archives.gov\r\nUser-Agent: python-requests/2.21.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\n\r\n'


DEBUG:urllib3.connectionpool:https://catalog.archives.gov:443 "GET /OpaAPI/media/24407615/content/dc-metro/rg-026/585454/0001-A1/Manning-1914/Manning-1914_0035.JPG HTTP/1.1" 200 None


reply: 'HTTP/1.1 200 OK\r\n'
header: Access-Control-Allow-Credentials: true
header: Access-Control-Allow-Methods: DELETE, HEAD, GET, OPTION, POST, PUT
header: Access-Control-Expose-Headers: JSESSIONID
header: Access-Control-Max-Age: 3600
header: BALANCER_NAME: (null)
header: BALANCER_ROUTE_CHANGED: 1
header: BALANCER_SESSION_ROUTE: (null)
header: BALANCER_SESSION_STICKY: (null)
header: BALANCER_WORKER_NAME: ajp://pa03.aws.nac.nara.gov:8009/OpaAPI
header: BALANCER_WORKER_ROUTE: pa03
header: Cache-Control: no-store, no-cache
header: Content-Type: image/jpeg;charset=UTF-8
header: Date: Thu, 06 Jun 2019 19:50:00 GMT
header: hnweb: pw04
header: Pragma: no-cache
header: Server: Apache
header: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
header: transfer-encoding: chunked
header: Connection: keep-alive


KeyboardInterrupt: 

In [None]:
#  fakesection: do actually download for each sample entry
# ndf.groupby('NARA URL').apply(download_nara_entry)