In [1]:
import os
import random
import twitter
import requests

Request the first page of the collection (its JSON includes the pagination metadata used below)

In [2]:
# Sanborn Maps collection endpoint on loc.gov, requested with the fo=json query parameter.
url = "https://www.loc.gov/collections/sanborn-maps/?fo=json"

In [3]:
# Bound the wait with a timeout and surface HTTP errors here, rather than
# as a confusing JSON decoding failure in the next cell.
r = requests.get(url, timeout=30)
r.raise_for_status()

In [4]:
# Parse the response body as JSON.
data = r.json()

Pull a random page

In [22]:
# 'total' under 'pagination' is used here as the number of result pages
# (assumption: it counts pages, not items — TODO confirm against the loc.gov API docs).
max_page = data['pagination']['total']

In [24]:
# Show the value read from the pagination metadata.
max_page

1402

In [25]:
# Pick a page number uniformly from 1 through max_page, both ends inclusive.
random_page = random.randint(1, max_page)

In [26]:
# Show the page number that was picked.
random_page

319

In [7]:
# Append the chosen page number as the sp query parameter.
random_url = f"{url}&sp={random_page}"

In [8]:
# Bound the wait with a timeout and surface HTTP errors here, rather than
# as a confusing JSON decoding failure in the next cell.
random_r = requests.get(random_url, timeout=30)
random_r.raise_for_status()

In [9]:
# Parse the random page's response body as JSON.
random_data = random_r.json()

Get all the items on that page

In [10]:
def get_items(page):
    """Return the item-record results from one page of search results.

    Parameters
    ----------
    page : dict
        Parsed JSON for a results page; its "results" entry is iterated.

    Returns
    -------
    list of dict
        The results, in their original order, that satisfy both conditions:
        "original_format" mentions neither "collection" nor "web page", and
        "id" is a non-empty link starting with the loc.gov item prefix
        (http or https). A result with no "original_format" cannot be
        identified as a collection or web page, so it is kept if its id matches.
    """
    # Accept either scheme: ids and urls for the same record may differ in it.
    item_prefixes = ("http://www.loc.gov/item", "https://www.loc.gov/item")
    items = []
    for result in page['results']:
        # A missing/None "original_format" would make `in` raise TypeError,
        # so treat it as "no formats listed".
        formats = result.get("original_format") or []
        # Filter out anything that's a collection or web page
        if "collection" in formats or "web page" in formats:
            continue
        # Get the link to the item record
        item = result.get("id")
        if not item:
            continue
        # Filter out links to Catalog or other platforms
        if item.startswith(item_prefixes):
            items.append(result)
    return items

In [11]:
# Keep only the results on the random page that get_items accepts as item records.
items = get_items(random_data)

Get a random item

In [12]:
# Pick one item uniformly at random.
# Note: random.choice raises IndexError if every result on the page was filtered out.
random_item = random.choice(items)

In [13]:
# Show the chosen item's title and URL.
random_item['title'], random_item['url']

('Sanborn Fire Insurance Map from Urbanna, Middlesex County, Virginia.',
 'https://www.loc.gov/item/sanborn09084_001/')

Clean up the title

In [14]:
def get_clean_title(title):
    """Drop the "Sanborn Fire Insurance Map from " boilerplate from a title.

    Surrounding whitespace is stripped both before and after the phrase is
    removed; every occurrence of the phrase is removed.
    """
    boilerplate = "Sanborn Fire Insurance Map from "
    return title.strip().replace(boilerplate, "").strip()

In [15]:
# Shorten the chosen item's title for use in the tweet text.
clean_title = get_clean_title(random_item['title'])

In [16]:
# Show the cleaned title.
clean_title

'Urbanna, Middlesex County, Virginia.'

Get all the images for that item

In [17]:
def get_images(item):
    """Fetch an item's JSON record and return one JPEG URL per file group.

    Parameters
    ----------
    item : str
        URL of the item record; it is requested with ``fo=json``.

    Returns
    -------
    list of str
        For each entry in every resource's "files" list, the "url" of the
        widest "image/jpeg" file. Entries that contain no JPEG are skipped
        instead of raising IndexError.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not respond within 30 seconds.
    """
    r = requests.get(item, params={"fo": "json"}, timeout=30)
    # Surface HTTP errors here rather than as a JSON decoding failure below.
    r.raise_for_status()
    data = r.json()
    jpg_list = []
    for resource in data['resources']:
        for file_list in resource['files']:
            # Get the JPGs
            jpgs = [f for f in file_list if f.get('mimetype') == 'image/jpeg']
            if not jpgs:
                continue
            # Take the biggest one; on ties max() keeps the first, matching a
            # stable descending sort. A missing width is treated as 0.
            widest = max(jpgs, key=lambda f: f.get('width') or 0)
            jpg_list.append(widest['url'])
    return jpg_list

In [18]:
# Collect the image URLs for the chosen item, looked up via its 'id' link.
jpg_list = get_images(random_item['id'])

Preview the first four images, since that's all Twitter allows per tweet

In [19]:
# Preview the first four image URLs; jpg_list itself is left unchanged.
jpg_list[:4]

['https://tile.loc.gov/image-services/iiif/service:gmd:gmd388m:g3884m:g3884um:g3884um_g090841921:09084_1921-0001/full/pct:25/0/default.jpg',
 'https://tile.loc.gov/image-services/iiif/service:gmd:gmd388m:g3884m:g3884um:g3884um_g090841921:09084_1921-0002/full/pct:25/0/default.jpg',
 'https://tile.loc.gov/image-services/iiif/service:gmd:gmd388m:g3884m:g3884um:g3884um_g090841921:09084_1921-0003/full/pct:25/0/default.jpg',
 'https://tile.loc.gov/image-services/iiif/service:gmd:gmd388m:g3884m:g3884um:g3884um_g090841921:09084_1921-0004/full/pct:25/0/default.jpg']

Tweet it

In [40]:
# Build the Twitter client from credentials held in environment variables;
# any variable that is unset is passed through as None.
api = twitter.Api(
    consumer_key=os.environ.get('TWITTER_CONSUMER_KEY'),
    consumer_secret=os.environ.get('TWITTER_CONSUMER_SECRET'),
    access_token_key=os.environ.get('TWITTER_ACCESS_TOKEN_KEY'),
    access_token_secret=os.environ.get('TWITTER_ACCESS_TOKEN_SECRET'),
)

In [41]:
# Tweet text: the cleaned title, a space, then the item's URL.
tweet = "{} {}".format(clean_title, random_item['url'])

In [42]:
# Show the tweet text before posting.
tweet

'Mechanicsburg, Champaign County, Ohio. https://www.loc.gov/item/sanborn06793_003/'

In [43]:
# Post the tweet with at most the first four image URLs attached as media.
# NOTE: running this cell publishes to the authenticated account.
api.PostUpdate(tweet, media=jpg_list[:4])

Status(ID=1450808254175481858, ScreenName=sanbornmaps, Created=Wed Oct 20 12:56:57 +0000 2021, Text='Mechanicsburg, Champaign County, Ohio. https://t.co/Ba3CPefj6R https://t.co/F3oGiBS5mz')