In [None]:
import os
import random
import twitter
import requests
from mastodon import Mastodon
from pathlib import Path

Request the full collection

In [None]:
# Collection search endpoint; fo=json asks for the response as JSON.
url = "https://www.loc.gov/collections/sanborn-maps/?fo=json"

In [None]:
# Fail fast on a hung connection or an error status instead of handing a bad
# response to the JSON parsing step.
r = requests.get(url, timeout=180)
r.raise_for_status()

In [None]:
# Parse the response body as JSON.
data = r.json()

Pull a random page

In [None]:
# NOTE(review): treats pagination['total'] as the number of result pages
# (it bounds the random page pick below) — confirm it is not the item count.
max_page = data['pagination']['total']

In [None]:
# Show max_page in the cell output.
max_page

In [None]:
# Pick a page number uniformly from 1 through max_page, inclusive.
random_page = random.randint(1, max_page)

In [None]:
# Show the chosen page number in the cell output.
random_page

In [None]:
# Append the chosen page number as the sp query parameter.
random_url = f"{url}&sp={random_page}"

In [None]:
# Fail fast on a hung connection or an error status instead of handing a bad
# response to the JSON parsing step.
random_r = requests.get(random_url, timeout=180)
random_r.raise_for_status()

In [None]:
# Parse the randomly chosen results page as JSON.
random_data = random_r.json()

Get all the items on that page

In [None]:
def get_items(page):
    """Return the search results on a page that are loc.gov item records.

    Args:
        page: Parsed JSON for one page of search results; must contain a
            ``results`` list of dicts.

    Returns:
        The result dicts, in their original order, whose ``original_format``
        does not contain "collection" or "web page" and whose ``id`` is a
        loc.gov item link (``http`` or ``https``).
    """
    item_prefixes = ("http://www.loc.gov/item", "https://www.loc.gov/item")
    items = []
    for result in page['results']:
        # A result may carry no original_format at all; treat that as
        # "nothing to filter on" instead of failing on `in None`.
        formats = result.get("original_format") or []
        # Filter out anything that's a collection or web page
        if "collection" in formats or "web page" in formats:
            continue
        # Get the link to the item record
        item = result.get("id")
        # Filter out links to Catalog or other platforms
        if item and item.startswith(item_prefixes):
            items.append(result)
    return items

In [None]:
# Keep only the item records from the randomly chosen page.
items = get_items(random_data)

Get a random item

In [None]:
# random.choice raises IndexError if the page yielded no item records.
random_item = random.choice(items)

In [None]:
# Preview the chosen item's title and URL.
random_item['title'], random_item['url']

Clean up the title

In [None]:
def get_clean_title(title):
    """Return the title without the Sanborn boilerplate phrase.

    Outer whitespace is trimmed first, then every occurrence of the phrase
    (including its trailing space) is removed, and the result is trimmed
    again.
    """
    boilerplate = "Sanborn Fire Insurance Map from "
    return title.strip().replace(boilerplate, "").strip()

In [None]:
# Drop the boilerplate phrase from the chosen item's title.
clean_title = get_clean_title(random_item['title'])

In [None]:
# Show the cleaned title in the cell output.
clean_title

Get all the images for that item

In [None]:
def get_images(item, timeout=180):
    """Return the URL of the widest JPEG in each file group of an item.

    Args:
        item: URL of the item record; requested with ``fo=json``.
        timeout: Seconds to wait on the server before giving up.

    Returns:
        A list of URLs, one per entry of each resource's ``files`` list, in
        response order. Entries that offer no JPEG are skipped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.get(item, params={"fo": "json"}, timeout=timeout)
    r.raise_for_status()
    data = r.json()
    jpg_list = []
    for resource in data['resources']:
        for file_list in resource['files']:
            # Get the JPGs
            jpgs = [f for f in file_list if f.get('mimetype') == 'image/jpeg']
            if not jpgs:
                # No JPEG rendition in this group; skip it rather than
                # failing on an empty list.
                continue
            # Take the biggest one; a missing width counts as smallest.
            biggest = max(jpgs, key=lambda f: f.get('width') or 0)
            jpg_list.append(biggest['url'])
    return jpg_list

In [None]:
# The item's 'id' is its record URL, which get_images requests as JSON.
jpg_list = get_images(random_item['id'])

Trim down to the first four images, since that's the most a single Mastodon post allows by default

In [None]:
# Preview only: this slice is not stored, so jpg_list itself is unchanged.
# The download step applies the same [:4] limit again.
jpg_list[:4]

Toot it

In [None]:
# Credentials come from the environment; an unset variable is passed as None.
api = Mastodon(
    client_id=os.environ.get("MASTODON_CLIENT_KEY"),
    client_secret=os.environ.get("MASTODON_CLIENT_SECRET"),
    access_token=os.environ.get("MASTODON_ACCESS_TOKEN"),
    api_base_url="https://mastodon.palewi.re",
)

In [None]:
def download_url(url: str, output_path: Path, timeout: int = 180):
    """Stream the content at ``url`` into the file at ``output_path``.

    The response is iterated with ``chunk_size=8192`` and written in binary
    mode. ``raise_for_status()`` runs before the output file is opened, so a
    failure there does not touch the file. Returns ``output_path``.
    """
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(output_path, "wb") as sink:
            sink.writelines(response.iter_content(chunk_size=8192))
    return output_path

In [None]:
# Download at most the first four images to 0.jpg, 1.jpg, ... (relative paths).
local_jpg = [
    download_url(jpg_url, Path(f"{position}.jpg"))
    for position, jpg_url in enumerate(jpg_list[:4])
]

In [None]:
# Upload each downloaded file. Despite the name, media_ids holds the objects
# returned by media_post; their 'id' values are read when the post is made.
media_ids = [api.media_post(jpg_path) for jpg_path in local_jpg]

In [None]:
# Post text: the cleaned title followed by the item's URL.
status = f"{clean_title} {random_item['url']}"

In [None]:
# Publish the post with the uploaded media attached by id.
post = api.status_post(
    status,
    media_ids=[media['id'] for media in media_ids],
)