In [1]:
from datetime import datetime

import requests
from pydantic import BaseModel, HttpUrl

## 1. List of likes articles


In [2]:
POCKET_HEADERS = {
    "Content-Type": "application/json; charset=UTF8",
    "X-Accept": "application/json",
}
POCKET_CONSUMER_KEY = "108134-83b2a1ee1df047f0bb2598a"
POCKET_ACCESS_TOKEN = "17dcf008-b6a5-ce23-dc51-379858"

In [3]:
class Article(BaseModel):
    pocket_item_id: int
    given_url: HttpUrl
    resolved_url: HttpUrl
    title: str
    time_added: datetime
    word_count: int
    domain: str

In [6]:
def get_pocket_items(state: str = "archive") -> list[Article]:
    """
    Get all items archive in pocket
    """
    pocket_items = []
    count = 500
    offset = 0
    while True:
        payload = {
            "consumer_key": POCKET_CONSUMER_KEY,
            "access_token": POCKET_ACCESS_TOKEN,
            "state": state,
            "sort": "newest",
            "count": count,
            "offset": offset,
            "detailType": "complete",
            "total": "1",  ## total number of archived articles
        }

        url = "https://getpocket.com/v3/get"
        r = requests.request("Post", url=url, headers=POCKET_HEADERS, json=payload)

        for item in r.json()["list"]:
            pocket_item = r.json()["list"][item]
            try:
                pocket_items.append(
                    Article(
                        pocket_item_id=int(pocket_item["item_id"]),
                        given_url=pocket_item["given_url"],
                        resolved_url=pocket_item["resolved_url"],
                        title=pocket_item["resolved_title"],
                        time_added=datetime.fromtimestamp(
                            int(pocket_item["time_added"])
                        ),
                        word_count=int(pocket_item["word_count"]),
                        domain=pocket_item.get("domain_metadata", {}).get("name", ""),
                    )
                )
            except Exception as e:
                print(f"Error processing item {pocket_item['item_id']}: {e}")
                continue

        if int(r.json()["total"]) > count + offset:
            print(f"Fetching more articles... {count + offset} / {r.json()['total']}")
            offset += count
        else:
            print(f"Fetched {offset + count} articles")
            break

    return pocket_items


pocket_items = get_pocket_items()
print(len(pocket_items))
pocket_items[0]

Fetching more articles... 500 / 3692
Fetching more articles... 1000 / 3692
Fetching more articles... 1500 / 3692
Fetching more articles... 2000 / 3692
Error processing item 3945908435: 1 validation error for Article
resolved_url
  Input should be a valid URL, input is empty [type=url_parsing, input_value='', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/url_parsing
Fetching more articles... 2500 / 3692
Fetching more articles... 3000 / 3692
Fetching more articles... 3500 / 3692
Fetched 4000 articles
3691


Article(pocket_item_id=5598506, given_url=HttpUrl('https://nat.org/'), resolved_url=HttpUrl('http://nat.org/'), title='Nat Friedman', time_added=datetime.datetime(2025, 1, 15, 11, 20, 58), word_count=451, domain='nat.org')