In [None]:
from typing import Dict, Optional
from urllib.parse import quote
from xml.dom import minidom
from xml.etree import ElementTree

import requests
from bs4 import BeautifulSoup
from IPython.display import display, HTML

from secrets.config import config

In [None]:
def get_url(url: str, timeout: float = 30.0) -> str:
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Fully formed URL to request.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` waits indefinitely, which would
            hang the kernel if the server stalls.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        Exception: If ``response.ok`` is false; the response object is
            attached as the second exception argument.
        requests.RequestException: Propagated unchanged on connection
            failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    if not response.ok:
        raise Exception('invalid response code', response)
    return response.content.decode('utf-8')

def get_token(login_url: str = 'https://commerce.reuters.com/rmd/rest/xml/login?username={username}&password={password}',
              config: Optional[Dict[str, str]] = None) -> str:
    """Log in and return the auth token from the login response.

    Args:
        login_url: URL template containing ``{username}`` and ``{password}``
            placeholders. Note that the credentials travel in the query
            string of this URL.
        config: Mapping with ``'user'`` and ``'password'`` keys. Required,
            despite the ``None`` default kept for signature compatibility.

    Returns:
        The text content of the root element of the login response XML.

    Raises:
        TypeError: If ``config`` is not supplied.
        KeyError: If ``config`` lacks ``'user'`` or ``'password'``.
        ValueError: If the login response's root element has no text.
        Exception: Whatever ``get_url`` raises for a failed request.
    """
    if config is None:
        raise TypeError("get_token() requires a 'config' mapping with 'user' and 'password' keys")

    # Percent-encode the credentials: characters such as '&', '#', '+', '%'
    # or spaces would otherwise change the meaning of the query string.
    login_url = login_url.format(
        username=quote(config['user'], safe=''),
        password=quote(config['password'], safe=''),
    )
    content = get_url(login_url)
    root = ElementTree.fromstring(content)

    token = root.text
    if not token:
        # Returning None here would silently produce 'token=None' in later URLs.
        raise ValueError('login response did not contain an auth token')
    return token

# Log in with the credentials from the imported config; the resulting token
# is interpolated into the request URLs built in the cells below.
auth_token = get_token(config=config)

In [None]:
# Fetch the channel listing from the channels endpoint as raw XML text.
channels_url = f'https://rmb.reuters.com/rmd/rest/xml/channels?&token={auth_token}'
channels_raw = get_url(channels_url)

In [None]:
# Parse the channel listing into a DOM and print it indented for inspection.
channels = minidom.parseString(channels_raw)
channels_pretty = channels.toprettyxml()
print(channels_pretty)

In [None]:
# Print the 'id' attribute of the first <category> child of every
# <channelInformation> element directly under the document root.
root = ElementTree.fromstring(channels_raw)
for ci in root.iterfind('channelInformation'):
    print(ci.find('category').get('id'))

In [None]:
# Request the items of channel BEQ259 with mediaType=T, then pretty-print
# the XML response.
items_url = f'https://rmb.reuters.com/rmd/rest/xml/items?channel=BEQ259&mediaType=T&token={auth_token}'
items_raw = get_url(items_url)
items = minidom.parseString(items_raw)
print(items.toprettyxml())

In [None]:
# Fetch one item by its id and pretty-print the XML response.
item_id = 'tag:reuters.com,2017:newsml_KCN1BQ2AV:6'
item_url = f'https://rmb.reuters.com/rmd/rest/xml/item?id={item_id}&token={auth_token}'
item_raw = get_url(item_url)
item = minidom.parseString(item_raw)
print(item.toprettyxml())

In [None]:
# Render the raw item markup directly in the notebook output area.
item_html = HTML(item_raw)
display(item_html)

In [None]:
# Parse the item markup with BeautifulSoup's 'lxml' parser and show the text
# of the first <title> element. BeautifulSoup is imported in the notebook's
# top import cell with the other dependencies.
soup = BeautifulSoup(item_raw, 'lxml')
soup.find('title').text

In [None]:
# Collect the text of every <p> found inside the first 'inlinexml' element.
inline_xml = soup.find('inlinexml')
[p.text for p in inline_xml.find_all('p')]