Skip to content

Commit

Permalink
Create load_data and load_schema APIs [RHELDST-4862]
Browse files Browse the repository at this point in the history
The load_schema API allows clients to more easily consume the
schema.json file that is included in the cdn-definitions package.

The load_data API will return a raw dictionary of the chosen data
file. This will allow users to access various types of data in a
uniform way.

In order to encourage a single, consistent method of access to
the cdn-definitions data, the legacy access methods and related
classes (i.e., PathAlias, rhui_alias, and origin_alias) will be
deprecated. A dedicated Python object will no longer need to be
created for each new addition to the schema.

The load_data API also accepts an optional "source" argument, which
allows the user to specify the source of the cdn_definitions JSON or
YAML data file.
The source could be a URL or a local path in a directory tree.

If a source is not specified, the location may be overridden via the
CDN_DEFINITIONS_PATH environment variable, which may likewise be
either a URL or a local path.

If no source is specified and the CDN_DEFINITIONS_PATH environment
variable is not set, the data.yaml file included in the
cdn-definitions package will be used as a source. If that file is
unavailable, load_data will attempt to load data from
/usr/share/cdn-definitions/data.yaml. If all of the sources are
invalid, a RuntimeError will be raised.
  • Loading branch information
crungehottman committed Feb 7, 2021
1 parent f2c8dfd commit 29e374d
Show file tree
Hide file tree
Showing 6 changed files with 193 additions and 28 deletions.
20 changes: 9 additions & 11 deletions docs/userguide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,15 @@ the ``cdn_definitions`` module, as in example:

.. code-block:: python
from cdn_definitions import rhui_aliases
from cdn_definitions import load_data
for alias in rhui_aliases():
if my_path.startswith(alias.src):
# my path falls under a /rhui/ alias,
# so now do something special
DATA = load_data(source="https://raw.githubusercontent.com/release-engineering/cdn-definitions/master/src/cdn_definitions/data.json")
for alias in DATA["rhui_alias"]:
if my_path.startswith(alias["src"]):
# my path falls under a /rhui/ alias,
# so now do something special
The library will use data from the first existing of the following sources:
If a source is not specified, the library will use data from the first of the following sources that exists:

- A JSON or YAML file pointed at by the ``CDN_DEFINITIONS_PATH`` environment variable.
- The file bundled with the library on PyPI.
Expand All @@ -63,9 +64,6 @@ Python reference

.. module:: cdn_definitions

.. autoclass:: PathAlias()
:members:
.. autofunction:: load_data

.. autofunction:: rhui_aliases

.. autofunction:: origin_aliases
.. autofunction:: load_schema
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
PyYAML
requests
4 changes: 2 additions & 2 deletions src/cdn_definitions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from ._impl import PathAlias, origin_aliases, rhui_aliases
from ._impl import PathAlias, origin_aliases, rhui_aliases, load_data, load_schema

__all__ = ["PathAlias", "origin_aliases", "rhui_aliases"]
__all__ = ["PathAlias", "origin_aliases", "rhui_aliases", "load_data", "load_schema"]
117 changes: 103 additions & 14 deletions src/cdn_definitions/_impl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,103 @@
import json
import os
import warnings

import requests
import yaml
from urllib3.util.retry import Retry


def to_dict(data, ext):
    """
    Converts a string or a requests.Response object to a dict.

    Args:
        data (str, bytes or requests.Response): Raw document contents, or the
            HTTP response whose body holds the document.
        ext (str): Extension of the data source, including the leading dot.
            ".json" (case-insensitive) selects the JSON parser; any other
            value is parsed as YAML.

    Returns:
        The parsed document (a dict for well-formed cdn-definitions data).

    Raises:
        ValueError: If a JSON payload cannot be decoded.
        yaml.YAMLError: If a YAML payload cannot be parsed.
    """
    is_response = isinstance(data, requests.Response)

    if ext.lower() == ".json":
        if not is_response:
            return json.loads(data)

        decoded = data.json()
        # Response.json() has already decoded the body. For an ordinary JSON
        # document that yields a dict (or list), and feeding it to
        # json.loads() again would raise TypeError, so return it as-is.
        if isinstance(decoded, (dict, list)):
            return decoded
        # Otherwise keep the previous behaviour: a body that is itself a
        # JSON-encoded string (double-encoded document) is decoded once more.
        return json.loads(decoded)

    return yaml.load(data.text if is_response else data, yaml.SafeLoader)

def load_data():
# Load data from first existing of these
candidate_paths = [
os.path.join(os.path.dirname(os.path.dirname(__file__)), "data.yaml"),
"/usr/share/cdn-definitions/data.yaml",
]

# If env var is set, it takes highest precedence
if "CDN_DEFINITIONS_PATH" in os.environ:
candidate_paths.insert(0, os.environ["CDN_DEFINITIONS_PATH"])
def get_remote_data(url):
"""
Creates a requests session with retries. If the request was successful, returns the response.
"""
retry_strategy = Retry(
status_forcelist=[429, 500, 502, 503, 504],
)
adapter = requests.adapters.HTTPAdapter(max_retries=retry_strategy)
session = requests.Session()
session.mount(url, adapter)

existing_paths = [p for p in candidate_paths if os.path.exists(p)]
response = session.get(url, timeout=10)

_, ext = os.path.splitext(existing_paths[0])
if ext.lower() == ".json":
return json.load(open(existing_paths[0]))
if response.raise_for_status() is None:
return response


def get_local_data(data_path):
    """
    Reads the file at the given path and returns its raw contents.

    Args:
        data_path (str): Path to an existing, readable file.

    Returns:
        bytes: The complete contents of the file (opened in binary mode).
    """
    with open(data_path, mode="rb") as stream:
        contents = stream.read()
    return contents


def get_data(data_paths):
"""
Returns a dict of the data at a URL or local file path.
If data cannot be loaded, raises a RuntimeError.
"""
for path in data_paths:
_, ext = os.path.splitext(path)

return yaml.load(open(existing_paths[0]), yaml.SafeLoader)
if path.startswith("http"):
return to_dict(get_remote_data(path), ext)

if os.path.exists(path):
return to_dict(get_local_data(path), ext)
raise RuntimeError("Could not load data")


def load_data(source=None):
    """Loads data from YAML or JSON format into a dictionary.

    When ``source`` is omitted, the following locations are tried in order:
    the value of the ``CDN_DEFINITIONS_PATH`` environment variable (if set),
    the ``data.yaml`` file one directory above this module, and finally
    ``/usr/share/cdn-definitions/data.yaml``.

    Args:
        source (str, optional): A local path or URL to a JSON or YAML data file.

    Returns:
        dict: The data from the local path or URL in the form of a dictionary.

    Raises:
        RuntimeError: If all attempted data sources are invalid, a RuntimeError will be raised.
    """
    # An explicit source is the only candidate; no fallbacks are consulted.
    if source is not None:
        return get_data([source])

    search_order = []

    # If env var is set, it takes highest precedence
    override = os.environ.get("CDN_DEFINITIONS_PATH")
    if override is not None:
        search_order.append(override)

    package_dir = os.path.dirname(os.path.dirname(__file__))
    search_order.append(os.path.join(package_dir, "data.yaml"))
    search_order.append("/usr/share/cdn-definitions/data.yaml")

    return get_data(search_order)


def load_schema():
    """
    Loads the `schema.json` file provided with the cdn-definitions package into a dictionary.

    The file is looked up one directory above this module.

    Returns:
        dict: The cdn-definitions schema in the form of a dictionary.
    """
    package_dir = os.path.dirname(os.path.dirname(__file__))
    schema_path = os.path.join(package_dir, "schema.json")
    with open(schema_path) as schema_file:
        return json.load(schema_file)


# Loaded once at import time with the default source lookup; the deprecated
# rhui_aliases() and origin_aliases() helpers read from this module-level value.
DATA = load_data()
Expand All @@ -31,6 +108,10 @@ class PathAlias(object):
used to make two directory trees on CDN serve identical content."""

def __init__(self, **kwargs):
warnings.warn(
"PathAlias is deprecated - please use load_data instead",
DeprecationWarning,
)
self.src = kwargs["src"]
"""Source path of mapping (e.g. "/content/rhel/dist/rhui")."""

Expand All @@ -52,6 +133,10 @@ def rhui_aliases():
list[:class:`~PathAlias`]
A list of aliases relating to RHUI paths.
"""
warnings.warn(
"rhui_aliases is deprecated - please use load_data instead",
DeprecationWarning,
)
return [PathAlias(**elem) for elem in DATA["rhui_alias"]]


Expand All @@ -60,4 +145,8 @@ def origin_aliases():
list[:class:`~PathAlias`]
A list of aliases relating to origin paths.
"""
warnings.warn(
"origin_aliases is deprecated - please use load_data instead",
DeprecationWarning,
)
return [PathAlias(**elem) for elem in DATA["origin_alias"]]
1 change: 1 addition & 0 deletions test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pytest
jsonschema
requests-mock
78 changes: 77 additions & 1 deletion tests/test_load.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
from cdn_definitions._impl import load_data
import os
import json

import pytest
import requests
import requests_mock

from cdn_definitions import load_data, load_schema


def test_can_load_custom_json_data(monkeypatch, tmpdir):
Expand All @@ -23,3 +30,72 @@ def test_can_load_custom_yaml_data(monkeypatch, tmpdir):
data = load_data()

assert data == {"hello": "yaml"}


@pytest.mark.parametrize(
    "file_name, file_contents, expected",
    [
        ("myfile.json", '{"french": "toast"}', {"french": "toast"}),
        ("myfile.yaml", "french: fries", {"french": "fries"}),
    ],
)
def test_can_load_local_data_from_source(tmpdir, file_name, file_contents, expected):
    """A local file passed as ``source`` is parsed according to its extension."""
    data_file = tmpdir.join(file_name)
    data_file.write(file_contents)

    assert load_data(source=str(data_file)) == expected


def test_can_load_yaml_url_from_env_var(monkeypatch):
    """A YAML URL supplied via CDN_DEFINITIONS_PATH is fetched and parsed."""
    monkeypatch.setenv("CDN_DEFINITIONS_PATH", "https://test.com/data.yaml")

    with requests_mock.Mocker() as mocker:
        mocker.get("https://test.com/data.yaml", text='{"water": "melon"}')
        loaded = load_data()

    assert loaded == {"water": "melon"}


def test_can_load_json_url_from_env_var(monkeypatch):
    """A JSON URL supplied via CDN_DEFINITIONS_PATH is fetched and parsed."""
    monkeypatch.setenv("CDN_DEFINITIONS_PATH", "https://test.com/data.json")

    with requests_mock.Mocker() as mocker:
        # NOTE(review): ``json=`` is given a str here, so the mocked body is
        # presumably a JSON-encoded *string* rather than a JSON object -
        # confirm this is the payload shape the test is meant to cover.
        mocker.get("https://test.com/data.json", json='{"green": "bean"}')
        loaded = load_data()

    assert loaded == {"green": "bean"}


def test_can_load_yaml_url_from_source_arg(monkeypatch):
    """A YAML URL passed directly as ``source`` is fetched and parsed."""
    with requests_mock.Mocker() as mocker:
        mocker.get("http://test.com/data.yaml", text='{"grape": "fruit"}')
        loaded = load_data("http://test.com/data.yaml")

    assert loaded == {"grape": "fruit"}


def test_can_load_json_url_from_source_arg(monkeypatch):
    """A JSON URL passed directly as ``source`` is fetched and parsed."""
    with requests_mock.Mocker() as mocker:
        # NOTE(review): ``json=`` is given a str here, so the mocked body is
        # presumably a JSON-encoded *string* rather than a JSON object -
        # confirm this is the payload shape the test is meant to cover.
        mocker.get("http://test.com/data.json", json='{"green": "pepper"}')
        loaded = load_data("http://test.com/data.json")

    assert loaded == {"green": "pepper"}


def test_invalid_data_source():
    """A source that cannot be loaded makes load_data raise RuntimeError.

    Assumes no file named "test" exists in the current working directory.
    """
    expect_failure = pytest.raises(RuntimeError, match="Could not load data")
    with expect_failure:
        load_data(source="test")


def test_load_schema():
    """load_schema() returns the same content as src/cdn_definitions/schema.json."""
    # Two levels up from this test module, then into the package sources.
    project_root = os.path.dirname(os.path.dirname(__file__))
    schema_path = os.path.join(project_root, "src", "cdn_definitions", "schema.json")

    with open(schema_path) as schema_file:
        expected_schema = json.load(schema_file)

    assert load_schema() == expected_schema

0 comments on commit 29e374d

Please sign in to comment.