Skip to content

Commit

Permalink
Merge pull request #405 from open-dynaMIX/refactor_caching_data_sources
Browse files Browse the repository at this point in the history
Refactor of caching for DataSources
  • Loading branch information
winged committed May 2, 2019
2 parents a9236f2 + 429c8bb commit 7321837
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 143 deletions.
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,12 +280,14 @@ For this you can use the data_source extension point.
An example data_source looks like this:

```python
from caluma.data_sources import BaseDataSource
from caluma.data_source.data_sources import BaseDataSource
from caluma.data_source.utils import data_source_cache
import requests

class CustomDataSource(BaseDataSource):
info = 'User choices from "someapi"'

@data_source_cache(timeout=3600)
def get_data(self, info):
response = requests.get(
f"https://someapi/?user={info.context.user.username}"
Expand All @@ -298,8 +300,8 @@ This class needs also to be added to the `DATA_SOURCE_CLASSES` environment varia
### Properties

* `info`: Descriptive text for the data source (can also be a multilingual dict)
* `timeout`: Time you want to cache the data
* `default`: The default value to be returned if execution of `get_data()` fails
* `default`: The default value to be returned if execution of `get_data()` fails. If
this is `None`, the Exception won't be handled. Defaults to None.

### `get_data`-method
Must return an iterable. This iterable can contain strings, ints, floats
Expand All @@ -309,6 +311,13 @@ this value will also be used as label.

For the label, it's possible to use a dict with translated values.

### `data_source_cache` decorator
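This decorator caches the return value of the decorated method for `timeout` seconds.
The cache key is built from the DataSource class name only (e.g.
`data_source_CustomDataSource`), so the cached data is shared between all users. Keep
this in mind when `get_data` returns user-specific data.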

Django's cache framework is used, so you can also implement your own caching logic. When
doing so, it is advisable to use the `data_source_` prefix for the key in order to avoid
conflicts.

#### Some valid examples

```python
Expand Down
11 changes: 2 additions & 9 deletions caluma/data_source/data_source_handlers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
from collections import namedtuple
from datetime import timedelta

from django.conf import settings
from django.utils import translation
from django.utils.module_loading import import_string

from caluma.data_source.utils import cache_handler

DataSource = namedtuple("DataSource", ["name", "info"])


Expand Down Expand Up @@ -77,12 +74,8 @@ def get_data_source_data(info, name):
data_sources = get_data_sources(dic=True)
if name not in data_sources:
raise DataSourceException(f"No data_source found for name: {name}")
cls = data_sources[name]
ds = cls()
key = f"data_source_{name}_{info.context.user.username}"
raw_data = cache_handler(
ds.get_data, timedelta(seconds=ds.timeout), ds.default, key, info=info
)

raw_data = data_sources[name]().try_get_data_with_fallback(info)
if not is_iterable_and_no_string(raw_data):
raise DataSourceException(f"Failed to parse data from source: {name}")

Expand Down
30 changes: 24 additions & 6 deletions caluma/data_source/data_sources.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import logging

logger = logging.getLogger(__name__)


class BaseDataSource:
"""Basic data source class to be extended by any data source implementation.
Expand All @@ -14,19 +19,20 @@ class BaseDataSource:
Properties:
info: Informational string about this data source
timeout: no longer a class property; set the cache timeout (in seconds) with the
`data_source_cache(timeout=...)` decorator on `get_data()`
default: default value to return if execution of `get_data()` fails
(defaults to [])
default: default value to return if execution of `get_data()` fails.
If this is `None`, the Exception won't be handled. Defaults to None.
A custom data source class could look like this:
```
>>> from caluma.data_sources import BaseDataSource
>>> from caluma.data_source.data_sources import BaseDataSource
... from caluma.data_source.utils import data_source_cache
... import requests
...
...
... class CustomDataSource(BaseDataSource):
... info = 'User choices from "someapi"'
...
... @data_source_cache(timeout=3600)
... def get_data(self, info):
... response = requests.get(
... f"https://someapi/?user={info.context.request.user.username}"
Expand All @@ -37,11 +43,23 @@ class BaseDataSource:
"""

info = None
timeout = 3600
default = []
default = None

def __init__(self):
pass

def get_data(self, info): # pragma: no cover
raise NotImplementedError()

def try_get_data_with_fallback(self, info):
    """Return the result of `get_data()`, falling back to `self.default`.

    Any exception raised by `get_data()` is logged together with its
    traceback. If `self.default` is `None`, no fallback is configured and
    the exception is re-raised unchanged; otherwise `self.default` is
    returned in place of the data.

    Args:
        info: value passed through unchanged to `get_data()`.

    Returns:
        Whatever `get_data()` returns, or `self.default` if it raised and a
        default is set.

    Raises:
        Exception: whatever `get_data()` raised, when `self.default` is None.
    """
    try:
        return self.get_data(info)
    except Exception as e:
        source = type(self).__name__
        if self.default is None:
            # No fallback available: do not claim that default data is used.
            logger.exception("Executing %s.get_data() failed: %s", source, e)
            # Bare raise keeps the original traceback untouched.
            raise
        logger.exception(
            "Executing %s.get_data() failed: %s. Using default data.", source, e
        )
        return self.default
45 changes: 39 additions & 6 deletions caluma/data_source/tests/data_sources.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from uuid import uuid4

from caluma.data_source.data_sources import BaseDataSource
from caluma.data_source.utils import data_source_cache


class MyDataSource(BaseDataSource):
info = {"en": "Nice test data source", "de": "Schöne Datenquelle"}
timeout = 3600
default = []
default = [1, 2, 3]

@data_source_cache(timeout=3600)
def get_data(self, info):
return [
1,
Expand All @@ -19,20 +22,50 @@ def get_data(self, info):
],
]

@data_source_cache(timeout=60)
def get_data_test_string(self, info):
return "test string"

@data_source_cache(timeout=60)
def get_data_uuid(self, info):
return str(uuid4())

@data_source_cache(timeout=1)
def get_data_expire(self, info):
return str(uuid4())


class MyFaultyDataSource(BaseDataSource):
info = "Faulty test data source"
timeout = 3600
default = []
default = None

@data_source_cache(timeout=3600)
def get_data(self, info):
return "just a string"


class MyOtherFaultyDataSource(BaseDataSource):
info = "Other faulty test data source"
timeout = 3600
default = []
default = None

@data_source_cache(timeout=3600)
def get_data(self, info):
return [["just", "some", "strings"]]


class MyBrokenDataSource(BaseDataSource):
    """Test data source whose `get_data()` always raises; a default is set."""

    default = [1, 2, 3]
    info = "Other faulty test data source"

    @data_source_cache(timeout=3600)
    def get_data(self, info):
        # Always fail so callers exercise the error path.
        raise Exception()


class MyOtherBrokenDataSource(BaseDataSource):
    """Test data source whose `get_data()` always raises; no default is set."""

    default = None
    info = "Other faulty test data source"

    @data_source_cache(timeout=3600)
    def get_data(self, info):
        # Always fail so callers exercise the error path.
        raise Exception()
14 changes: 14 additions & 0 deletions caluma/data_source/tests/snapshots/snap_test_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,17 @@
},
}
}

# Expected GraphQL response for `test_data_source_defaults`: three edges whose
# label and slug are "1", "2" and "3".
snapshots["test_data_source_defaults 1"] = {
"dataSource": {
"edges": [
{"node": {"label": "1", "slug": "1"}},
{"node": {"label": "2", "slug": "2"}},
{"node": {"label": "3", "slug": "3"}},
],
"pageInfo": {
"endCursor": "YXJyYXljb25uZWN0aW9uOjI=",
"startCursor": "YXJyYXljb25uZWN0aW9uOjA=",
},
}
}
34 changes: 34 additions & 0 deletions caluma/data_source/tests/test_cache_decorator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from time import sleep

from django.core.cache import cache

from .data_sources import MyDataSource


def test_set_cache(info):
    """Calling a decorated method stores its return value in the cache."""
    cache.clear()

    returned = MyDataSource().get_data_test_string(info)

    assert returned == "test string"
    # The value is stored under a key derived from the class name.
    assert cache.get("data_source_MyDataSource") == "test string"


def test_get_from_cache(info):
    """A second call returns the cached value instead of a fresh UUID."""
    cache.clear()
    source = MyDataSource()

    # First call populates the cache.
    source.get_data_uuid(info)
    stored = cache.get("data_source_MyDataSource")

    # Second call must hand back what is already cached.
    repeated = source.get_data_uuid(info)
    assert stored == repeated


def test_expired_cache(info):
    """Once the cache entry has expired, a new value is computed."""
    cache.clear()
    source = MyDataSource()

    # `get_data_expire` is decorated with timeout=1.
    source.get_data_expire(info)
    stale = cache.get("data_source_MyDataSource")

    # Wait slightly longer than the timeout so the entry is gone.
    sleep(1.1)
    fresh = source.get_data_uuid(info)

    assert stale != fresh
58 changes: 0 additions & 58 deletions caluma/data_source/tests/test_cache_handler.py

This file was deleted.

55 changes: 54 additions & 1 deletion caluma/data_source/tests/test_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_fetch_data_from_data_source(snapshot, schema_executor, data_source_sett
result = schema_executor(query)
assert not result.errors
snapshot.assert_match(result.data)
assert cache.get("data_source_MyDataSource_None")["data"] == [
assert cache.get("data_source_MyDataSource") == [
1,
5.5,
"sdkj",
Expand Down Expand Up @@ -116,6 +116,59 @@ def test_data_source_failure(data_source, schema_executor, settings):
assert result.errors


# Queries `MyBrokenDataSource` through the GraphQL schema and checks that the
# query succeeds without errors and that the returned data matches the stored
# snapshot.
# NOTE(review): the f-prefix on the class path below is unnecessary; the
# literal contains no placeholders.
def test_data_source_defaults(snapshot, schema_executor, settings):
settings.DATA_SOURCE_CLASSES = [
f"caluma.data_source.tests.data_sources.MyBrokenDataSource"
]

query = """
query dataSource {
dataSource (name: "MyBrokenDataSource") {
pageInfo {
startCursor
endCursor
}
edges {
node {
label
slug
}
}
}
}
"""

result = schema_executor(query, variables={})
assert not result.errors
snapshot.assert_match(result.data)


# Queries `MyOtherBrokenDataSource` through the GraphQL schema and checks that
# the result carries errors.
# NOTE(review): the f-prefix on the class path below is unnecessary; the
# literal contains no placeholders.
def test_data_source_exception(schema_executor, settings):
settings.DATA_SOURCE_CLASSES = [
f"caluma.data_source.tests.data_sources.MyOtherBrokenDataSource"
]

query = """
query dataSource {
dataSource (name: "MyOtherBrokenDataSource") {
pageInfo {
startCursor
endCursor
}
edges {
node {
label
slug
}
}
}
}
"""

result = schema_executor(query, variables={})
assert result.errors


@pytest.mark.parametrize("configure", [True, False])
def test_fetch_data_from_non_existing_data_source(schema_executor, settings, configure):
if configure:
Expand Down
Loading

0 comments on commit 7321837

Please sign in to comment.