From 75cafc47ce86a33a90f2fc07e7a22776f0c4779d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sat, 17 Apr 2021 14:11:12 +0200 Subject: [PATCH 1/4] Adds function to easily store and retrieve dataframe through a REST API --- _doc/sphinxdoc/source/api/index.rst | 1 + _doc/sphinxdoc/source/api/server.rst | 29 +++ .../ut_serverdoc/test_file_store_rest.py | 57 +++++- src/pyquickhelper/loghelper/github_api.py | 4 +- src/pyquickhelper/server/filestore_fastapi.py | 169 +++++++++++++++++- src/pyquickhelper/server/filestore_sqlite.py | 20 ++- 6 files changed, 269 insertions(+), 11 deletions(-) create mode 100644 _doc/sphinxdoc/source/api/server.rst diff --git a/_doc/sphinxdoc/source/api/index.rst b/_doc/sphinxdoc/source/api/index.rst index 9ae05e805..2f29dd59b 100644 --- a/_doc/sphinxdoc/source/api/index.rst +++ b/_doc/sphinxdoc/source/api/index.rst @@ -14,3 +14,4 @@ API Summary loghelper pycode texthelper + server diff --git a/_doc/sphinxdoc/source/api/server.rst b/_doc/sphinxdoc/source/api/server.rst new file mode 100644 index 000000000..b0f516441 --- /dev/null +++ b/_doc/sphinxdoc/source/api/server.rst @@ -0,0 +1,29 @@ + +REST API, local file store +========================== + +Benchmarking usually happens on a different job when running +CI jobs and cannot be included in the documentation unless +they are stored somewhere. A REST API is better than +a local file because it can be remote and does not rely +on a local path. These functions are a simple implementation +of an API to store and retrieve dataframes with :epkg:`FastAPI`. + +.. contents:: + :local: + +REST API +++++++++ + +.. autosignature:: pyquickhelper.server.filestore_fastapi.fast_api_submit + +.. autosignature:: pyquickhelper.server.filestore_fastapi.fast_api_query + +.. autosignature:: pyquickhelper.server.filestore_fastapi.fast_api_content + +.. autosignature:: pyquickhelper.server.filestore_fastapi.create_app + +File Storage +++++++++++++ + +.. 
autosignature:: pyquickhelper.server.filestore_sqlite.SqlLite3FileStore diff --git a/_unittests/ut_serverdoc/test_file_store_rest.py b/_unittests/ut_serverdoc/test_file_store_rest.py index 48862e24e..12e2ec9b6 100644 --- a/_unittests/ut_serverdoc/test_file_store_rest.py +++ b/_unittests/ut_serverdoc/test_file_store_rest.py @@ -6,7 +6,9 @@ import os import pandas from pyquickhelper.pycode import ExtTestCase, get_temp_folder -from pyquickhelper.server.filestore_fastapi import create_fast_api_app +from pyquickhelper.server.filestore_fastapi import ( + create_fast_api_app, fast_api_submit, fast_api_query, + fast_api_content) from fastapi.testclient import TestClient # pylint: disable=E0401 from pyquickhelper.server.filestore_sqlite import SqlLite3FileStore @@ -52,6 +54,12 @@ def test_file_store(self): js = response.json() self.assertEqual(len(js), 1) + response = client.post( + "/content/", json=dict(password="BBB")) + self.assertEqual(response.status_code, 200) + js = response.json() + self.assertEqual(len(js), 1) + response = client.post( "/metrics/", json=dict(name="essai", password="CCC")) self.assertEqual(response.status_code, 401) @@ -86,6 +94,53 @@ def test_file_store(self): self.assertEqual(len(js), 1) self.assertEqual(js[0]['value'], 0.67) + def test_file_store_df(self): + temp = get_temp_folder(__file__, "temp_file_storage_rest_df") + name = os.path.join(temp, "filestore.db3") + app = create_fast_api_app(name, "BBB") + client = TestClient(app) + response = client.get("/") + self.assertEqual(response.status_code, 200) + + df = pandas.DataFrame( + dict(A=[0, 5, 6], B=[4.5, 1, 1], C=["E", "R", "T"])) + resp = fast_api_submit(df, client, team="AA", name="BB", project="CCC", + version=1, password="BBB") + self.assertEqual(resp.status_code, 200) + + df = pandas.DataFrame( + dict(A=[0, 5, 6], B=[4.5, 2, 2], C=["E", "R", "Z"])) + resp = fast_api_submit(df, client, team="AA", name="BB", project="CCC", + version=2, password="BBB") + self.assertEqual(resp.status_code, 
200) + + res = fast_api_query(client, team="AA", name="BB", project="CCC", + password="BBB") + exp = [{'id': 1, 'name': 'BB', 'format': '', + 'metadata': {'client': ['testclient', 50000]}, 'team': 'AA', + 'project': 'CCC', 'version': 1, 'format': 'df'}, + {'id': 2, 'name': 'BB', 'format': '', + 'metadata': {'client': ['testclient', 50000]}, 'team': 'AA', + 'project': 'CCC', 'version': 2, 'format': 'df'}] + for r in res: + del r['date'] + self.assertEqual(res, exp) + + df = fast_api_query(client, team="AA", name="BB", project="CCC", + password="BBB", as_df=True) + cols = ['team', 'project', 'name', 'version'] + df = df[cols] + mv = df.groupby(cols[:-1]).max() + self.assertEqual(mv.shape, (1, 1)) + self.assertEqual(mv.iloc[0, 0], 2) + + content = fast_api_content( + client, team="AA", name="BB", project="CCC", + password="BBB", as_df=True) + for c in content: + self.assertIsInstance(c['content'], pandas.DataFrame) + self.assertEqual(c['content'].shape, (3, 3)) + if __name__ == "__main__": unittest.main() diff --git a/src/pyquickhelper/loghelper/github_api.py b/src/pyquickhelper/loghelper/github_api.py index e053775af..82198d2cd 100644 --- a/src/pyquickhelper/loghelper/github_api.py +++ b/src/pyquickhelper/loghelper/github_api.py @@ -18,8 +18,8 @@ def __init__(self, response, url, **kwargs): if msg: msg = "\n" + "\n".join(msg) Exception.__init__(self, - "response={0}\nurl={1}\ntext={2}\nstatus={3}{4}".format( - response, url, response.text, response.status_code, msg)) + "response={0}\nurl={1}\ntext={2}\nstatus={3}{4}".format( + response, url, response.text, response.status_code, msg)) def call_github_api(owner, repo, ask, auth=None, headers=None): diff --git a/src/pyquickhelper/server/filestore_fastapi.py b/src/pyquickhelper/server/filestore_fastapi.py index 1291fc07c..65ed7396a 100644 --- a/src/pyquickhelper/server/filestore_fastapi.py +++ b/src/pyquickhelper/server/filestore_fastapi.py @@ -4,6 +4,7 @@ @brief Simple class to store and retrieve files through an API. 
""" import os +import io from typing import Optional from fastapi import FastAPI, Request, HTTPException from pydantic import BaseModel # pylint: disable=E0611 @@ -15,7 +16,7 @@ class Item(BaseModel): format: Optional[str] # pylint: disable=E1136 team: Optional[str] # pylint: disable=E1136 project: Optional[str] # pylint: disable=E1136 - version: Optional[str] # pylint: disable=E1136 + version: Optional[int] # pylint: disable=E1136 content: Optional[str] # pylint: disable=E1136 password: str @@ -30,7 +31,16 @@ class Query(BaseModel): name: Optional[str] # pylint: disable=E1136 team: Optional[str] # pylint: disable=E1136 project: Optional[str] # pylint: disable=E1136 - version: Optional[str] # pylint: disable=E1136 + version: Optional[int] # pylint: disable=E1136 + password: str + + +class QueryL(BaseModel): + name: Optional[str] # pylint: disable=E1136 + team: Optional[str] # pylint: disable=E1136 + project: Optional[str] # pylint: disable=E1136 + version: Optional[int] # pylint: disable=E1136 + limit: Optional[int] # pylint: disable=E1136 password: str @@ -71,9 +81,32 @@ async def query(query: Query, request: Request): project=query.project, version=query.version)) return res + async def content(query: QueryL, request: Request): + if query.password != password: + raise HTTPException(status_code=401, detail="Wrong password") + if query.limit is None: + limit = 5 + else: + limit = query.limit + res = [] + for r in store.enumerate_content( + name=query.name, team=query.team, project=query.project, + version=query.version): + if len(res) >= limit: + break + if "content" in r: + content = r['content'] + if hasattr(content, 'to_csv'): + st = io.StringIO() + content.to_csv(st, index=False, encoding="utf-8") + r['content'] = st.getvalue() + res.append(r) + return res + app = FastAPI() app.get("/")(get_root) app.post("/submit/")(submit) + app.post("/content/")(content) app.post("/metrics/")(metrics) app.post("/query/")(query) return app @@ -124,3 +157,135 @@ def 
create_app(): app = create_fast_api_app(os.environ['PYQUICKHELPER_FASTAPI_PATH'], os.environ['PYQUICKHELPER_FASTAPI_PWD']) return app + + +def fast_api_submit(df, client=None, url=None, name=None, team=None, + project=None, version=None, password=None): + """ + Stores a dataframe into a local stores. + + :param df: dataframe + :param client: for unittest purpose + :param url: API url (can be None if client is not) + :param name: name + :param team: team + :param project: project + :param version: version + :param password: password for the submission + :return: response + """ + st = io.StringIO() + df.to_csv(st, index=False, encoding="utf-8") + if password is None: + password = os.environ.get("PYQUICKHELPER_FASTAPI_PWD", None) + if password is None: + raise RuntimeError( + "password must be specified or environement variable " + "'PYQUICKHELPER_FASTAPI_PWD'.") + data = dict(team=team, project=project, version=version, + password=password, content=st.getvalue(), + name=name, format="df") + if client is None: + import requests + resp = requests.post("%s/submit/" % url, data=data) + else: + resp = client.post("/submit/", json=data) + + if resp.status_code != 200: + del data['content'] + del data['password'] + raise RuntimeError( + "Submission failed due to %r\ndata=%r." % (resp, data)) + return resp + + +def fast_api_query(client=None, url=None, name=None, team=None, + project=None, version=None, password=None, + as_df=False): + """ + Retrieves the list of dataframe based on partial information. 
+ + :param client: for unittest purpose + :param url: API url (can be None if client is not) + :param name: name + :param team: team + :param project: project + :param version: version + :param password: password for the submission + :return: response + """ + if password is None: + password = os.environ.get("PYQUICKHELPER_FASTAPI_PWD", None) + if password is None: + raise RuntimeError( + "password must be specified or environement variable " + "'PYQUICKHELPER_FASTAPI_PWD'.") + data = dict(team=team, project=project, version=version, + password=password, name=name) + if client is None: + import requests + resp = requests.post("%s/query/" % url, data=data) + else: + resp = client.post("/query/", json=data) + + if resp.status_code != 200: + del data['content'] + del data['password'] + raise RuntimeError( + "Submission failed due to %r\ndata=%r." % (resp, data)) + if as_df: + import pandas + return pandas.DataFrame(resp.json()) + return resp.json() + + +def fast_api_content(client=None, url=None, name=None, team=None, + project=None, version=None, limit=5, + password=None, as_df=True): + """ + Retrieves the dataframes based on partial information. + Enumerates a list of dataframes. 
+ + :param client: for unittest purpose + :param url: API url (can be None if client is not) + :param name: name + :param team: team + :param project: project + :param version: version + :param limit: maximum number of dataframes to retrieve + :param as_df: returns the content as a dataframe + :param password: password for the submission + :return: list of dictionary, content is a dataframe + """ + if password is None: + password = os.environ.get("PYQUICKHELPER_FASTAPI_PWD", None) + if password is None: + raise RuntimeError( + "password must be specified or environement variable " + "'PYQUICKHELPER_FASTAPI_PWD'.") + data = dict(team=team, project=project, version=version, + password=password, name=name, limit=limit) + if client is None: + import requests + resp = requests.post("%s/content/" % url, data=data) + else: + resp = client.post("/content/", json=data) + + if resp.status_code != 200: + del data['content'] + del data['password'] + raise RuntimeError( + "Submission failed due to %r\ndata=%r." % (resp, data)) + res = resp.json() + if as_df: + import pandas + + for r in res: + content = r.get('content', None) + if content is None: + continue + if 'format' in r and r['format'] == 'df': + st = io.StringIO(r['content']) + df = pandas.read_csv(st, encoding="utf-8") + r['content'] = df + return res diff --git a/src/pyquickhelper/server/filestore_sqlite.py b/src/pyquickhelper/server/filestore_sqlite.py index 3c48e53bc..270381637 100644 --- a/src/pyquickhelper/server/filestore_sqlite.py +++ b/src/pyquickhelper/server/filestore_sqlite.py @@ -17,6 +17,11 @@ class SqlLite3FileStore: :param path: location of the database. 
""" + @staticmethod + def v2s(value, s="'"): + if isinstance(value, str): + return "%s%s%s" % (s, value, s) + return str(value) def __init__(self, path="_file_store_.db3"): self.path_ = path @@ -47,7 +52,7 @@ def _create(self): '''CREATE TABLE files (id INTEGER PRIMARY KEY, date TEXT, name TEXT, format TEXT, metadata TEXT, team TEXT, - project TEXT, version TEXT, content BLOB)''') + project TEXT, version INT, content BLOB)''') commit = True if (('data',) in res and not self._check_same_column( @@ -106,10 +111,13 @@ def submit(self, name, content, format=None, date=None, metadata=None, if n is None: continue fields.append(k) - values.append(n.replace("\\", "\\\\").replace("'", "''")) + if isinstance(n, str): + values.append(n.replace("\\", "\\\\").replace("'", "''")) + else: + values.append(n) sqlite_insert_blob_query = """ INSERT INTO files (%s) VALUES (%s)""" % ( - ",".join(fields), ",".join("'%s'" % v for v in values)) + ",".join(fields), ",".join(map(SqlLite3FileStore.v2s, values))) cur = self.con_.cursor() cur.execute(sqlite_insert_blob_query) self.con_.commit() @@ -184,7 +192,7 @@ def enumerate_content(self, name=None, format=None, date=None, metadata=None, for k, v in record.items(): if v is None: continue - cond.append('%s="%s"' % (k, v)) + cond.append('%s=%s' % (k, SqlLite3FileStore.v2s(v, '"'))) fields = ["id", "name", "format", "date", "metadata", "team", "project", "version", "content"] for it in self._enumerate(cond, fields): @@ -211,7 +219,7 @@ def enumerate(self, name=None, format=None, date=None, metadata=None, for k, v in record.items(): if v is None: continue - cond.append('%s="%s"' % (k, v)) + cond.append('%s=%s' % (k, SqlLite3FileStore.v2s(v, '"'))) fields = ["id", "name", "format", "date", "metadata", "team", "project", "version"] for it in self._enumerate(cond, fields): @@ -245,7 +253,7 @@ def enumerate_data(self, idfile=None, name=None, join=False, else: cond.append('data.%s="%s"' % (k, v)) else: - cond.append('%s="%s"' % (k, v)) + 
cond.append('%s=%s' % (k, SqlLite3FileStore.v2s(v, '"'))) cur = self.con_.cursor() if join: From 3dfc711eaadb88f3118231799134deeb44c1da10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sat, 17 Apr 2021 15:13:46 +0200 Subject: [PATCH 2/4] better design --- .../ut_serverdoc/test_file_store_rest.py | 17 +++- src/pyquickhelper/server/filestore_fastapi.py | 83 +++++++------------ 2 files changed, 46 insertions(+), 54 deletions(-) diff --git a/_unittests/ut_serverdoc/test_file_store_rest.py b/_unittests/ut_serverdoc/test_file_store_rest.py index 12e2ec9b6..7e162ccc2 100644 --- a/_unittests/ut_serverdoc/test_file_store_rest.py +++ b/_unittests/ut_serverdoc/test_file_store_rest.py @@ -1,5 +1,5 @@ """ -@brief test log(time=1s) +@brief test log(time=4s) """ import unittest @@ -8,13 +8,26 @@ from pyquickhelper.pycode import ExtTestCase, get_temp_folder from pyquickhelper.server.filestore_fastapi import ( create_fast_api_app, fast_api_submit, fast_api_query, - fast_api_content) + fast_api_content, _get_password, _post_request) from fastapi.testclient import TestClient # pylint: disable=E0401 from pyquickhelper.server.filestore_sqlite import SqlLite3FileStore class TestfileStoreRest(ExtTestCase): + def test_simple_function1(self): + self.assertRaise( + lambda: _get_password(None, "IMPOSSIBLE"), RuntimeError) + + def test_simple_function2(self): + from requests.exceptions import ConnectionError + self.assertRaise( + lambda: _post_request(None, None, None, None), AttributeError) + self.assertRaise( + lambda: _post_request(None, "http://localhost:7777", {}, "submit", + timeout=1.), + ConnectionError) + def test_file_store(self): temp = get_temp_folder(__file__, "temp_file_storage_rest") name = os.path.join(temp, "filestore.db3") diff --git a/src/pyquickhelper/server/filestore_fastapi.py b/src/pyquickhelper/server/filestore_fastapi.py index 65ed7396a..d34942e7a 100644 --- a/src/pyquickhelper/server/filestore_fastapi.py +++ 
b/src/pyquickhelper/server/filestore_fastapi.py @@ -159,6 +159,31 @@ def create_app(): return app +def _get_password(password, env="PYQUICKHELPER_FASTAPI_PWD"): + if password is None: + password = os.environ.get(env, None) + if password is None: + raise RuntimeError( + "password must be specified or environement variable " + "'PYQUICKHELPER_FASTAPI_PWD'.") + return password + + +def _post_request(client, url, data, suffix, timeout=None): + if client is None: + import requests + resp = requests.post("%s/%s" % (url.strip('/'), suffix), data=data, + timeout=timeout) + else: + resp = client.post("/%s/" % suffix, json=data) + if resp.status_code != 200: + del data['content'] + del data['password'] + raise RuntimeError( + "Post request failed due to %r\ndata=%r." % (resp, data)) + return resp + + def fast_api_submit(df, client=None, url=None, name=None, team=None, project=None, version=None, password=None): """ @@ -174,29 +199,13 @@ def fast_api_submit(df, client=None, url=None, name=None, team=None, :param password: password for the submission :return: response """ + password = _get_password(password) st = io.StringIO() df.to_csv(st, index=False, encoding="utf-8") - if password is None: - password = os.environ.get("PYQUICKHELPER_FASTAPI_PWD", None) - if password is None: - raise RuntimeError( - "password must be specified or environement variable " - "'PYQUICKHELPER_FASTAPI_PWD'.") data = dict(team=team, project=project, version=version, password=password, content=st.getvalue(), name=name, format="df") - if client is None: - import requests - resp = requests.post("%s/submit/" % url, data=data) - else: - resp = client.post("/submit/", json=data) - - if resp.status_code != 200: - del data['content'] - del data['password'] - raise RuntimeError( - "Submission failed due to %r\ndata=%r." 
% (resp, data)) - return resp + return _post_request(client, url, data, "submit") def fast_api_query(client=None, url=None, name=None, team=None, @@ -214,25 +223,10 @@ def fast_api_query(client=None, url=None, name=None, team=None, :param password: password for the submission :return: response """ - if password is None: - password = os.environ.get("PYQUICKHELPER_FASTAPI_PWD", None) - if password is None: - raise RuntimeError( - "password must be specified or environement variable " - "'PYQUICKHELPER_FASTAPI_PWD'.") + password = _get_password(password) data = dict(team=team, project=project, version=version, password=password, name=name) - if client is None: - import requests - resp = requests.post("%s/query/" % url, data=data) - else: - resp = client.post("/query/", json=data) - - if resp.status_code != 200: - del data['content'] - del data['password'] - raise RuntimeError( - "Submission failed due to %r\ndata=%r." % (resp, data)) + resp = _post_request(client, url, data, "query") if as_df: import pandas return pandas.DataFrame(resp.json()) @@ -257,25 +251,10 @@ def fast_api_content(client=None, url=None, name=None, team=None, :param password: password for the submission :return: list of dictionary, content is a dataframe """ - if password is None: - password = os.environ.get("PYQUICKHELPER_FASTAPI_PWD", None) - if password is None: - raise RuntimeError( - "password must be specified or environement variable " - "'PYQUICKHELPER_FASTAPI_PWD'.") + password = _get_password(password) data = dict(team=team, project=project, version=version, password=password, name=name, limit=limit) - if client is None: - import requests - resp = requests.post("%s/content/" % url, data=data) - else: - resp = client.post("/content/", json=data) - - if resp.status_code != 200: - del data['content'] - del data['password'] - raise RuntimeError( - "Submission failed due to %r\ndata=%r." 
% (resp, data)) + resp = _post_request(client, url, data, "content") res = resp.json() if as_df: import pandas From 307f09bac29fa3ca02b73bfb633f216a177c47c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sat, 17 Apr 2021 15:18:57 +0200 Subject: [PATCH 3/4] removes duplicated imports --- src/pyquickhelper/helpgen/utils_sphinx_doc_helpers.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/pyquickhelper/helpgen/utils_sphinx_doc_helpers.py b/src/pyquickhelper/helpgen/utils_sphinx_doc_helpers.py index 8bac8d386..dfa46c6f7 100644 --- a/src/pyquickhelper/helpgen/utils_sphinx_doc_helpers.py +++ b/src/pyquickhelper/helpgen/utils_sphinx_doc_helpers.py @@ -1,9 +1,7 @@ """ @file -@brief various variables and classes used to produce a Sphinx documentation - +@brief Various variables and classes used to produce a Sphinx documentation. """ - import inspect import os import copy @@ -732,7 +730,6 @@ def import_module(rootm, filename, log_function, additional_sys_path=None, except SystemError as e: # pragma: no cover log_function("[warning] -- unable to import module (2) ", filename, ",", fi, " in path ", sdir, " Error: ", str(e)) - import traceback stack = traceback.format_exc() log_function(" executable", sys.executable) log_function(" version", sys.version_info) @@ -752,7 +749,6 @@ def import_module(rootm, filename, log_function, additional_sys_path=None, else: log_function("[warning] -- unable to import module (4) ", filename, ",", fi, " in path ", sdir, " Error: ", str(e)) - import traceback stack = traceback.format_exc() log_function(" executable", sys.executable) log_function(" version", sys.version_info) From 43067c0333e8b6f0fbf4163d00be66dda6de808a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sat, 17 Apr 2021 15:22:14 +0200 Subject: [PATCH 4/4] lint --- _unittests/ut_serverdoc/test_file_store_rest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/_unittests/ut_serverdoc/test_file_store_rest.py b/_unittests/ut_serverdoc/test_file_store_rest.py index 7e162ccc2..acd84d8c3 100644 --- a/_unittests/ut_serverdoc/test_file_store_rest.py +++ b/_unittests/ut_serverdoc/test_file_store_rest.py @@ -129,10 +129,10 @@ def test_file_store_df(self): res = fast_api_query(client, team="AA", name="BB", project="CCC", password="BBB") - exp = [{'id': 1, 'name': 'BB', 'format': '', + exp = [{'id': 1, 'name': 'BB', 'metadata': {'client': ['testclient', 50000]}, 'team': 'AA', 'project': 'CCC', 'version': 1, 'format': 'df'}, - {'id': 2, 'name': 'BB', 'format': '', + {'id': 2, 'name': 'BB', 'metadata': {'client': ['testclient', 50000]}, 'team': 'AA', 'project': 'CCC', 'version': 2, 'format': 'df'}] for r in res: