-
Notifications
You must be signed in to change notification settings - Fork 2.9k
/
util.py
252 lines (195 loc) · 7.34 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2024)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A bunch of useful utilities."""
from __future__ import annotations
import asyncio
import dataclasses
import functools
import hashlib
import os
import subprocess
import sys
from typing import (
Any,
Dict,
Generic,
Iterable,
List,
Mapping,
Optional,
Set,
TypeVar,
Union,
)
from cachetools import TTLCache
from typing_extensions import Final
from streamlit import env_util
# URL of Streamlit's help page.
HELP_DOC: Final = "https://docs.streamlit.io/"

# Absolute tolerance below which index_() treats two floats as equal.
FLOAT_EQUALITY_EPSILON: Final[float] = 0.000000000005

# Extra keyword arguments for hashlib constructors (see calc_md5).
# hashlib gained the `usedforsecurity` argument in Python 3.9 because of the
# known security issues in md5 and sha1. On older interpreters the argument
# does not exist, so nothing is passed.
HASHLIB_KWARGS: Dict[str, Any] = (
    {"usedforsecurity": False} if sys.version_info >= (3, 9) else {}
)
def memoize(func):
    """Cache the return value of a zero-argument callable.

    The wrapped callable is invoked on the first call and its return value
    is stored; every later call returns the stored value without invoking
    it again. If the callable raises, nothing is stored and the next call
    tries again.

    Parameters
    ----------
    func : callable
        A function that takes no arguments.

    Returns
    -------
    callable
        A zero-argument wrapper carrying the metadata of ``func``.
    """
    # A private sentinel marks "not computed yet" so that any return value,
    # including None or other falsy values, can be cached.
    unset = object()
    cached = unset

    @functools.wraps(func)
    def wrapped_func():
        nonlocal cached
        if cached is unset:
            cached = func()
        return cached

    return wrapped_func
def open_browser(url):
    """Open a web browser pointing to a given URL.

    This is used instead of calling Python's `webbrowser` module directly
    because launching the browser ourselves lets us capture its
    stdout/stderr and keep the terminal clean. For example, Chrome always
    prints things like "Created new window in existing browser session",
    and those get in the user's way.

    Parameters
    ----------
    url : str
        The URL. Must include the protocol.

    Raises
    ------
    Error
        If the current platform is none of Windows, Linux/BSD or macOS.
    """
    # Platform dispatch:
    #
    # * Windows is special-cased because /dev/null doesn't exist there and
    #   subprocess.Popen(['start', url]) doesn't actually pop up the browser
    #   even though 'start url' works from the command prompt.
    # * On Linux/BSD we prefer xdg-open and only fall back to the webbrowser
    #   module when xdg-open is not installed.
    # * On Linux and Mac we avoid the webbrowser module where possible
    #   because some browsers (ahem... Chrome) always print "Opening in
    #   existing browser session" to the terminal, which is spammy. Starting
    #   the browser ourselves lets us send all its output to /dev/null.
    if env_util.IS_WINDOWS:
        _open_browser_with_webbrowser(url)
    elif env_util.IS_LINUX_OR_BSD:
        if env_util.is_executable_in_path("xdg-open"):
            _open_browser_with_command("xdg-open", url)
        else:
            _open_browser_with_webbrowser(url)
    elif env_util.IS_DARWIN:
        _open_browser_with_command("open", url)
    else:
        import platform

        raise Error('Cannot open browser in platform "%s"' % platform.system())
def _open_browser_with_webbrowser(url):
    """Open ``url`` through the standard library's ``webbrowser`` module."""
    # Imported lazily, at call time, exactly as the module-level lookup
    # `webbrowser.open` would be resolved.
    from webbrowser import open as launch_browser

    launch_browser(url)
def _open_browser_with_command(command, url):
    """Launch ``command url`` as a child process, discarding its output.

    The child is started and not waited for, so this returns as soon as the
    process has been spawned.

    Parameters
    ----------
    command : str
        Name or path of the launcher executable (e.g. "xdg-open", "open").
    url : str
        The URL handed to the launcher as its single argument.
    """
    # subprocess.DEVNULL lets Popen manage the null device itself, so no
    # file handle has to be opened (and closed) by hand. stderr is folded
    # into stdout, which means both streams are silenced.
    subprocess.Popen(
        [command, url],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
    )
def repr_(self: Any) -> str:
    """Build a compact ``ClassName(field=value, ...)`` repr for an object.

    Fields whose value is likely a default are left out: ``None``, ``""``,
    ``False``, ``[]``, ``set()`` and ``{}``. The numbers ``0`` and ``0.0``
    are always kept, since those often have semantic meaning within
    streamlit.

    For dataclass instances, fields declared with ``repr=False`` and fields
    whose value equals the declared default are omitted as well. For any
    other object the entries of ``self.__dict__`` are used.

    Parameters
    ----------
    self : Any
        The object to describe.

    Returns
    -------
    str
        The class name followed by the retained ``name=value`` pairs.
    """
    classname = self.__class__.__name__

    # Most of the falsey values. False is deliberately NOT in this list:
    # `0 == False` in Python, so a membership test against a list containing
    # False would also drop 0 and 0.0. False is matched by identity instead.
    empties: list[Any] = [None, "", [], set(), dict()]

    def is_default(value: Any) -> bool:
        return value is False or value in empties

    if dataclasses.is_dataclass(self):
        fields_vals = (
            (f.name, getattr(self, f.name))
            for f in dataclasses.fields(self)
            if f.repr
            and getattr(self, f.name) != f.default
            and not is_default(getattr(self, f.name))
        )
    else:
        fields_vals = (
            (f, v) for (f, v) in self.__dict__.items() if not is_default(v)
        )

    field_reprs = ", ".join(f"{field}={value!r}" for field, value in fields_vals)
    return f"{classname}({field_reprs})"
_Value = TypeVar("_Value")


def index_(iterable: Iterable[_Value], x: _Value) -> int:
    """Return the zero-based position of the first item equal to x.

    A custom implementation is needed instead of the built-in list
    ``.index()`` so that NumPy arrays and Pandas Series are supported too.
    Two floats are also considered equal when they differ by less than
    ``FLOAT_EQUALITY_EPSILON``.

    Parameters
    ----------
    iterable : list, tuple, numpy.ndarray, pandas.Series
    x : Any

    Returns
    -------
    int

    Raises
    ------
    ValueError
        If no item of the iterable matches x.
    """
    for position, candidate in enumerate(iterable):
        if x == candidate:
            return position
        # Only float-vs-float pairs get the tolerance-based comparison.
        both_floats = isinstance(candidate, float) and isinstance(x, float)
        if both_floats and abs(x - candidate) < FLOAT_EQUALITY_EPSILON:
            return position
    raise ValueError("{} is not in iterable".format(str(x)))
_Key = TypeVar("_Key", bound=str)


def lower_clean_dict_keys(dict: Mapping[_Key, _Value]) -> Dict[str, _Value]:
    """Return a new dict whose keys are lower-cased and whitespace-stripped.

    Values are carried over unchanged. When several keys normalize to the
    same string, the value of the one iterated last is kept.
    """
    cleaned = {}
    for raw_key, value in dict.items():
        cleaned[raw_key.lower().strip()] = value
    return cleaned
# TODO: Move this into errors.py? Replace with StreamlitAPIException?
class Error(Exception):
    """Exception raised by utilities in this module.

    For example, open_browser() raises it on an unsupported platform.
    """
def calc_md5(s: Union[bytes, str]) -> str:
    """Return the hexadecimal MD5 digest of the given string or bytes.

    A ``str`` argument is encoded as UTF-8 before hashing; any other
    argument is hashed as is.
    """
    payload = s if not isinstance(s, str) else s.encode("utf-8")
    # HASHLIB_KWARGS carries `usedforsecurity=False` where the interpreter
    # supports it.
    return hashlib.new("md5", payload, **HASHLIB_KWARGS).hexdigest()
def exclude_keys_in_dict(
    d: Dict[str, Any], keys_to_exclude: List[str]
) -> Dict[str, Any]:
    """Return a copy of ``d`` without the keys named in ``keys_to_exclude``.

    Matching is case-insensitive on both sides: a key of ``d`` is dropped
    when its lower-cased form equals the lower-cased form of any entry in
    ``keys_to_exclude``. The input dict is not modified.

    Parameters
    ----------
    d : dict
        The dictionary to filter.
    keys_to_exclude : list of str
        Names of the keys to leave out.

    Returns
    -------
    dict
        A new dict holding the remaining items, with their original keys.
    """
    # Normalize the exclusions once. Previously only the dict keys were
    # lower-cased, so an exclusion written with any upper-case letter could
    # never match. The set also gives O(1) lookups.
    excluded = {key.lower() for key in keys_to_exclude}
    return {key: value for key, value in d.items() if key.lower() not in excluded}
def extract_key_query_params(
    query_params: Dict[str, List[str]], param_key: str
) -> Set[str]:
    """Collect the lower-cased values of one query parameter.

    Every key of ``query_params`` that equals ``param_key`` ignoring case
    contributes its values; the values are lower-cased and merged into a
    single set.

    Parameters
    ----------
    query_params : dict of str to list of str
        Parsed query string, mapping each parameter name to its values.
    param_key : str
        Name of the parameter to extract (matched case-insensitively).

    Returns
    -------
    set of str
        The lower-cased values, or an empty set if the parameter is absent
        or has no values.
    """
    # Lower-case the requested name too: the lookup is documented as
    # case-insensitive, but comparing against the raw `param_key` made any
    # request containing an upper-case letter match nothing.
    wanted = param_key.lower()
    found: Set[str] = set()
    for key, values in query_params.items():
        # Skip keys with no values (empty list or None).
        if key.lower() == wanted and values:
            found.update(value.lower() for value in values)
    return found
K = TypeVar("K")
V = TypeVar("V")


class TimedCleanupCache(TTLCache, Generic[K, V]):
    """A TTLCache that asynchronously expires its entries.

    The first insertion made while an asyncio event loop is running starts
    a background task (``expire_cache``) that periodically calls
    ``expire()`` on this cache. The task is cancelled when the cache object
    is finalized.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Handle of the background expiration task; None until one has been
        # started successfully.
        self._task: Optional[asyncio.Task[Any]] = None

    def __setitem__(self, key: K, value: V) -> None:
        # Set an expiration task to run periodically.
        # Can't be created in init because that only runs once and
        # the event loop might not exist yet.
        if self._task is None:
            try:
                # Probe for a running loop BEFORE building the coroutine.
                # Calling create_task(expire_cache(self)) without a loop
                # raises only after the coroutine object exists, which
                # leaves a never-awaited coroutine behind.
                asyncio.get_running_loop()
            except RuntimeError:
                # Just continue if the event loop isn't started yet; the
                # next insertion will try again.
                pass
            else:
                self._task = asyncio.create_task(expire_cache(self))
        super().__setitem__(key, value)

    def __del__(self):
        # `_task` may be missing if __init__ failed before assigning it.
        task = getattr(self, "_task", None)
        if task is not None:
            task.cancel()
async def expire_cache(cache: TTLCache, interval: float = 30) -> None:
    """Periodically remove expired entries from ``cache``.

    Loops forever: sleeps for ``interval`` seconds, then calls
    ``cache.expire()``. It only stops when the surrounding task is
    cancelled or ``expire()`` raises.

    Parameters
    ----------
    cache : TTLCache
        The cache to clean up; only its ``expire()`` method is used.
    interval : float, optional
        Seconds to wait between two clean-up passes. Defaults to 30.
    """
    while True:
        await asyncio.sleep(interval)
        cache.expire()