/
__init__.py
358 lines (270 loc) · 11.1 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
from typing import List, Optional, Union
from clipped.compact.pydantic import Field, StrictStr
from clipped.types.ref_or_obj import BoolOrRef, RefField
from polyaxon._auxiliaries.sidecar import V1PolyaxonSidecarContainer
from polyaxon._flow.notifications import V1Notification
from polyaxon._schemas.base import BaseSchemaModel
class V1Plugins(BaseSchemaModel):
    """Plugins section provides a way to customize extra Polyaxon utilities.

    By default, Polyaxon injects some information for example an auth context
    and triggers some mechanisms for collecting logs and outputs.

    Plugins section provides more control to the end user to enable/disable
    some of these utilities.

    Args:
        auth: bool, optional, default: True
        docker: bool, optional, default: False
        shm: bool, optional, default: True
        mount_artifacts_store: bool, optional, default: False
        collect_artifacts: bool, optional, default: True
        collect_logs: bool, optional, default: True
        collect_resources: bool, optional, default: True
        sync_statuses: bool, optional, default: True
        auto_resume: bool, optional, default: True
        log_level: str, optional
        external_host: bool, optional, default: False
        sidecar: V1PolyaxonSidecarContainer, optional
        notifications: List[V1Notification], optional

    ## YAML usage

    ```yaml
    >>> plugins:
    >>>   auth:
    >>>   docker:
    >>>   shm:
    >>>   mountArtifactsStore:
    >>>   collectArtifacts:
    >>>   collectLogs:
    >>>   collectResources:
    >>>   autoResume:
    >>>   externalHost:
    >>>   logLevel:
    >>>   sidecar:
    ```

    ## Python usage

    ```python
    >>> from polyaxon.schemas import V1Plugins
    >>> plugins = V1Plugins(
    >>>     auth=False,
    >>>     docker=True,
    >>>     shm=True,
    >>>     mount_artifacts_store=True,
    >>>     collect_artifacts=False,
    >>>     collect_logs=False,
    >>>     collect_resources=False,
    >>>     auto_resume=False,
    >>>     external_host=False,
    >>>     log_level="INFO",
    >>> )
    ```

    ## Fields

    ### auth

    <blockquote class="light">
    This plugin is enabled by default in Polyaxon deployments with user management.
    </blockquote>

    By default, Polyaxon will create an auth context for each operation, this removes the overhead
    to think about how you can pass tokens to your runs,
    or the need to create secrets to load the token from during the run time.

    The auth context that Polyaxon provides is not only specific to the user who
    executed the run, but it also impersonates similar user access rights, it has the same scopes
    and restrictions the user usually has within the context of the project
    managing the run.
    This is important to make sure the API calls made during the run
    by the user's code have the right access to the resources requested.

    Oftentimes, users might not need to use an authenticated client inside their containers,
    in that case they can disable this plugin.

    To disable this plugin:

    ```yaml
    >>> plugins:
    >>>   auth: false
    ```

    ### docker

    <blockquote class="light">This plugin is disabled by default.</blockquote>

    This plugin exposes a docker socket volume to your run container.

    N.B. use this plugin carefully, you might also need to check with your devops
    team before using it, it requires docker.sock of the host to be mounted
    which is often rejected by OPA.

    To enable this plugin:

    ```yaml
    >>> plugins:
    >>>   docker: true
    ```

    ### shm

    <blockquote class="light">This plugin is enabled by default.</blockquote>

    This plugin mounts a tmpfs volume to /dev/shm.
    This will set /dev/shm size to half of the RAM of the node.
    By default, /dev/shm is very small, only 64MB.
    Some experiments/jobs will fail due to the lack of shared memory,
    such as some experiments running on Pytorch.

    To disable this plugin:

    ```yaml
    >>> plugins:
    >>>   shm: false
    ```

    ### mountArtifactsStore

    <blockquote class="light">This plugin is disabled by default.</blockquote>

    This plugin allows requesting the default artifacts store and mounting it to the
    main container without adding the connection reference name to the `connections` section.
    This is usually more useful than setting the `connections` section as it makes the component
    more generic and will not break if the artifacts store name changes.

    ### collectArtifacts

    <blockquote class="light">This plugin is enabled by default.</blockquote>

    By default, Polyaxon will collect all artifacts and outputs that you share in the
    `plx-context/artifacts/run-uuid/outputs` to the default artifacts store
    that you configured during the deployment.

    This plugin is important if you want to have an agnostic code to the
    type of storage backend you are using, by changing the environment variable
    you can test your code with `tmp` file locally and the artifacts path in production.

    To disable this plugin:

    ```yaml
    >>> plugins:
    >>>   collectArtifacts: false
    ```

    Sometimes you might want to access the artifacts path in your polyaxonfile,
    Polyaxon exposes a [context](/docs/core/context/) that gets resolved during
    the compilation time, you can just use
    "{{run_artifacts_path}}" global variable and it will be resolved automatically.

    Example:

    ```yaml
    >>> run:
    >>>   kind: job
    >>>   container:
    >>>     command: "cp /some/known/path/file {{run_artifacts_path}}/file"
    ```

    For more information about the context, please check [context](/docs/core/context/)

    ### collectLogs

    <blockquote class="light">This plugin is enabled by default.</blockquote>

    By default, Polyaxon will collect all logs related to your runs before deleting
    the resource on the clusters. This ensures that your cluster(s) are kept clean and no resources
    are actively putting pressure on the API server.

    Sometimes you might want to avoid collecting logs for some runs, for example test or debug jobs.

    To disable this plugin:

    ```yaml
    >>> plugins:
    >>>   collectLogs: false
    ```

    ### collectResources

    <blockquote class="light">This plugin is enabled by default.</blockquote>

    By default, Polyaxon will collect all Mem/CPU/GPU resources
    for your runs that use the python client.

    Sometimes you might want to avoid collecting this information for some runs,
    for example test or debug jobs.

    To disable this plugin:

    ```yaml
    >>> plugins:
    >>>   collectResources: false
    ```

    ### autoResume

    <blockquote class="light">This plugin is enabled by default.</blockquote>

    By default, Polyaxon will resume from collecting metrics/outputs/artifacts
    if a run fails and retries or if the user resumes a run.

    To disable this plugin:

    ```yaml
    >>> plugins:
    >>>   autoResume: false
    ```

    ### externalHost

    <blockquote class="light">Default is False.</blockquote>

    In some edge cases where the auxiliaries and/or the main container cannot reach the API/Streams
    services via the internal networking interface, you can enable this flag to tell Polyaxon
    to resolve the external host instead of the default behavior with the in-cluster host.

    ```yaml
    >>> plugins:
    >>>   externalHost: true
    ```

    ### logLevel

    <blockquote class="light">Default is None.</blockquote>

    If you want to control the log level of your runs in a similar way locally and on the cluster,
    you can either use env vars or this plugin to share the same log level with all containers
    running in your operation.

    ```yaml
    >>> plugins:
    >>>   logLevel: warning
    ```

    ### sidecar

    <blockquote class="light">Default is None.</blockquote>

    To override the default global sidecar configuration.

    ```yaml
    >>> plugins:
    >>>   sidecar:
    >>>     syncInterval: 60
    ```
    """

    _IDENTIFIER = "plugins"

    # Every field is optional and explicitly defaults to None so that
    # "not provided" stays distinguishable from an explicit True/False, and so
    # that the fields are not treated as required by pydantic versions where an
    # `Optional[...]` annotation alone no longer implies a default.
    auth: Optional[BoolOrRef] = None
    docker: Optional[BoolOrRef] = None
    shm: Optional[BoolOrRef] = None
    mount_artifacts_store: Optional[BoolOrRef] = Field(
        alias="mountArtifactsStore", default=None
    )
    collect_artifacts: Optional[BoolOrRef] = Field(
        alias="collectArtifacts", default=None
    )
    collect_logs: Optional[BoolOrRef] = Field(alias="collectLogs", default=None)
    collect_resources: Optional[BoolOrRef] = Field(
        alias="collectResources", default=None
    )
    sync_statuses: Optional[BoolOrRef] = Field(alias="syncStatuses", default=None)
    auto_resume: Optional[BoolOrRef] = Field(alias="autoResume", default=None)
    log_level: Optional[StrictStr] = Field(alias="logLevel", default=None)
    external_host: Optional[BoolOrRef] = Field(alias="externalHost", default=None)
    sidecar: Optional[Union[V1PolyaxonSidecarContainer, RefField]] = None
    notifications: Optional[Union[List[V1Notification], RefField]] = None

    @classmethod
    def get_or_create(
        cls, config: Optional["V1Plugins"], auth: bool = False
    ) -> "V1Plugins":
        """Return a plugins config in which the boolean flags are resolved.

        Args:
            config: an existing plugins config, or a falsy value to start from
                a fresh instance.
            auth: the value used for `auth` when it is not set on `config`.

        Returns:
            A new instance when `config` is falsy, otherwise a copy of
            `config`; the passed-in object is not modified by this method.
        """
        if not config:
            config = cls(auth=auth)
        else:
            # Work on a copy so the caller's config keeps its unset values.
            config = config.copy()
        config.set_auth(default=auth)
        config.set_docker()
        config.set_shm()
        config.set_mount_artifacts_store()
        config.set_collect_artifacts()
        config.set_collect_logs()
        config.set_collect_resources()
        config.set_sync_statuses()
        config.set_auto_resume()
        config.set_external_host()
        return config

    @staticmethod
    def no_api():
        """Return the `no_api` value of the client configuration."""
        # Imported lazily, at call time rather than at module import time.
        from polyaxon import settings

        return settings.CLIENT_CONFIG.no_api

    def set_auth(self, default: bool = False) -> None:
        """Disable `auth` when `no_api` is set, else fill an unset value."""
        if self.no_api():
            self.auth = False
        elif self.auth is None:
            self.auth = default

    def set_docker(self, default: bool = False) -> None:
        """Set `docker` to `default` when it is unset."""
        if self.docker is None:
            self.docker = default

    def set_shm(self, default: bool = True) -> None:
        """Set `shm` to `default` when it is unset."""
        if self.shm is None:
            self.shm = default

    def set_mount_artifacts_store(self, default: bool = False) -> None:
        """Set `mount_artifacts_store` to `default` when it is unset."""
        if self.mount_artifacts_store is None:
            self.mount_artifacts_store = default

    def set_collect_artifacts(self, default: bool = True) -> None:
        """Disable `collect_artifacts` when `no_api` is set, else fill an unset value."""
        if self.no_api():
            self.collect_artifacts = False
        elif self.collect_artifacts is None:
            self.collect_artifacts = default

    def set_collect_logs(self, default: bool = True) -> None:
        """Disable `collect_logs` when `no_api` is set, else fill an unset value."""
        if self.no_api():
            self.collect_logs = False
        elif self.collect_logs is None:
            self.collect_logs = default

    def set_collect_spec(self, default: bool = True) -> None:
        """Disable `collect_spec` when `no_api` is set, else fill an unset value."""
        # NOTE(review): `collect_spec` is not among the fields declared on this
        # class and `get_or_create` never calls this method. Unless the base
        # class provides the attribute, this is likely to fail when called --
        # confirm whether a `collect_spec` field should be declared or this
        # method retired.
        if self.no_api():
            self.collect_spec = False
        elif self.collect_spec is None:
            self.collect_spec = default

    def set_collect_resources(self, default: bool = True) -> None:
        """Disable `collect_resources` when `no_api` is set, else fill an unset value."""
        if self.no_api():
            self.collect_resources = False
        elif self.collect_resources is None:
            self.collect_resources = default

    def set_sync_statuses(self, default: bool = True) -> None:
        """Disable `sync_statuses` when `no_api` is set, else fill an unset value."""
        if self.no_api():
            self.sync_statuses = False
        elif self.sync_statuses is None:
            self.sync_statuses = default

    def set_auto_resume(self, default: bool = True) -> None:
        """Disable `auto_resume` when `no_api` is set, else fill an unset value."""
        if self.no_api():
            self.auto_resume = False
        elif self.auto_resume is None:
            self.auto_resume = default

    def set_external_host(self, default: bool = False) -> None:
        """Set `external_host` to `default` when it is unset."""
        if self.external_host is None:
            self.external_host = default