-
Notifications
You must be signed in to change notification settings - Fork 17
/
grid_search.py
214 lines (169 loc) · 5.88 KB
/
grid_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
from typing import Dict, List, Optional, Union
from typing_extensions import Literal
from clipped.compact.pydantic import Field, PositiveInt, validator
from clipped.config.schema import skip_partial
from clipped.types.ref_or_obj import RefField
from polyaxon._flow.early_stopping import V1EarlyStopping
from polyaxon._flow.matrix.base import BaseSearchConfig
from polyaxon._flow.matrix.enums import V1MatrixKind
from polyaxon._flow.matrix.params import V1HpParam
def validate_matrix(matrix):
    """Ensure a grid-search matrix does not contain distribution params.

    Args:
        matrix: mapping of param name -> hyperparameter definition; every
            value must expose an `is_distribution` attribute. May be empty
            or `None`.

    Returns:
        `None` when `matrix` is falsy, otherwise `matrix` itself, unchanged.

    Raises:
        ValueError: naming the first param (in iteration order) whose
            `is_distribution` attribute is truthy.
    """
    if not matrix:
        return None

    # A private sentinel keeps "no offender found" distinct from any real key.
    not_found = object()
    offender = next(
        (name for name, hp_param in matrix.items() if hp_param.is_distribution),
        not_found,
    )
    if offender is not not_found:
        template = (
            "`{}` defines a distribution, "
            "and it cannot be used with grid search."
        )
        raise ValueError(template.format(offender))

    return matrix
class V1GridSearch(BaseSearchConfig):
    """Grid search is essentially an exhaustive search through a manually
    specified set of hyperparameters.

    User can possibly limit the number of experiments and not traverse the whole
    search space created by providing `numRuns`.

    Grid search does not allow the use of distributions,
    and requires that all values of the params definition to
    be [discrete values](/docs/automation/optimization-engine/params/#discrete-values).

    Args:
        kind: str, should be equal `grid`
        params: Dict[str, [params](/docs/automation/optimization-engine/params/#discrete-values)]  # noqa
        concurrency: int, optional
        num_runs: int, optional
        early_stopping: List[[EarlyStopping](/docs/automation/helpers/early-stopping)], optional

    ## YAML usage

    ```yaml
    >>> matrix:
    >>>   kind: grid
    >>>   concurrency:
    >>>   params:
    >>>   numRuns:
    >>>   earlyStopping:
    ```

    ## Python usage

    ```python
    >>> from polyaxon.schemas import (
    >>>     V1GridSearch, V1HpLogSpace, V1HpChoice, V1FailureEarlyStopping, V1MetricEarlyStopping
    >>> )
    >>> matrix = V1GridSearch(
    >>>     concurrency=2,
    >>>     params={"param1": V1HpLogSpace(...), "param2": V1HpChoice(...), ... },
    >>>     num_runs=5,
    >>>     early_stopping=[V1FailureEarlyStopping(...), V1MetricEarlyStopping(...)]
    >>> )
    ```

    ## Fields

    ### kind

    The kind signals to the CLI, client, and other tools that this matrix is a grid search.

    If you are using the python client to create the mapping,
    this field is not required and is set by default.

    ```yaml
    >>> matrix:
    >>>   kind: grid
    ```

    ### concurrency

    An optional value to set the number of concurrent operations.

    <blockquote class="light">
    This value only makes sense if less or equal to the total number of possible runs.
    </blockquote>

    ```yaml
    >>> matrix:
    >>>   kind: grid
    >>>   concurrency: 2
    ```

    For more details about concurrency management,
    please check the [concurrency section](/docs/automation/helpers/concurrency/).

    ### params

    A dictionary of `key -> value generator`
    to generate the parameters.

    Grid search can only use
    [discrete value](/docs/automation/optimization-engine/params/#discrete-values).

    > The parameters generated will be validated against
    > the component's inputs/outputs definition to check that the values
    > can be passed and have valid types.

    ```yaml
    >>> matrix:
    >>>   kind: grid
    >>>   params:
    >>>     param1:
    >>>       kind: ...
    >>>       value: ...
    >>>     param2:
    >>>       kind: ...
    >>>       value: ...
    ```

    ### numRuns

    Maximum number of runs to start based on the search space defined.

    ```yaml
    >>> matrix:
    >>>   kind: grid
    >>>   numRuns: 5
    ```

    ### earlyStopping

    A list of early stopping conditions to check for terminating
    all operations managed by the pipeline.
    If one of the early stopping conditions is met,
    a signal will be sent to terminate all running and pending operations.

    ```yaml
    >>> matrix:
    >>>   kind: grid
    >>>   earlyStopping: ...
    ```

    For more details please check the
    [early stopping section](/docs/automation/helpers/early-stopping/).

    ## Example

    This example will define 10 experiments based on the cartesian product
    of `lr` and `dropout` possible values.

    ```yaml
    >>> version: 1.1
    >>> kind: operation
    >>> matrix:
    >>>   kind: grid
    >>>   concurrency: 2
    >>>   params:
    >>>     lr:
    >>>       kind: logspace
    >>>       value: 0.01:0.1:5
    >>>     dropout:
    >>>       kind: choice
    >>>       value: [0.2, 0.5]
    >>>   earlyStopping:
    >>>     - metric: accuracy
    >>>       value: 0.9
    >>>       optimization: maximize
    >>>     - metric: loss
    >>>       value: 0.05
    >>>       optimization: minimize
    >>> component:
    >>>   inputs:
    >>>     - name: batch_size
    >>>       type: int
    >>>       isOptional: true
    >>>       value: 128
    >>>     - name: lr
    >>>       type: float
    >>>     - name: dropout
    >>>       type: float
    >>>   container:
    >>>     image: image:latest
    >>>     command: [python3, train.py]
    >>>     args: ["--batch-size={{ batch_size }}", "--lr={{ lr }}", "--dropout={{ dropout }}"]
    ```
    """

    _IDENTIFIER = V1MatrixKind.GRID

    kind: Literal[_IDENTIFIER] = _IDENTIFIER
    params: Union[Dict[str, V1HpParam], RefField]
    num_runs: Optional[Union[PositiveInt, RefField]] = Field(alias="numRuns")
    concurrency: Optional[Union[PositiveInt, RefField]]
    early_stopping: Optional[Union[List[V1EarlyStopping], RefField]] = Field(
        alias="earlyStopping"
    )

    @validator("num_runs", "concurrency", pre=True)
    def check_values(cls, v, field):
        """Reject numeric `num_runs` / `concurrency` values below 1.

        Runs as a `pre` validator, so `v` is the raw input. Only real numbers
        are range-checked here; any other input (e.g. a string) is returned
        untouched so the declared field type decides whether it is valid,
        instead of the `<` comparison raising a `TypeError`.

        Args:
            v: raw value supplied for the field.
            field: field descriptor handed over by the validator machinery.

        Returns:
            `v`, unchanged.

        Raises:
            ValueError: if `v` is a non-zero number smaller than 1.
        """
        # Falsy values (None, 0) are deliberately left to the field type,
        # matching the original truthiness guard.
        if v and isinstance(v, (int, float)) and v < 1:
            # Report the field's name when available rather than the repr of
            # the whole field object.
            field_name = getattr(field, "name", field)
            raise ValueError(
                f"{field_name} must be greater than or equal to 1, "
                f"received `{v}` instead."
            )
        return v

    @validator("params", always=True)
    @skip_partial
    def validate_matrix(cls, params):
        """Validate `params` with the module-level `validate_matrix` helper.

        Inside this method body the name `validate_matrix` resolves to the
        module-level function, not to this method, because class attributes
        are not part of a function's enclosing scope.
        """
        return validate_matrix(params)