Skip to content

Commit

Permalink
Merge branch 'master' into fix/k8s-metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
chauhang committed Aug 24, 2023
2 parents 72fc96f + 921f423 commit e333731
Show file tree
Hide file tree
Showing 11 changed files with 423 additions and 178 deletions.
2 changes: 2 additions & 0 deletions docs/sphinx/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ torch-workflow-archiver
torchvision
pyyaml
captum
torchtext
pytest
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.pytorch.serve.archive.DownloadArchiveException;
import org.pytorch.serve.archive.model.InvalidModelException;
import org.pytorch.serve.archive.s3.HttpUtils;
import org.yaml.snakeyaml.LoaderOptions;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.Constructor;
import org.yaml.snakeyaml.error.YAMLException;
Expand All @@ -47,7 +48,7 @@ public static <T> T readFile(File file, Class<T> type)

public static <T> T readYamlFile(File file, Class<T> type)
throws InvalidModelException, IOException {
Yaml yaml = new Yaml(new Constructor(type));
Yaml yaml = new Yaml(new Constructor(type, new LoaderOptions()));
try (Reader r =
new InputStreamReader(
Files.newInputStream(file.toPath()), StandardCharsets.UTF_8)) {
Expand Down
2 changes: 1 addition & 1 deletion frontend/gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ slf4j_api_version=1.7.32
slf4j_log4j_version=2.17.1
testng_version=7.1.0
torchserve_sdk_version=0.0.5
snakeyaml_version=1.31
snakeyaml_version=2.1
grpc_version=1.50.0
protoc_version=3.18.0
lmax_disruptor_version=3.4.4
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.LoaderOptions;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.composer.ComposerException;
import org.yaml.snakeyaml.constructor.Constructor;
Expand Down Expand Up @@ -64,7 +65,7 @@ public void validate() {

public static MetricConfiguration loadConfiguration(String configFilePath)
throws FileNotFoundException, ComposerException, RuntimeException {
Constructor constructor = new Constructor(MetricConfiguration.class);
Constructor constructor = new Constructor(MetricConfiguration.class, new LoaderOptions());
Yaml yaml = new Yaml(constructor);
FileInputStream inputStream = new FileInputStream(new File(configFilePath));
MetricConfiguration config = yaml.load(inputStream);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.apache.commons.io.IOUtils;
import org.pytorch.serve.servingsdk.snapshot.SnapshotSerializer;
import org.pytorch.serve.snapshot.SnapshotSerializerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class ConfigManager {
Expand Down Expand Up @@ -111,6 +112,9 @@ public final class ConfigManager {
private static final String MODEL_CONFIG = "models";
private static final String VERSION = "version";

// Configuration default values
private static final String DEFAULT_TS_ALLOWED_URLS = "file://.*|http(s)?://.*";

// Variables which are local
public static final String MODEL_METRICS_LOGGER = "MODEL_METRICS";
public static final String MODEL_LOGGER = "MODEL_LOG";
Expand All @@ -136,6 +140,7 @@ public final class ConfigManager {
private String hostName;
private Map<String, Map<String, JsonObject>> modelConfig = new HashMap<>();
private String torchrunLogDir;
private Logger logger = LoggerFactory.getLogger(ConfigManager.class);

private ConfigManager(Arguments args) throws IOException {
prop = new Properties();
Expand Down Expand Up @@ -234,6 +239,13 @@ private ConfigManager(Arguments args) throws IOException {
}

setModelConfig();

// Issue warnining about URLs that can be accessed when loading models
if (prop.getProperty(TS_ALLOWED_URLS, DEFAULT_TS_ALLOWED_URLS) == DEFAULT_TS_ALLOWED_URLS) {
logger.warn(
"Your torchserve instance can access any URL to load models. "
+ "When deploying to production, make sure to limit the set of allowed_urls in config.properties");
}
}

public static String readFile(String path) throws IOException {
Expand Down Expand Up @@ -783,7 +795,7 @@ private static int getAvailableGpu() {
}

public List<String> getAllowedUrls() {
String allowedURL = prop.getProperty(TS_ALLOWED_URLS, "file://.*|http(s)?://.*");
String allowedURL = prop.getProperty(TS_ALLOWED_URLS, DEFAULT_TS_ALLOWED_URLS);
return Arrays.asList(allowedURL.split(","));
}

Expand Down
19 changes: 12 additions & 7 deletions ts/metrics/caching_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from ts.metrics.dimension import Dimension
from ts.metrics.metric_abstract import MetricAbstract
from ts.metrics.metric_type_enum import MetricTypes

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -36,7 +37,7 @@ def __init__(
unit can be one of ms, percent, count, MB, GB or a generic string
dimension_names list
list of dimension names which should be strings
list of dimension name strings
metric_type MetricTypes
Type of metric Counter, Gauge, Histogram
Expand All @@ -57,9 +58,11 @@ def _validate_and_get_dimensions(
values corresponding to the metrics dimension names
Returns
-------
list of dimension objects or ValueError
list of Dimension objects or ValueError
"""
if dimension_values is None or len(dimension_values) != len(self.dimension_names):
if dimension_values is None or len(dimension_values) != len(
self.dimension_names
):
raise ValueError(
f"Dimension values: {dimension_values} should "
f"correspond to Dimension names: {self.dimension_names}"
Expand Down Expand Up @@ -97,8 +100,10 @@ def emit_metrics(
value
dimension_string
"""
metric_str = f"[METRICS]{self.metric_name}.{self.unit}:{value}|#{dimension_string}|" \
f"#hostname:{socket.gethostname()},{int(time.time())}"
metric_str = (
f"[METRICS]{self.metric_name}.{self.unit}:{value}|#{dimension_string}|"
f"#hostname:{socket.gethostname()},{int(time.time())}"
)
if request_id:
logger.info(f"{metric_str},{request_id}")
else:
Expand All @@ -118,7 +123,7 @@ def add_or_update(
value : int, float
metric to be updated
dimension_values : list
list of dimension values
list of dimension value strings
request_id : str
request id to be associated with the metric
"""
Expand Down Expand Up @@ -152,7 +157,7 @@ def update(
request_id : str
request id to be associated with the metric
dimensions : list
list of dimension values
list of Dimension objects
"""
logger.warning("Overriding existing dimensions")
self.dimension_names = [dim.name for dim in dimensions]
Expand Down
12 changes: 8 additions & 4 deletions ts/metrics/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import time
from builtins import str
from collections import OrderedDict

from ts.metrics.caching_metric import CachingMetric
from ts.metrics.metric_type_enum import MetricTypes

from ts.metrics.unit import Units

MetricUnit = Units()
Expand Down Expand Up @@ -41,7 +41,7 @@ def __init__(
unit: str
unit can be one of ms, percent, count, MB, GB or a generic string
dimensions: list
list of dimension objects
list of Dimension objects
request_id: str
req_id of metric
metric_method: str
Expand Down Expand Up @@ -73,13 +73,17 @@ def update(self, value):
value : int, float
metric to be updated
"""
self._caching_metric.add_or_update(value, self.dimension_values, request_id=self.request_id)
self._caching_metric.add_or_update(
value, self.dimension_values, request_id=self.request_id
)

def reset(self):
"""
Reset Metric value to 0
"""
self._caching_metric.add_or_update(0, self.dimension_values, request_id=self.request_id)
self._caching_metric.add_or_update(
0, self.dimension_values, request_id=self.request_id
)

def __str__(self):
dims = ",".join([str(d) for d in self.dimensions])
Expand Down
5 changes: 3 additions & 2 deletions ts/metrics/metric_abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
Interface for metric class for TS
"""
import abc
from ts.metrics.unit import Units

from ts.metrics.metric_type_enum import MetricTypes
from ts.metrics.unit import Units

MetricUnit = Units()

Expand Down Expand Up @@ -33,7 +34,7 @@ def __init__(
unit can be one of ms, percent, count, MB, GB or a generic string
dimension_names list
list of dimension names which should be strings
list of dimension name strings
metric_type MetricTypes
Type of metric Counter, Gauge, Histogram
Expand Down
65 changes: 51 additions & 14 deletions ts/metrics/metric_cache_abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"""
import abc
import os
import ts.metrics.metric_cache_errors as merrors

import ts.metrics.metric_cache_errors as merrors
from ts.metrics.dimension import Dimension
from ts.metrics.metric_abstract import MetricAbstract
from ts.metrics.metric_type_enum import MetricTypes
Expand All @@ -27,15 +27,17 @@ def __init__(self, config_file_path):
Name of file to be parsed
"""
self.cache = dict()
self.cache = {}
self.store = []
self.request_ids = None
self.model_name = None
self.config_file_path = config_file_path
try:
os.path.exists(self.config_file_path)
except Exception as exc:
raise merrors.MetricsCacheTypeError(f"Error loading {config_file_path} file: {exc}")
raise merrors.MetricsCacheTypeError(
f"Error loading {config_file_path} file: {exc}"
)

def _add_default_dims(self, idx, dimensions):
dim_names = [dim.name for dim in dimensions]
Expand Down Expand Up @@ -63,11 +65,44 @@ def _get_req(self, idx):
# check if request id for the metric is given, if so use it else have a list of all.
req_id = self.request_ids
if isinstance(req_id, dict):
req_id = ','.join(self.request_ids.values())
req_id = ",".join(self.request_ids.values())
if idx is not None and self.request_ids is not None and idx in self.request_ids:
req_id = self.request_ids[idx]
return req_id

def add_metric(
self,
name: str,
value: int or float,
unit: str,
idx: str = None,
dimensions: list = [],
metric_type: MetricTypes = MetricTypes.COUNTER,
):
"""
Add a generic metric
Default metric type is counter
Parameters
----------
name : str
metric name
value: int or float
value of the metric
unit: str
unit of metric
idx: str
request id to be associated with the metric
dimensions: list
list of Dimension objects for the metric
metric_type MetricTypes
Type of metric Counter, Gauge, Histogram
"""
req_id = self._get_req(idx)
dimensions = self._add_default_dims(req_id, dimensions)
metric = self._get_or_add_metric(name, unit, dimensions, metric_type)
metric.add_or_update(value, [dim.value for dim in dimensions], req_id)

def add_counter(
self,
name: str,
Expand All @@ -87,7 +122,7 @@ def add_counter(
idx: str
request id to be associated with the metric
dimensions: list
list of dimension names for the metric
list of Dimension objects for the metric
"""
req_id = self._get_req(idx)
dimensions = self._add_default_dims(req_id, dimensions)
Expand Down Expand Up @@ -118,7 +153,7 @@ def add_time(
unit: str
unit of metric, default here is ms, s is also accepted
dimensions: list
list of dimension names for the metric
list of Dimension objects for the metric
metric_type MetricTypes
Type of metric Counter, Gauge, Histogram
"""
Expand Down Expand Up @@ -155,7 +190,7 @@ def add_size(
unit: str
unit of metric, default here is 'MB', 'kB', 'GB' also supported
dimensions: list
list of dimensions for the metric
list of Dimension objects for the metric
metric_type MetricTypes
Type of metric Counter, Gauge, Histogram
"""
Expand Down Expand Up @@ -189,7 +224,7 @@ def add_percent(
idx: str
request id to be associated with the metric
dimensions: list
list of dimensions for the metric
list of Dimension objects for the metric
metric_type MetricTypes
Type of metric Counter, Gauge, Histogram
"""
Expand All @@ -215,17 +250,19 @@ def add_error(
value: int or float
value of the metric
dimensions: list
list of dimension objects for the metric
list of Dimension objects for the metric
"""
dimensions = self._add_default_dims(None, dimensions)
metric = self._get_or_add_metric(name, "", dimensions, MetricTypes.COUNTER)
metric.add_or_update(value, [dim.value for dim in dimensions])

def _get_or_add_metric(self, metric_name, unit, dimensions, metric_type) -> MetricAbstract:
def _get_or_add_metric(
self, metric_name, unit, dimensions, metric_type
) -> MetricAbstract:
try:
metric = self.get_metric(metric_name, metric_type)
except merrors.MetricsCacheKeyError:
metric = self.add_metric(
metric = self.add_metric_to_cache(
metric_name=metric_name,
unit=unit,
metric_type=metric_type,
Expand Down Expand Up @@ -269,11 +306,11 @@ def get_metric(
pass

@abc.abstractmethod
def add_metric(
def add_metric_to_cache(
self,
metric_name: str,
unit: str,
dimension_names: list = None,
dimension_names: list = [],
metric_type: MetricTypes = MetricTypes.COUNTER,
) -> MetricAbstract:
"""
Expand All @@ -287,7 +324,7 @@ def add_metric(
unit: str
unit of metric
dimension_names: list
list of dimensions for the metric
list of dimension name strings for the metric
metric_type: MetricTypes
Type of metric
Expand Down
Loading

0 comments on commit e333731

Please sign in to comment.