Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
0 contributors

Users who have contributed to this file

286 lines (267 sloc) 10.4 KB
// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
syntax = "proto2";
package tensorflow.metadata.v0;
option cc_enable_arenas = true;
option java_package = "org.tensorflow.metadata.v0";
option java_multiple_files = true;
// TODO(b/123519907): Remove this.
// GOOGLE-LEGACY import "net/proto2/bridge/proto/message_set.proto";
import "tensorflow_metadata/proto/v0/path.proto";
import "tensorflow_metadata/proto/v0/schema.proto";
// Message to represent information about an individual anomaly.
//
// An anomaly carries an optional location (`path`), a severity, a long and a
// short human-readable description, a diff between the existing and the fixed
// schema (`diff_regions`), and a list of typed reasons (`reason`).
message AnomalyInfo {
  // Deleted fields.
  reserved 1, 3;
  // A path indicating where the anomaly occurred.
  // Dataset-level anomalies do not have a path.
  optional Path path = 8;
  // Severity levels that can be attached to an anomaly.
  enum Severity {
    // First value of the enum, so it is what an unset `severity` reads as.
    UNKNOWN = 0;
    WARNING = 1;
    ERROR = 2;
  }
  // The severity of this anomaly.
  optional Severity severity = 5;
  // A description of the entire anomaly.
  optional string description = 2;
  // A shorter description, suitable for UI presentation.
  // If there is a single reason for the anomaly, identical to
  // reason[0].short_description. Otherwise, summarizes all the reasons.
  optional string short_description = 6;
  // The comparison between the existing schema and the fixed schema.
  repeated DiffRegion diff_regions = 4;
  // Anomaly types begin with the name of the file that threw the error.
  // Values are numbered contiguously from 0 (UNKNOWN_TYPE) to 50.
  enum Type {
    // First value of the enum; also the explicit default of Reason.type.
    UNKNOWN_TYPE = 0;
    // Integer larger than 1
    BOOL_TYPE_BIG_INT = 1;
    // BYTES type when expected INT type
    BOOL_TYPE_BYTES_NOT_INT = 2;
    // BYTES type when expected STRING type
    BOOL_TYPE_BYTES_NOT_STRING = 3;
    // FLOAT type when expected INT type
    BOOL_TYPE_FLOAT_NOT_INT = 4;
    // FLOAT type when expected STRING type
    BOOL_TYPE_FLOAT_NOT_STRING = 5;
    // INT type when expected STRING type
    BOOL_TYPE_INT_NOT_STRING = 6;
    // Integer smaller than 0
    BOOL_TYPE_SMALL_INT = 7;
    // STRING type when expected INT type
    BOOL_TYPE_STRING_NOT_INT = 8;
    // Expected a string, but not the string seen
    BOOL_TYPE_UNEXPECTED_STRING = 9;
    // BYTES type when expected STRING type
    ENUM_TYPE_BYTES_NOT_STRING = 10;
    // FLOAT type when expected STRING type
    ENUM_TYPE_FLOAT_NOT_STRING = 11;
    // INT type when expected STRING type
    ENUM_TYPE_INT_NOT_STRING = 12;
    // Invalid UTF8 string observed
    ENUM_TYPE_INVALID_UTF8 = 13;
    // Unexpected string values
    ENUM_TYPE_UNEXPECTED_STRING_VALUES = 14;
    // The number of values in a given example is too large
    FEATURE_TYPE_HIGH_NUMBER_VALUES = 15;
    // The fraction of examples containing a feature is too small
    FEATURE_TYPE_LOW_FRACTION_PRESENT = 16;
    // The number of examples containing a feature is too small
    FEATURE_TYPE_LOW_NUMBER_PRESENT = 17;
    // The number of values in a given example is too small
    FEATURE_TYPE_LOW_NUMBER_VALUES = 18;
    // No examples contain the value
    FEATURE_TYPE_NOT_PRESENT = 19;
    // The feature is present as an empty list
    FEATURE_TYPE_NO_VALUES = 20;
    // The feature is repeated in an example, but was expected to be a
    // singleton
    FEATURE_TYPE_UNEXPECTED_REPEATED = 21;
    // There is a float value that is too high
    FLOAT_TYPE_BIG_FLOAT = 22;
    // The type is not FLOAT
    FLOAT_TYPE_NOT_FLOAT = 23;
    // There is a float value that is too low
    FLOAT_TYPE_SMALL_FLOAT = 24;
    // The feature is supposed to be floats encoded as strings, but there is
    // a string that is not a float
    FLOAT_TYPE_STRING_NOT_FLOAT = 25;
    // The feature is supposed to be floats encoded as strings, but it was
    // some other type (INT, BYTES, FLOAT)
    FLOAT_TYPE_NON_STRING = 26;
    // The type is completely unknown
    FLOAT_TYPE_UNKNOWN_TYPE_NUMBER = 27;
    // There is an unexpectedly large integer
    INT_TYPE_BIG_INT = 28;
    // The type was supposed to be INT, but it was not.
    INT_TYPE_INT_EXPECTED = 29;
    // The feature is supposed to be ints encoded as strings, but some string
    // was not an int.
    INT_TYPE_NOT_INT_STRING = 30;
    // The type was supposed to be STRING, but it was not.
    INT_TYPE_NOT_STRING = 31;
    // There is an unexpectedly small integer
    INT_TYPE_SMALL_INT = 32;
    // The feature is supposed to be ints encoded as strings, but it was
    // some other type (INT, BYTES, FLOAT)
    INT_TYPE_STRING_EXPECTED = 33;
    // Unknown type in stats proto
    INT_TYPE_UNKNOWN_TYPE_NUMBER = 34;
    // There are no stats for a column at all
    SCHEMA_MISSING_COLUMN = 35;
    // There is a new column that is not in the schema.
    SCHEMA_NEW_COLUMN = 36;
    // Training serving skew issue
    SCHEMA_TRAINING_SERVING_SKEW = 37;
    // Expected STRING type, but it was FLOAT.
    STRING_TYPE_NOW_FLOAT = 38;
    // Expected STRING type, but it was INT.
    STRING_TYPE_NOW_INT = 39;
    // Control data is missing (either scoring data or previous day).
    COMPARATOR_CONTROL_DATA_MISSING = 40;
    // Treatment data is missing (either treatment data or current day).
    COMPARATOR_TREATMENT_DATA_MISSING = 41;
    // L infinity between treatment and control is high.
    COMPARATOR_L_INFTY_HIGH = 42;
    // No examples in the span.
    NO_DATA_IN_SPAN = 43;
    // The value feature of a sparse feature is missing and at least one
    // feature defining the sparse feature is present.
    SPARSE_FEATURE_MISSING_VALUE = 44;
    // An index feature of a sparse feature is missing and at least one
    // feature defining the sparse feature is present.
    SPARSE_FEATURE_MISSING_INDEX = 45;
    // The length of the features representing a sparse feature does not
    // match.
    SPARSE_FEATURE_LENGTH_MISMATCH = 46;
    // Name collision between a sparse feature and raw feature.
    SPARSE_FEATURE_NAME_COLLISION = 47;
    // Invalid custom semantic domain.
    SEMANTIC_DOMAIN_UPDATE = 48;
    // There are not enough examples in the current data.
    COMPARATOR_LOW_NUM_EXAMPLES = 49;
    // There are too many examples in the current data.
    COMPARATOR_HIGH_NUM_EXAMPLES = 50;
  }
  // Reason for the anomaly. There may be more than one reason,
  // e.g. the field might be missing sometimes AND a new value is
  // present.
  message Reason {
    // The kind of anomaly this reason describes. Explicitly defaults to
    // UNKNOWN_TYPE when unset.
    optional Type type = 1 [default = UNKNOWN_TYPE];
    // A short description of an anomaly, suitable for UI presentation.
    optional string short_description = 2;
    // A longer description of an anomaly.
    optional string description = 3;
  }
  // The individual reasons that make up this anomaly. When there is exactly
  // one, its short_description matches AnomalyInfo.short_description.
  repeated Reason reason = 7;
}
// Message to represent the anomalies, which describe the mismatches (if any)
// between the stats and the schema.
message Anomalies {
  // Deleted fields.
  reserved 4;
  // The baseline schema that is used. Being a oneof, at most one of the two
  // fields below is set at any time.
  oneof baseline_schema {
    tensorflow.metadata.v0.Schema baseline = 1;
    // Marked deprecated; shares the oneof with `baseline`.
    tensorflow.metadata.v0.Schema baseline_v1 = 6 [deprecated = true];
  }
  // Describes how the keys of anomaly_info are to be interpreted.
  enum AnomalyNameFormat {
    // At present, this indicates that the keys in anomaly_info
    // refer to the raw field name in the Schema.
    UNKNOWN = 0;
    // The serialized path to a struct.
    SERIALIZED_PATH = 1;
  }
  // The format of the keys in anomaly_info.
  // If absent, this reads as UNKNOWN (the first value of the enum); there is
  // no value named DEFAULT.
  optional AnomalyNameFormat anomaly_name_format = 7;
  // Information about feature-level anomalies: a map from a column to the
  // difference that it represents. The key format is given by
  // anomaly_name_format.
  map<string, AnomalyInfo> anomaly_info = 2;
  // Information about dataset-level anomalies.
  optional AnomalyInfo dataset_anomaly_info = 8;
  // True if numExamples == 0.
  optional bool data_missing = 3;
  // TODO(b/123519907): Remove this.
  // The hook to attach any usage and tool specific metadata. Example:
  // message SchemaStamp {
  //   // extension ID is any CL number that has not been used in an
  //   // extension.
  //   extend proto2.bridge.MessageSet {
  //     optional StampedSchemaDiff message_set_extension = 123445554;
  //   }
  //   optional string schema_stamp = 1;
  // }
  //
  // then, the following proto msg encodes an Anomalies with an embedded
  // SchemaStamp:
  //
  // Anomalies {
  //   metadata {
  //     [SchemaStamp]: {
  //       schema_stamp: "stamp"
  //     }
  //   }
  // }
  // GOOGLE-LEGACY optional proto2.bridge.MessageSet metadata = 5;
}
// Describes a region in the comparison between two text artifacts. Note that
// a region also contains the contents of the two artifacts that correspond to
// the region.
message DiffRegion {
  // Details for the chunk. Being a oneof, at most one of the region kinds
  // below is set; the set member tells which kind of region this is.
  oneof details {
    // An unchanged region of lines.
    UnchangedRegion unchanged = 1;
    // A region of lines removed from the left.
    OneSideRegion removed = 2;
    // A region of lines added to the right.
    OneSideRegion added = 3;
    // A region of lines that are different in the two artifacts.
    ChangedRegion changed = 4;
    // An unchanged region of lines whose contents are just hidden.
    HiddenRegion hidden = 5;
  }
}
// Describes a chunk that is the same in the two artifacts.
message UnchangedRegion {
  // The starting lines of the chunk in the two artifacts.
  // NOTE(review): whether line numbers are 0- or 1-based is not stated in
  // this file; confirm against the producer.
  // Starting line of the chunk in the left artifact.
  optional int32 left_start = 1;
  // Starting line of the chunk in the right artifact.
  optional int32 right_start = 2;
  // The contents of the chunk. These are the same in both artifacts.
  repeated string contents = 3;
}
// Describes a chunk that applies to only one of the two artifacts.
// Which artifact it applies to is not recorded in this message itself; in
// DiffRegion it is used for both the `removed` and the `added` members.
message OneSideRegion {
  // Starting line.
  optional int32 start = 1;
  // Contents.
  repeated string contents = 2;
}
// Describes a chunk that represents changes in both artifacts over the same
// number of lines.
// NOTE(review): this suggests left_contents and right_contents have equal
// length, but nothing in the schema enforces it; confirm with the producer.
message ChangedRegion {
  // Changed region in the left artifact, in terms of starting line number and
  // contents.
  optional int32 left_start = 1;
  repeated string left_contents = 2;
  // Ditto for the right artifact.
  optional int32 right_start = 3;
  repeated string right_contents = 4;
}
// A chunk that represents identical lines, whose contents are hidden.
// Unlike UnchangedRegion, it carries only positions and a size, no contents.
message HiddenRegion {
  // Starting lines in the two artifacts.
  // Starting line of the region in the left artifact.
  optional int32 left_start = 1;
  // Starting line of the region in the right artifact.
  optional int32 right_start = 2;
  // Size of the region in terms of lines.
  optional int32 size = 3;
}
You can’t perform that action at this time.