Merge pull request #725 from YANG-DB/opensearch-schema-draft

Opensearch schema draft
opensearch-project · Aug 5, 2022 · 06b93a8 · 06b93a8
2 parents 6a99b56 + c7a1ef1
commit 06b93a8
Show file tree

Hide file tree

Showing 22 changed files with 24,568 additions and 0 deletions.
diff --git a/experimental/schema/logs/agent.graphql b/experimental/schema/logs/agent.graphql
@@ -0,0 +1,43 @@
+enum AgentIdStatus { # NOTE(review): not referenced by any type visible in this chunk; presumably the outcome of validating an agent id - confirm
+    verified
+    mismatch
+    missing
+    auth_metadata_missing
+}
+# Agent fields describe the software entity, if any, that collects, detects,
+# or observes events on a host, or takes measurements on a host.
+#
+# Beats are one example. Agents may also run on observers. The ECS agent.*
+# fields shall be populated with details of the agent running on the host or
+# observer where the event happened or the measurement was taken.
+#
+type Agent implements BaseRecord {
+    # Unique identifier of this agent (if one exists).
+    #
+    # Example: for Beats this would be beat.id.
+    id: ID!
+    # Custom name of the agent.
+    #
+    # A name that can be given to an agent, which helps when, for example, two
+    # Filebeat instances run on the same host and a human-readable distinction
+    # is needed to tell which instance the data is coming from.
+    name: String
+    # Type of the agent.
+    #
+    # The type always stays the same and should be given by the agent used:
+    # for Filebeat it is always Filebeat, even when two Filebeat instances run
+    # on the same machine.
+    aType: String
+    # Version of the agent.
+    version: String
+    # Extended build information for the agent.
+    #
+    # Intended to hold any build information that a data source may provide;
+    # no specific formatting is required.
+    buildOriginal: String
+
+    # Ephemeral identifier of this agent (if one exists).
+    #
+    # This id normally changes across restarts, but `agent.id` does not.
+    ephemeralId: String
+}
diff --git a/experimental/schema/logs/base.graphql b/experimental/schema/logs/base.graphql
@@ -0,0 +1,121 @@
+scalar Time # used by BaseRecord.timestamp
+scalar Json # used for free-form key/value fields such as labels and attributes
+scalar Long # used for counters, ports and byte/packet totals
+scalar URL # not referenced by any type visible in this chunk
+scalar IP # used for the ip / natIpp address fields
+scalar GeoPoint # used by Geo.location
+
+# Kinds of supported data streams; referenced by StreamSet.streamType.
+enum StreamType {
+    logs
+    metrics
+    traces
+    synthetics
+}
+# The most common attributes shared by all top level entities. Declared as an
+# interface because only interfaces may appear in an `implements` clause.
+interface BaseRecord {
+    # Date/time when the event originated.
+    #
+    # This is the date/time extracted from the event, typically representing
+    # when the event was generated by the source. If the event source has no
+    # original timestamp, this value is typically populated by the first time
+    # the event was received by the pipeline.
+    #
+    # Required field for all events.
+    timestamp: Time!
+    # Custom key/value pairs.
+    #
+    # Can be used to add meta information to events. Should not contain nested
+    # objects. All values are stored as keyword.
+    #
+    # Example: {"application": "foo-bar", "env": "production"}
+    labels: Json
+    # For log events the message field contains the log message, optimized for
+    # viewing in a log viewer.
+    #
+    # For structured logs without an original message field, other fields can
+    # be concatenated to form a human-readable summary of the event.
+    #
+    # If multiple messages exist, they can be combined into one message.
+    message: String
+    # List of keywords used to tag each event.
+    tags: [String]
+    # Key-value pairs representing vendor specific properties.
+    attributes: Json
+}
+# The data stream naming scheme combines the data stream fields into the name of the actual data stream as {data_stream.type}-{data_stream.dataset}-{data_stream.namespace}, so these fields may only contain characters that are valid in data stream names.
+type StreamSet {
+    # An overarching type for the data stream.
+    streamType: StreamType
+    # A user-defined namespace. Namespaces are useful to allow grouping of data.
+    #
+    # Many users already organize their indices this way, and the data stream naming scheme now provides this best practice as a default.
+    # Many users will populate this field with "default"; if no value is used, it falls back to "default".
+    namespace: String
+    # Can contain anything that makes sense to signify the source of the data, e.g. nginx.access, prometheus, endpoint.
+    # Data streams that otherwise fit but have no dataset set use the value "generic".
+    # event.dataset should have the same value as data_stream.dataset.
+    dataset: String
+}
+
+# Top-most level structure for an incoming log of any format.
+type LogRecord {
+    #    The event's common characteristics (the Event type is defined outside this chunk)
+    event: Event!
+    #    A list of top-level observations which describe 'things' that happened, were observed and reported
+    observations: [BaseRecord]
+}
+
+# Container for geography-related fields.
+type Geo {
+    timezone: String
+    regionName: String
+    regionIsoCode: String
+    postalCode: String
+    # User-defined description of a location, at whatever level of
+    # granularity the user cares about.
+    name: String
+    # Longitude and latitude.
+    location: GeoPoint
+    countryName: String
+    countryIsoCode: String
+    continentName: String
+    # Two-letter code representing the continent's name.
+    continentCode: String
+    cityName: String
+}
+
+# An autonomous system (AS) is a collection of connected Internet Protocol (IP) routing prefixes under the control of one or more network operators on behalf of a single administrative entity or domain that presents a common, clearly defined routing policy to the internet.
+type AutonomousSystem {
+    # Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet.
+    number: Long
+    # Organization name, such as amazon.com.
+    organizationName: String
+}
+
+type Group {
+    # Unique identifier for the group on the system/platform.
+    id: String!
+    # Name of the directory the group is a member of.
+    domain: String
+}
+
+type User {
+    # Unique identifier of the user.
+    id: String!
+    # Short name or login of the user.
+    name: String
+    # Full name of the user.
+    fullName: String
+    # Name of the directory the user is a member of.
+    domain: String
+    # Email address of the user.
+    email: String
+    # Array of user roles at the time of the event.
+    roles: [String]
+    # Unique user hash to correlate information for a user in anonymized form.
+    hash: String
+    # The group of which the user is a member.
+    group: Group
+}
diff --git a/experimental/schema/logs/client.graphql b/experimental/schema/logs/client.graphql
@@ -0,0 +1,49 @@
+# A client is defined as the initiator of a network connection for events
+# regarding sessions, connections, or bidirectional flow records.
+#
+# For TCP events, the client is the initiator of the TCP connection that sends
+# the SYN packet(s). For other protocols, the client is generally the initiator
+# or requestor in the network transaction. Some systems use the term
+# "originator" to refer to the client in TCP connections. The client fields
+# describe details about the system acting as the client in the network event.
+# Client fields are usually populated in conjunction with server fields, and
+# are generally not populated for packet-level events.
+#
+# Client / server representations can add semantic context to an exchange,
+# which is helpful to visualize the data in certain situations. If your context
+# falls in that category, you should still ensure that source and destination
+# are filled appropriately.
+type Client implements BaseRecord {
+    # Client network address.
+    address: String
+    # Autonomous system details for the client (see AutonomousSystem).
+    as: AutonomousSystem
+    # The domain name of the client system.
+    domain: String
+    # Bytes sent from the client to the server.
+    bytes: Long
+    # Geographic fields derived from the client's location.
+    geo: Geo
+    # Translated IP of source based NAT sessions (e.g. internal client to internet). NOTE(review): the name looks like a typo of natIp - confirm before renaming.
+    natIpp: IP
+    # IP address of the client (IPv4 or IPv6).
+    ip: IP
+    # MAC address of the client.
+    mac: String
+    # Port of the client.
+    port: Long
+    # Translated port of source based NAT sessions.
+    natPort: Long
+    # Packets sent from the client to the server.
+    packets: Long
+    # The highest registered client domain, stripped of the subdomain.
+    registeredDomain: String
+    # The subdomain portion of a fully qualified domain name includes
+    # all of the names except the host name under the registered_domain.
+    subdomain: String
+    # The effective top level domain (eTLD), also known as the domain
+    # suffix, is the last part of the domain name.
+    topLevelDomain: String
+    # User details for the client side of the network connection.
+    user: User
+}
diff --git a/experimental/schema/logs/cloud.graphql b/experimental/schema/logs/cloud.graphql
@@ -0,0 +1,39 @@
+type Cloud implements BaseRecord {
+    # The cloud account or organization id used to identify different entities in a multi-tenant environment.
+    #
+    # Examples: AWS account id, Google Cloud ORG Id, or other unique identifier.
+    accountId: String
+    # The cloud account name or alias used to identify different entities in a multi-tenant environment.
+    #
+    # Examples: AWS account name, Google Cloud ORG display name.
+    accountName: String
+    # Availability zone in which this host, resource, or service is located.
+    availabilityZone: String
+    # Instance ID of the host machine.
+    instanceId: String
+    # Instance name of the host machine.
+    instanceName: String
+    # Machine type of the host machine.
+    machineType: String
+    # The cloud project identifier.
+    #
+    # Examples: Google Cloud Project id, Azure Project id.
+    projectId: String
+    # The cloud project name.
+    #
+    # Examples: Google Cloud Project name, Azure Project name.
+    projectName: String
+    # Name of the cloud provider. Example values are aws, azure, gcp, or digitalocean.
+    provider: String
+    # Region in which this host, resource, or service is located.
+    region: String
+    # The cloud service name is intended to distinguish services running on different platforms within a provider, e.g. AWS EC2 vs Lambda, GCP GCE vs App Engine, Azure VM vs App Server.
+    #
+    # Examples: app engine, app service, cloud run, fargate, lambda.
+    serviceName: String
+
+    # Provides the cloud information of the origin entity in case of an incoming request or event.
+    origin: Cloud
+    # Provides the cloud information of the target entity in case of an outgoing request or event.
+    target: Cloud
+}
diff --git a/experimental/schema/logs/container.graphql b/experimental/schema/logs/container.graphql
@@ -0,0 +1,32 @@
+type Container implements BaseRecord {
+    # Container's unique id.
+    id: ID!
+    # Container's name.
+    name: String
+    # Percent CPU used, normalized by the number of CPU cores; ranges from 0 to 1.
+    cpuUsage: Float
+    # The total number of bytes (gauge) read successfully (aggregated from all disks) since the last metric collection.
+    diskReadBytes: Long
+    # The total number of bytes (gauge) written successfully (aggregated from all disks) since the last metric collection.
+    diskWriteBytes: Long
+    # Digest of the image the container was built on. NOTE(review): the original comment described an array of digests while the type is a single String - confirm.
+    imageHash: String
+    # Name of the image the container was built on.
+    imageName: String
+    # Container image tags.
+    imageTag: [String]
+    # Container image labels.
+    labels: Json
+    # Memory usage percentage; ranges from 0 to 1, hence a Float like cpuUsage.
+    memoryUsage: Float
+    # The number of bytes sent on all network interfaces.
+    networkEgressBytes: Long
+    # The number of packets (gauge) sent out on all network interfaces.
+    networkEgressPackets: Long
+    # The number of bytes received from all network interfaces.
+    networkIngressBytes: Long
+    # The number of packets (gauge) received from all network interfaces.
+    networkIngressPackets: Long
+    # Runtime managing this container.
+    runtime: String
+}
diff --git a/experimental/schema/logs/destination.graphql b/experimental/schema/logs/destination.graphql
@@ -0,0 +1,38 @@
+# Destination fields capture details about the receiver of a network exchange/packet. These fields are populated from a network event, packet, or other event containing details of a network transaction.
+#
+# Destination fields are usually populated in conjunction with source fields. The source and destination fields are considered the baseline and should always be filled if an event contains source and destination details from a network transaction. If the event also contains identification of the client and server roles, then the client and server fields should also be populated.
+type Destination implements BaseRecord {
+    # Destination network address.
+    address: String
+    # Bytes sent from the destination to the source.
+    bytes: Long
+    # The domain name of the destination system.
+    domain: String
+    # The IP address of the destination system.
+    ip: IP
+    # The MAC address of the destination system.
+    mac: String
+    # Translated IP of destination based NAT sessions. NOTE(review): original text was copied from the source/client side - confirm wording; the name also looks like a typo of natIp.
+    natIpp: IP
+    # Port of the destination.
+    port: Long
+    # Translated port for NAT sessions on the destination side. NOTE(review): original text was copied from the source/client side - confirm wording.
+    natPort: Long
+    # Packets sent from the destination to the source.
+    packets: Long
+    # The highest registered destination domain, stripped of the subdomain.
+    registeredDomain: String
+    # The subdomain portion of a fully qualified domain name includes
+    # all of the names except the host name under the registered_domain.
+    subdomain: String
+    # The effective top level domain (eTLD), also known as the domain
+    # suffix, is the last part of the domain name.
+    topLevelDomain: String
+
+    # Fields describing an Autonomous System (Internet routing prefix).
+    as: AutonomousSystem
+    # Fields describing a location.
+    geo: Geo
+    # Fields to describe the user relevant to the event.
+    user: User
+}