Skip to content

Commit

Permalink
Merge pull request #725 from YANG-DB/opensearch-schema-draft
Browse files Browse the repository at this point in the history
Opensearch schema draft
  • Loading branch information
anirudha committed Aug 5, 2022
2 parents 6a99b56 + c7a1ef1 commit 06b93a8
Show file tree
Hide file tree
Showing 22 changed files with 24,568 additions and 0 deletions.
43 changes: 43 additions & 0 deletions experimental/schema/logs/agent.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Status values describing how an agent identifier was validated.
# NOTE(review): no type visible in this file references this enum — confirm where it is used.
enum AgentIdStatus {
    verified
    mismatch
    missing
    auth_metadata_missing
}
# The agent fields contain data about the software entity, if any, that
# collects, detects, or observes events on a host, or takes measurements on
# a host.
#
# Examples include Beats. Agents may also run on observers. ECS agent.* fields
# shall be populated with details of the agent running on the host or observer
# where the event happened or the measurement was taken.
type Agent implements BaseRecord {
    # Unique identifier of this agent (if one exists).
    #
    # Example: for Beats this would be beat.id.
    id: ID!

    # Custom name of the agent.
    #
    # This is a name that can be given to an agent. This can be helpful if, for
    # example, two Filebeat instances are running on the same host but a
    # human-readable separation is needed on which Filebeat instance data is
    # coming from.
    name: String

    # Type of the agent.
    #
    # The agent type always stays the same and should be given by the agent
    # used. In case of Filebeat the agent would always be Filebeat, even if two
    # Filebeat instances are run on the same machine.
    aType: String

    # Version of the agent.
    version: String

    # Extended build information for the agent.
    #
    # This field is intended to contain any build information that a data
    # source may provide; no specific formatting is required.
    buildOriginal: String

    # Ephemeral identifier of this agent (if one exists).
    #
    # This id normally changes across restarts, but `agent.id` does not.
    ephemeralId: String

    # --- Fields redeclared from BaseRecord ---
    # GraphQL requires a type that `implements` another definition to declare
    # every one of its fields with a compatible type.

    # Date/time when the event originated. Required.
    timestamp: Time!
    # Custom key/value pairs used to add meta information to events.
    labels: Json
    # Log message, optimized for viewing in a log viewer.
    message: String
    # List of keywords used to tag each event.
    tags: [String]
    # Key-value pairs representing vendor specific properties.
    attributes: Json
}
121 changes: 121 additions & 0 deletions experimental/schema/logs/base.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Custom scalar types referenced by the schema's field definitions.
# NOTE(review): their wire/serialization formats are not defined in this file —
# confirm against the resolver or index-mapping layer.
scalar Time
scalar Json
scalar Long
scalar URL
scalar IP
scalar GeoPoint

# The kinds of data stream supported by the schema.
enum StreamType {
    logs
    metrics
    traces
    synthetics
}
# The most common attributes shared by all top-level entities.
#
# Declared as an interface because object types in this schema use
# `implements BaseRecord`, which GraphQL only permits for interfaces.
interface BaseRecord {
    # Date/time when the event originated.
    #
    # This is the date/time extracted from the event, typically representing
    # when the event was generated by the source.
    #
    # If the event source has no original timestamp, this value is typically
    # populated by the first time the event was received by the pipeline.
    #
    # Required field for all events.
    timestamp: Time!

    # Custom key/value pairs.
    #
    # Can be used to add meta information to events. Should not contain nested
    # objects. All values are stored as keyword.
    #
    # Example: {"application": "foo-bar", "env": "production"}
    labels: Json

    # For log events the message field contains the log message, optimized
    # for viewing in a log viewer.
    #
    # For structured logs without an original message field, other fields can
    # be concatenated to form a human-readable summary of the event.
    #
    # If multiple messages exist, they can be combined into one message.
    message: String

    # List of keywords used to tag each event.
    tags: [String]

    # Key-value pairs representing vendor specific properties.
    attributes: Json
}
# The data stream naming scheme combines the values of the data stream fields
# into the name of the actual data stream in the following manner:
# {data_stream.type}-{data_stream.dataset}-{data_stream.namespace}.
# This means the fields can only contain characters that are valid as part of
# names of data streams.
type StreamSet {
    # An overarching type for the data stream.
    streamType: StreamType

    # A user-defined namespace. Namespaces are useful to allow grouping of data.
    #
    # Many users already organize their indices this way, and the data stream
    # naming scheme now provides this best practice as a default. Many users
    # will populate this field with default.
    # If no value is used, it falls back to default.
    namespace: String

    # The field can contain anything that makes sense to signify the source of
    # the data. Examples include nginx.access, prometheus, endpoint etc.
    # For data streams that otherwise fit, but that do not have dataset set,
    # the value "generic" is used for the dataset value.
    # event.dataset should have the same value as data_stream.dataset.
    dataset: String
}

# Top-most level structure for an incoming log of any type.
type LogRecord {
    # The event's common characteristics.
    event: Event!

    # A list of top-level observations which describe 'things' that happened,
    # were observed, and were reported.
    observations: [BaseRecord]
}

# Container for geography-related fields.
type Geo {
    timezone: String
    regionName: String
    regionIsoCode: String
    postalCode: String

    # User-defined description of a location, at the level of granularity
    # they care about.
    name: String

    # Longitude and latitude.
    location: GeoPoint

    countryName: String
    countryIsoCode: String
    continentName: String

    # Two-letter code representing the continent's name.
    continentCode: String

    cityName: String
}

# An autonomous system (AS) is a collection of connected Internet Protocol (IP)
# routing prefixes under the control of one or more network operators on behalf
# of a single administrative entity or domain that presents a common, clearly
# defined routing policy to the internet.
type AutonomousSystem {
    # Unique number allocated to the autonomous system. The autonomous system
    # number (ASN) uniquely identifies each network on the Internet.
    number: Long

    # Organization name, such as amazon.com.
    organizationName: String
}

# A group as known to the system/platform.
type Group {
    # Unique identifier for the group on the system/platform.
    id: String!

    # Name of the directory the group is a member of.
    domain: String
}

# A user, together with identifying and membership details.
type User {
    # Unique identifier of the user.
    id: String!

    # Short name or login of the user.
    name: String

    # Full name of the user.
    fullName: String

    # Name of the directory the user is a member of.
    domain: String

    # Email address of the user.
    email: String

    # Array of user roles at the time of the event.
    roles: [String]

    # Unique user hash to correlate information for a user in anonymized form.
    hash: String

    # The group of which the user is a member.
    group: Group
}
49 changes: 49 additions & 0 deletions experimental/schema/logs/client.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# A client is defined as the initiator of a network connection for events
# regarding sessions, connections, or bidirectional flow records.
#
# For TCP events, the client is the initiator of the TCP connection that sends
# the SYN packet(s). For other protocols, the client is generally the initiator
# or requestor in the network transaction. Some systems use the term
# "originator" to refer to the client in TCP connections. The client fields
# describe details about the system acting as the client in the network event.
# Client fields are usually populated in conjunction with server fields. Client
# fields are generally not populated for packet-level events.
#
# Client / server representations can add semantic context to an exchange,
# which is helpful to visualize the data in certain situations. If your context
# falls in that category, you should still ensure that source and destination
# are filled appropriately.
type Client implements BaseRecord {
    # Client network address.
    address: String

    # Fields describing an Autonomous System (Internet routing prefix).
    as: AutonomousSystem

    # The domain name of the client system.
    domain: String

    # Bytes sent from the client to the server.
    bytes: Long

    # Geographic fields derived from the client's location.
    geo: Geo

    # Translated IP of source based NAT sessions (e.g. internal client to internet).
    # NOTE(review): the field name looks like a typo of `natIp`; kept unchanged
    # so existing consumers are not broken — confirm before renaming.
    natIpp: IP

    # IP address of the client (IPv4 or IPv6).
    ip: IP

    # MAC address of the client.
    mac: String

    # Port of the client.
    port: Long

    # Translated port of source based NAT sessions.
    natPort: Long

    # Packets sent from the client to the server.
    packets: Long

    # The highest registered client domain, stripped of the subdomain.
    registeredDomain: String

    # The subdomain portion of a fully qualified domain name includes
    # all of the names except the host name under the registered_domain.
    subdomain: String

    # The effective top level domain (eTLD), also known as the domain
    # suffix, is the last part of the domain name.
    topLevelDomain: String

    # Fields describing the user on the client side of the network connection.
    user: User

    # --- Fields redeclared from BaseRecord ---
    # GraphQL requires a type that `implements` another definition to declare
    # every one of its fields with a compatible type.

    # Date/time when the event originated. Required.
    timestamp: Time!
    # Custom key/value pairs used to add meta information to events.
    labels: Json
    # Log message, optimized for viewing in a log viewer.
    message: String
    # List of keywords used to tag each event.
    tags: [String]
    # Key-value pairs representing vendor specific properties.
    attributes: Json
}
39 changes: 39 additions & 0 deletions experimental/schema/logs/cloud.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Fields describing the cloud environment of a host, resource, or service.
type Cloud implements BaseRecord {
    # The cloud account or organization id used to identify different entities
    # in a multi-tenant environment.
    #
    # Examples: AWS account id, Google Cloud ORG Id, or other unique identifier.
    accountId: String

    # The cloud account name or alias used to identify different entities in a
    # multi-tenant environment.
    #
    # Examples: AWS account name, Google Cloud ORG display name.
    accountName: String

    # Availability zone in which this host, resource, or service is located.
    availabilityZone: String

    # Instance ID of the host machine.
    instanceId: String

    # Instance name of the host machine.
    instanceName: String

    # Machine type of the host machine.
    machineType: String

    # The cloud project identifier.
    #
    # Examples: Google Cloud Project id, Azure Project id.
    projectId: String

    # The cloud project name.
    #
    # Examples: Google Cloud Project name, Azure Project name.
    projectName: String

    # Name of the cloud provider. Example values are aws, azure, gcp, or
    # digitalocean.
    provider: String

    # Region in which this host, resource, or service is located.
    region: String

    # The cloud service name is intended to distinguish services running on
    # different platforms within a provider, e.g. AWS EC2 vs Lambda, GCP GCE vs
    # App Engine, Azure VM vs App Server.
    #
    # Examples: app engine, app service, cloud run, fargate, lambda.
    serviceName: String

    # Cloud information of the origin entity in case of an incoming request or
    # event.
    origin: Cloud

    # Cloud information of the target entity in case of an outgoing request or
    # event.
    target: Cloud

    # --- Fields redeclared from BaseRecord ---
    # GraphQL requires a type that `implements` another definition to declare
    # every one of its fields with a compatible type.

    # Date/time when the event originated. Required.
    timestamp: Time!
    # Custom key/value pairs used to add meta information to events.
    labels: Json
    # Log message, optimized for viewing in a log viewer.
    message: String
    # List of keywords used to tag each event.
    tags: [String]
    # Key-value pairs representing vendor specific properties.
    attributes: Json
}
32 changes: 32 additions & 0 deletions experimental/schema/logs/container.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Fields describing a container and its resource usage.
type Container implements BaseRecord {
    # Container's unique id.
    id: ID!

    # Container's name.
    name: String

    # Percent CPU used, normalized by the number of CPU cores; ranges from 0 to 1.
    cpuUsage: Float

    # The total number of bytes (gauge) read successfully (aggregated from all
    # disks) since the last metric collection.
    diskReadBytes: Long

    # The total number of bytes (gauge) written successfully (aggregated from
    # all disks) since the last metric collection.
    diskWriteBytes: Long

    # Digest of the image the container was built on.
    # NOTE(review): the original comment describes "an array of digests" but
    # the field is a single String — confirm the intended type.
    imageHash: String

    # Name of the image the container was built on.
    imageName: String

    # Container image tags.
    imageTag: [String]

    # Container image labels. This declaration also serves as the `labels`
    # field that `implements BaseRecord` requires (same name, same Json type).
    labels: Json

    # Memory usage percentage; ranges from 0 to 1. Typed Float (like cpuUsage)
    # because an integer Long cannot represent a fractional ratio.
    memoryUsage: Float

    # The number of bytes sent on all network interfaces.
    networkEgressBytes: Long

    # The number of packets (gauge) sent out on all network interfaces.
    networkEgressPackets: Long

    # The number of bytes received from all network interfaces.
    networkIngressBytes: Long

    # The number of packets (gauge) received from all network interfaces.
    networkIngressPackets: Long

    # Runtime managing this container.
    runtime: String

    # --- Remaining fields redeclared from BaseRecord ---
    # GraphQL requires a type that `implements` another definition to declare
    # every one of its fields with a compatible type.

    # Date/time when the event originated. Required.
    timestamp: Time!
    # Log message, optimized for viewing in a log viewer.
    message: String
    # List of keywords used to tag each event.
    tags: [String]
    # Key-value pairs representing vendor specific properties.
    attributes: Json
}
38 changes: 38 additions & 0 deletions experimental/schema/logs/destination.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Destination fields capture details about the receiver of a network
# exchange/packet. These fields are populated from a network event, packet, or
# other event containing details of a network transaction.
#
# Destination fields are usually populated in conjunction with source fields.
# The source and destination fields are considered the baseline and should
# always be filled if an event contains source and destination details from a
# network transaction. If the event also contains identification of the client
# and server roles, then the client and server fields should also be populated.
type Destination implements BaseRecord {
    # Destination network address.
    address: String

    # Bytes sent from the destination to the source.
    bytes: Long

    # The domain name of the destination system.
    domain: String

    # The IP address of the destination system.
    ip: IP

    # The MAC address of the destination system.
    mac: String

    # Translated IP address of the destination for NAT sessions.
    # NOTE(review): the original comment was copied from Client ("source based
    # NAT sessions"); the field name also looks like a typo of `natIp` and is
    # kept unchanged so existing consumers are not broken — confirm both.
    natIpp: IP

    # Port of the destination.
    port: Long

    # Translated port of the destination for NAT sessions.
    # NOTE(review): the original comment said "source based NAT sessions" — confirm.
    natPort: Long

    # Packets sent from the destination to the source.
    packets: Long

    # The highest registered destination domain, stripped of the subdomain.
    registeredDomain: String

    # The subdomain portion of a fully qualified domain name includes
    # all of the names except the host name under the registered_domain.
    subdomain: String

    # The effective top level domain (eTLD), also known as the domain
    # suffix, is the last part of the domain name.
    topLevelDomain: String

    # Fields describing an Autonomous System (Internet routing prefix).
    as: AutonomousSystem

    # Fields describing a location.
    geo: Geo

    # Fields to describe the user relevant to the event.
    user: User

    # --- Fields redeclared from BaseRecord ---
    # GraphQL requires a type that `implements` another definition to declare
    # every one of its fields with a compatible type.

    # Date/time when the event originated. Required.
    timestamp: Time!
    # Custom key/value pairs used to add meta information to events.
    labels: Json
    # Log message, optimized for viewing in a log viewer.
    message: String
    # List of keywords used to tag each event.
    tags: [String]
    # Key-value pairs representing vendor specific properties.
    attributes: Json
}
Loading

0 comments on commit 06b93a8

Please sign in to comment.