Skip to content

Commit

Permalink
Merge pull request #734 from YANG-DB/opensearch-schema-draft
Browse files Browse the repository at this point in the history
Opensearch schema draft
  • Loading branch information
anirudha committed Aug 18, 2022
2 parents 06b93a8 + a8d95ff commit 1572ef2
Show file tree
Hide file tree
Showing 103 changed files with 12,236 additions and 160 deletions.
38 changes: 38 additions & 0 deletions experimental/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

// Build script for the experimental OpenSearch schema module.
// Java library resolving all dependencies from Maven Central.

plugins {
    id 'java'
}

version '2.1.0.0-SNAPSHOT'

// Single repository declaration; the original file declared this
// identical block twice, which is redundant.
repositories {
    mavenCentral()
}

dependencies {
    // GraphQL schema parsing/execution plus extended scalar types (JSON, Long, URL, ...).
    implementation group: 'com.graphql-java', name: 'graphql-java', version: '14.0'
    implementation 'com.graphql-java:graphql-java-extended-scalars:18.1'
    // Functional data structures (Option, Try, immutable collections).
    implementation group: 'io.javaslang', name: 'javaslang', version: '2.0.6'
    // JSON (de)serialization.
    implementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.13.2'
    // Logging backend.
    implementation group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.17.1'
    // Dependency injection.
    implementation group: 'com.google.inject', name: 'guice', version: '4.1.0'
    // OpenSearch core APIs this module maps the schema onto.
    implementation group: 'org.opensearch', name: 'opensearch', version: '1.3.0'

    // JUnit 5 for new tests; JUnit 4 + Hamcrest + Mockito retained for legacy tests.
    testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.1'
    testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.8.1'
    testImplementation group: 'org.hamcrest', name: 'hamcrest-core', version: '2.2'
    testImplementation group: 'org.mockito', name: 'mockito-inline', version: '3.12.4'
    testImplementation group: 'junit', name: 'junit', version: '4.13.2'
}

test {
    // Run both JUnit 5 and (via vintage-compatible setup) JUnit 4 tests on the JUnit Platform.
    useJUnitPlatform()
}
36 changes: 34 additions & 2 deletions experimental/schema/logs/agent.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,37 @@ enum AgentIdStatus {
# Examples include Beats. Agents may also run on observers. ECS agent.* fields shall
# be populated with details of the agent running on the host or observer where the
# event happened or the measurement was taken.'
type Agent implements BaseRecord{
type Agent implements BaseRecord @model {
#'Date/time when the event originated.
#
# This is the date/time extracted from the event, typically representing when
# the event was generated by the source.
#
# If the event source has no original timestamp, this value is typically populated
# by the first time the event was received by the pipeline.
#
# Required field for all events.'
timestamp : Time!
#'Custom key/value pairs.
#
# Can be used to add meta information to events. Should not contain nested objects.
# All values are stored as keyword.
#
# Example: '{"application": "foo-bar", "env": "production"}'
labels : JSON
# 'For log events the message field contains the log message, optimized
# for viewing in a log viewer.
#
# For structured logs without an original message field, other fields can be
# concatenated to form a human-readable summary of the event.
#
# If multiple messages exist, they can be combined into one message.'
message: String
# List of keywords used to tag each event.
tags: [String]
# Key-Value pairs representing vendor specific properties
attributes: JSON

#'Unique identifier of this agent (if one exists).
#
# Example: For Beats this would be beat.id.'
Expand All @@ -22,12 +52,14 @@ type Agent implements BaseRecord{
# two Filebeat instances are running on the same host but a human readable separation
# is needed on which Filebeat instance data is coming from.'
name :String
description:String

# 'Type of the agent.
#
# The agent type always stays the same and should be given by the agent used.
# In case of Filebeat the agent would always be Filebeat also if two Filebeat
# instances are run on the same machine.'
aType :String
type :String

version :String
#'Extended build information for the agent.
Expand Down
63 changes: 29 additions & 34 deletions experimental/schema/logs/base.graphql
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
scalar Time
scalar Json
scalar Text
scalar DateTime
scalar JSON
scalar Long
scalar URL
scalar Url

scalar IP
scalar GeoPoint

directive @relation(mappingType: String) on FIELD_DEFINITION
directive @key(fields: [String!]!, name: String) on OBJECT
directive @model on OBJECT

type Query {
fetch(name : String) : [String]
}

# type of supported streams
enum StreamType {
logs
Expand All @@ -13,7 +24,7 @@ enum StreamType {
synthetics
}
# the most common attributes shared by all top level entities
type BaseRecord {
interface BaseRecord {
#'Date/time when the event originated.
#
# This is the date/time extracted from the event, typically representing when
Expand All @@ -30,7 +41,7 @@ type BaseRecord {
# All values are stored as keyword.
#
# Example: '{"application": "foo-bar", "env": "production"}'
labels : Json
labels : JSON
# 'For log events the message field contains the log message, optimized
# for viewing in a log viewer.
#
Expand All @@ -42,7 +53,7 @@ type BaseRecord {
# List of keywords used to tag each event.
tags: [String]
# Key-Value pairs representing vendor specific properties
attributes: Json
attributes: JSON
}
# data stream naming scheme uses the value of the data stream fields combine to the name of the actual data stream in the following manner: {data_stream.type}-{data_stream.dataset}-{data_stream.namespace}. This means the fields can only contain characters that are valid as part of names of data streams
type StreamSet {
Expand All @@ -55,18 +66,10 @@ type StreamSet {
namespace:String
# The field can contain anything that makes sense to signify the source of the data.
# Examples include nginx.access, prometheus, endpoint etc. For data streams that otherwise fit, but that do not have dataset set we use the value "generic" for the dataset value.
# event.dataset should have the same value as data_stream.dataset.
# event.dataset should have the same value as data_stream.dataset.
dataset:String
}

# top most level structuring an incoming format of any type of log
type LogRecord {
# The event's common characteristics
event: Event!
# A list of top-level observations which describe 'things' that happened, where observed and reported
observations: [BaseRecord]
}

# geographic related fields container
type Geo {
timezone:String
Expand All @@ -87,35 +90,27 @@ type Geo {
}

#An autonomous system (AS) is a collection of connected Internet Protocol (IP) routing prefixes under the control of one or more network operators on behalf of a single administrative entity or domain that presents a common, clearly defined routing policy to the internet.
type AutonomousSystem {
type AutonomousSystem @key(fields: ["number"],name: "number" ){

# Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet.
number: Long
number: Long!
# organization name such as amazon.com
organizationName:String
}

type Group {
# Name of the directory the group is a member of.
#
#For example, an LDAP or Active Directory domain name.
name:String
# Unique identifier for the group on the system/platform.
id:String!
id:ID!
# Name of the directory the group is a member of
domain:String
}

type User {
# Unique identifier of the user.
id:String!
# Short name or login of the user
#The organization fields enrich data with information about the company or entity the data is associated with
type Organization {
id:ID!
name:String
# User full name
fullName:String
# Name of the directory the user is a member of
domain:String
# User email address
email:String
# Array of user roles at the time of the event
roles:[String]
# Unique user hash to correlate information for a user in anonymized form.
hash:String
# the group in which the client is a member of
group:Group
}
}
49 changes: 0 additions & 49 deletions experimental/schema/logs/client.graphql

This file was deleted.

39 changes: 36 additions & 3 deletions experimental/schema/logs/cloud.graphql
Original file line number Diff line number Diff line change
@@ -1,4 +1,35 @@
type Cloud implements BaseRecord {
# Fields related to the cloud or infrastructure the events are coming from.
type Cloud implements BaseRecord @model{
#'Date/time when the event originated.
#
# This is the date/time extracted from the event, typically representing when
# the event was generated by the source.
#
# If the event source has no original timestamp, this value is typically populated
# by the first time the event was received by the pipeline.
#
# Required field for all events.'
timestamp : Time!
#'Custom key/value pairs.
#
# Can be used to add meta information to events. Should not contain nested objects.
# All values are stored as keyword.
#
# Example: '{"application": "foo-bar", "env": "production"}'
labels : JSON
# 'For log events the message field contains the log message, optimized
# for viewing in a log viewer.
#
# For structured logs without an original message field, other fields can be
# concatenated to form a human-readable summary of the event.
#
# If multiple messages exist, they can be combined into one message.'
message: String
# List of keywords used to tag each event.
tags: [String]
# Key-Value pairs representing vendor specific properties
attributes: JSON

# The cloud account or organization id used to identify different entities in a multi-tenant environment.
#
#Examples: AWS account id, Google Cloud ORG Id, or other unique identifier.
Expand Down Expand Up @@ -32,8 +63,10 @@ type Cloud implements BaseRecord {
#Examples: app engine, app service, cloud run, fargate, lambda.
serviceName:String

description:String

#Provides the cloud information of the origin entity in case of an incoming request or event.
origin:Cloud
origin:Cloud @relation(mappingType: "foreign")
#Provides the cloud information of the target entity in case of an outgoing request or event.
target:Cloud
target:Cloud @relation(mappingType: "foreign")
}
25 changes: 25 additions & 0 deletions experimental/schema/logs/codeSignature.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# These fields contain information about binary code signatures
# (e.g. signatures attached to an executable or process image).
type CodeSignature {
# The hashing algorithm used to sign the process.
# This value can distinguish signatures when a file is signed multiple times by the same signer but with a different digest algorithm.
digestAlgorithm:String
# Boolean to capture if a signature is present.
exists:Boolean
# The identifier used to sign the process.
signingId:String
# Additional information about the certificate status.
# This is useful for logging cryptographic errors with the certificate validity or trust status. Leave unpopulated if the validity or trust of the certificate was unchecked.
status:String
# Subject name of the code signer.
subjectName:String
# The team identifier used to sign the process.
teamId:String
# Date and time when the code signature was generated and signed.
# NOTE(review): uses the DateTime scalar while other record timestamps use Time — confirm the intended scalar.
timestamp:DateTime
# Stores the trust status of the certificate chain.
# Validating the trust of the certificate chain may be complicated, and this field should only be populated by tools that actively check the status.
trusted:Boolean
# Boolean to capture if the digital signature is verified against the binary content.
# Leave unpopulated if a certificate was unchecked.
valid:Boolean
}
Loading

0 comments on commit 1572ef2

Please sign in to comment.