Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Opensearch schema draft #734

Merged
merged 19 commits into from
Aug 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions experimental/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

// Apply the core 'java' plugin to this project.
plugins {
    id 'java'
}

// Resolve dependencies from Maven Central.
// The repository only needs to be declared once; a second identical
// `repositories { mavenCentral() }` block was redundant and has been removed.
repositories {
    mavenCentral()
}

// Version of this project's build artifacts.
version '2.1.0.0-SNAPSHOT'

// All coordinates use the compact 'group:name:version' string notation.
// Declaration order is kept as-is.
dependencies {
    // GraphQL libraries.
    // NOTE(review): graphql-java-extended-scalars 18.1 is paired with
    // graphql-java 14.0 here — confirm these two versions are compatible.
    implementation 'com.graphql-java:graphql-java:14.0'
    implementation 'com.graphql-java:graphql-java-extended-scalars:18.1'

    // General-purpose libraries and the OpenSearch core artifact.
    implementation 'io.javaslang:javaslang:2.0.6'
    implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.2'
    implementation 'org.apache.logging.log4j:log4j-core:2.17.1'
    implementation 'com.google.inject:guice:4.1.0'
    implementation 'org.opensearch:opensearch:1.3.0'

    // Test-only dependencies.
    // NOTE(review): JUnit 4 (junit:junit) is declared next to JUnit Jupiter, but
    // no junit-vintage-engine is declared in this block — confirm whether any
    // JUnit 4 style tests are expected to run on the JUnit Platform.
    testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.1'
    testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.8.1'
    testImplementation 'org.hamcrest:hamcrest-core:2.2'
    testImplementation 'org.mockito:mockito-inline:3.12.4'
    testImplementation 'junit:junit:4.13.2'
}

// Configure the 'test' task to execute tests through the JUnit Platform.
test {
    useJUnitPlatform()
}
36 changes: 34 additions & 2 deletions experimental/schema/logs/agent.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,37 @@ enum AgentIdStatus {
# Examples include Beats. Agents may also run on observers. ECS agent.* fields shall
# be populated with details of the agent running on the host or observer where the
# event happened or the measurement was taken.'
type Agent implements BaseRecord{
type Agent implements BaseRecord @model {
#'Date/time when the event originated.
#
# This is the date/time extracted from the event, typically representing when
# the event was generated by the source.
#
# If the event source has no original timestamp, this value is typically populated
# by the first time the event was received by the pipeline.
#
# Required field for all events.'
timestamp : Time!
#'Custom key/value pairs.
#
# Can be used to add meta information to events. Should not contain nested objects.
# All values are stored as keyword.
#
# Example: '{"application": "foo-bar", "env": "production"}'
labels : JSON
# 'For log events the message field contains the log message, optimized
# for viewing in a log viewer.
#
# For structured logs without an original message field, other fields can be
# concatenated to form a human-readable summary of the event.
#
# If multiple messages exist, they can be combined into one message.'
message: String
# List of keywords used to tag each event.
tags: [String]
# Key-Value pairs representing vendor specific properties
attributes: JSON

#'Unique identifier of this agent (if one exists).
#
# Example: For Beats this would be beat.id.'
Expand All @@ -22,12 +52,14 @@ type Agent implements BaseRecord{
# two Filebeat instances are running on the same host but a human readable separation
# is needed on which Filebeat instance data is coming from.'
name :String
description:String

# 'Type of the agent.
#
# The agent type always stays the same and should be given by the agent used.
# In case of Filebeat the agent would always be Filebeat also if two Filebeat
# instances are run on the same machine.'
aType :String
type :String

version :String
#'Extended build information for the agent.
Expand Down
63 changes: 29 additions & 34 deletions experimental/schema/logs/base.graphql
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
scalar Time
scalar Json
scalar Text
scalar DateTime
scalar JSON
scalar Long
scalar URL
scalar Url

scalar IP
scalar GeoPoint

# Field-level directive; accepts an optional `mappingType` string argument.
directive @relation(mappingType: String) on FIELD_DEFINITION
# Object-level directive; requires a non-null list of non-null field names and accepts an optional `name`.
directive @key(fields: [String!]!, name: String) on OBJECT
# Argument-less directive that can be attached to object types.
directive @model on OBJECT

# Root query type. Exposes a single `fetch` field that takes an optional
# `name` string argument and returns a list of strings.
type Query {
    fetch(name: String): [String]
}

# type of supported streams
enum StreamType {
logs
Expand All @@ -13,7 +24,7 @@ enum StreamType {
synthetics
}
# the most common attributes shared by all top level entities
type BaseRecord {
interface BaseRecord {
#'Date/time when the event originated.
#
# This is the date/time extracted from the event, typically representing when
Expand All @@ -30,7 +41,7 @@ type BaseRecord {
# All values are stored as keyword.
#
# Example: '{"application": "foo-bar", "env": "production"}'
labels : Json
labels : JSON
# 'For log events the message field contains the log message, optimized
# for viewing in a log viewer.
#
Expand All @@ -42,7 +53,7 @@ type BaseRecord {
# List of keywords used to tag each event.
tags: [String]
# Key-Value pairs representing vendor specific properties
attributes: Json
attributes: JSON
}
# The data stream naming scheme combines the values of the data stream fields into the name of the actual data stream as follows: {data_stream.type}-{data_stream.dataset}-{data_stream.namespace}. This means the fields can only contain characters that are valid in data stream names.
type StreamSet {
Expand All @@ -55,18 +66,10 @@ type StreamSet {
namespace:String
# The field can contain anything that makes sense to signify the source of the data.
# Examples include nginx.access, prometheus, endpoint etc. For data streams that otherwise fit, but that do not have dataset set we use the value "generic" for the dataset value.
# event.dataset should have the same value as data_stream.dataset.
# event.dataset should have the same value as data_stream.dataset.
dataset:String
}

# top most level structuring an incoming format of any type of log
type LogRecord {
# The event's common characteristics
event: Event!
# A list of top-level observations which describe 'things' that happened, where observed and reported
observations: [BaseRecord]
}

# geographic related fields container
type Geo {
timezone:String
Expand All @@ -87,35 +90,27 @@ type Geo {
}

#An autonomous system (AS) is a collection of connected Internet Protocol (IP) routing prefixes under the control of one or more network operators on behalf of a single administrative entity or domain that presents a common, clearly defined routing policy to the internet.
type AutonomousSystem {
type AutonomousSystem @key(fields: ["number"],name: "number" ){

# Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet.
number: Long
number: Long!
# organization name such as amazon.com
organizationName:String
}

type Group {
# Name of the directory the group is a member of.
#
#For example, an LDAP or Active Directory domain name.
name:String
# Unique identifier for the group on the system/platform.
id:String!
id:ID!
# Name of the directory the group is a member of
domain:String
}

type User {
# Unique identifier of the user.
id:String!
# Short name or login of the user
#The organization fields enrich data with information about the company or entity the data is associated with
type Organization {
id:ID!
name:String
# User full name
fullName:String
# Name of the directory the user is a member of
domain:String
# User email address
email:String
# Array of user roles at the time of the event
roles:[String]
# Unique user hash to correlate information for a user in anonymized form.
hash:String
# the group in which the client is a member of
group:Group
}
}
49 changes: 0 additions & 49 deletions experimental/schema/logs/client.graphql

This file was deleted.

39 changes: 36 additions & 3 deletions experimental/schema/logs/cloud.graphql
Original file line number Diff line number Diff line change
@@ -1,4 +1,35 @@
type Cloud implements BaseRecord {
# Fields related to the cloud or infrastructure the events are coming from.
type Cloud implements BaseRecord @model{
#'Date/time when the event originated.
#
# This is the date/time extracted from the event, typically representing when
# the event was generated by the source.
#
# If the event source has no original timestamp, this value is typically populated
# by the first time the event was received by the pipeline.
#
# Required field for all events.'
timestamp : Time!
#'Custom key/value pairs.
#
# Can be used to add meta information to events. Should not contain nested objects.
# All values are stored as keyword.
#
# Example: '{"application": "foo-bar", "env": "production"}'
labels : JSON
# 'For log events the message field contains the log message, optimized
# for viewing in a log viewer.
#
# For structured logs without an original message field, other fields can be
# concatenated to form a human-readable summary of the event.
#
# If multiple messages exist, they can be combined into one message.'
message: String
# List of keywords used to tag each event.
tags: [String]
# Key-Value pairs representing vendor specific properties
attributes: JSON

# The cloud account or organization id used to identify different entities in a multi-tenant environment.
#
#Examples: AWS account id, Google Cloud ORG Id, or other unique identifier.
Expand Down Expand Up @@ -32,8 +63,10 @@ type Cloud implements BaseRecord {
#Examples: app engine, app service, cloud run, fargate, lambda.
serviceName:String

description:String

#Provides the cloud information of the origin entity in case of an incoming request or event.
origin:Cloud
origin:Cloud @relation(mappingType: "foreign")
#Provides the cloud information of the target entity in case of an outgoing request or event.
target:Cloud
target:Cloud @relation(mappingType: "foreign")
}
25 changes: 25 additions & 0 deletions experimental/schema/logs/codeSignature.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Information about the code signature of a binary.
type CodeSignature {
    # Hashing algorithm used to sign the process.
    # Lets consumers tell signatures apart when the same signer has signed a
    # file more than once using different digest algorithms.
    digestAlgorithm: String

    # True when a signature is present.
    exists: Boolean

    # Identifier used to sign the process.
    signingId: String

    # Extra detail about the certificate status.
    # Useful for recording cryptographic errors related to certificate validity
    # or trust. Leave unset when validity or trust was not checked.
    status: String

    # Subject name of the code signer.
    subjectName: String

    # Team identifier used to sign the process.
    teamId: String

    # Date and time at which the code signature was generated and signed.
    timestamp: DateTime

    # Trust status of the certificate chain.
    # Validating chain trust can be complicated, so only tools that actively
    # check the status should populate this field.
    trusted: Boolean

    # True when the digital signature has been verified against the binary
    # content. Leave unset when the certificate was not checked.
    valid: Boolean
}
Loading