forked from anair-it/nifi-docker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
applog-collector.xml
8 lines (8 loc) · 21.3 KB
/
applog-collector.xml
1
2
3
4
5
6
7
8
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><template><description></description><name>applog-collector</name><snippet><processGroups><id>65f617a2-b5b9-4167-898a-6aac885ca264</id><parentGroupId>3674ee34-a08f-44b7-b5a8-ce2a7e146fc2</parentGroupId><position><x>200.0</x><y>-6.0</y></position><activeRemotePortCount>0</activeRemotePortCount><comments></comments><contents><connections><id>c0d6eaa7-8096-45c2-8b98-ff9fdf48155a</id><parentGroupId>65f617a2-b5b9-4167-898a-6aac885ca264</parentGroupId><backPressureDataSizeThreshold>0 MB</backPressureDataSizeThreshold><backPressureObjectThreshold>10000</backPressureObjectThreshold><destination><groupId>65f617a2-b5b9-4167-898a-6aac885ca264</groupId><id>f313a55b-4c7e-4012-b182-cbfd767c5a66</id><type>PROCESSOR</type></destination><flowFileExpiration>0 sec</flowFileExpiration><labelIndex>1</labelIndex><name>batch json files</name><prioritizers>org.apache.nifi.prioritizer.FirstInFirstOutPrioritizer</prioritizers><selectedRelationships>success</selectedRelationships><source><groupId>65f617a2-b5b9-4167-898a-6aac885ca264</groupId><id>8413e95e-a136-4359-b09f-5343506a67c8</id><type>PROCESSOR</type></source><zIndex>0</zIndex></connections><connections><id>c30beabf-76cd-48e5-a112-73ddc5ff09a2</id><parentGroupId>65f617a2-b5b9-4167-898a-6aac885ca264</parentGroupId><backPressureDataSizeThreshold>0 MB</backPressureDataSizeThreshold><backPressureObjectThreshold>1000</backPressureObjectThreshold><destination><groupId>65f617a2-b5b9-4167-898a-6aac885ca264</groupId><id>2d16a5fe-c405-4c0e-9180-c3e01d79ae16</id><type>PROCESSOR</type></destination><flowFileExpiration>0 sec</flowFileExpiration><labelIndex>1</labelIndex><name>json 
splits</name><prioritizers>org.apache.nifi.prioritizer.FirstInFirstOutPrioritizer</prioritizers><selectedRelationships>splits</selectedRelationships><source><groupId>65f617a2-b5b9-4167-898a-6aac885ca264</groupId><id>f313a55b-4c7e-4012-b182-cbfd767c5a66</id><type>PROCESSOR</type></source><zIndex>0</zIndex></connections><labels><id>3cf3f380-e9ea-48d2-919d-ea0d17f3765b</id><parentGroupId>65f617a2-b5b9-4167-898a-6aac885ca264</parentGroupId><position><x>508.5</x><y>-25.5</y></position><height>84.0</height><label>1. Application will emit logs in JSON format through log4j.
2. The logs are sent through log4j UDP appender
3. This processor collects a batch of JSON log messages</label><style><entry><key>font-size</key><value>12px</value></entry></style><width>345.0</width></labels><labels><id>5d3152fc-984c-4f13-a847-9fffc40379f0</id><parentGroupId>65f617a2-b5b9-4167-898a-6aac885ca264</parentGroupId><position><x>509.5</x><y>148.5</y></position><height>40.0</height><label>Split the batch to multiple flow files</label><style><entry><key>font-size</key><value>12px</value></entry></style><width>221.0</width></labels><labels><id>878e9881-4612-464d-a2e5-315249e13a6e</id><parentGroupId>65f617a2-b5b9-4167-898a-6aac885ca264</parentGroupId><position><x>508.5</x><y>359.5</y></position><height>52.0</height><label>1. Define elasticsearch index 'applog'
2. Define elasticsearch connection info
3. Write each JSON log to Elasticsearch</label><style><entry><key>font-size</key><value>12px</value></entry></style><width>276.0</width></labels><processors><id>f313a55b-4c7e-4012-b182-cbfd767c5a66</id><parentGroupId>65f617a2-b5b9-4167-898a-6aac885ca264</parentGroupId><position><x>194.5</x><y>148.0</y></position><config><bulletinLevel>ERROR</bulletinLevel><comments></comments><concurrentlySchedulableTaskCount>10</concurrentlySchedulableTaskCount><defaultConcurrentTasks><entry><key>TIMER_DRIVEN</key><value>1</value></entry><entry><key>EVENT_DRIVEN</key><value>0</value></entry><entry><key>CRON_DRIVEN</key><value>1</value></entry></defaultConcurrentTasks><defaultSchedulingPeriod><entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry><entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry></defaultSchedulingPeriod><descriptors><entry><key>Line Split Count</key><value><description>The number of lines that will be added to each split file (excluding the header, if the Header Line Count property is greater than 0).</description><displayName>Line Split Count</displayName><dynamic>false</dynamic><name>Line Split Count</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Header Line Count</key><value><defaultValue>0</defaultValue><description>The number of lines that should be considered part of the header; the header lines will be duplicated to all split files</description><displayName>Header Line Count</displayName><dynamic>false</dynamic><name>Header Line Count</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Remove Trailing Newlines</key><value><allowableValues><displayName>true</displayName><value>true</value></allowableValues><allowableValues><displayName>false</displayName><value>false</value></allowableValues><defaultValue>true</defaultValue><description>Whether to remove newlines at the end of each split file. 
This should be false if you intend to merge the split files later. If this is set to 'true' and a FlowFile is generated that contains only 'empty lines' (i.e., consists only of 
 and
characters), the FlowFile will not be emitted. Note, however, that if the Header Line Count is greater than 0, the resultant FlowFile will never be empty as it will consist of the header lines, so a FlowFile may be emitted that contians only the header lines.</description><displayName>Remove Trailing Newlines</displayName><dynamic>false</dynamic><name>Remove Trailing Newlines</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry></descriptors><lossTolerant>false</lossTolerant><penaltyDuration>30 sec</penaltyDuration><properties><entry><key>Line Split Count</key><value>1</value></entry><entry><key>Header Line Count</key><value>0</value></entry><entry><key>Remove Trailing Newlines</key><value>true</value></entry></properties><runDurationMillis>0</runDurationMillis><schedulingPeriod>0 sec</schedulingPeriod><schedulingStrategy>TIMER_DRIVEN</schedulingStrategy><yieldDuration>1 sec</yieldDuration></config><name>split batch into multiple flow files</name><relationships><autoTerminate>true</autoTerminate><description>If a file cannot be split for some reason, the original file will be routed to this destination and nothing will be routed elsewhere</description><name>failure</name></relationships><relationships><autoTerminate>true</autoTerminate><description>The original input file will be routed to this destination when it has been successfully split into 1 or more files</description><name>original</name></relationships><relationships><autoTerminate>false</autoTerminate><description>The split files will be routed to this destination when an input file is successfully split into 1 or more split 
files</description><name>splits</name></relationships><state>STOPPED</state><style/><supportsEventDriven>true</supportsEventDriven><supportsParallelProcessing>true</supportsParallelProcessing><type>org.apache.nifi.processors.standard.SplitText</type></processors><processors><id>8413e95e-a136-4359-b09f-5343506a67c8</id><parentGroupId>65f617a2-b5b9-4167-898a-6aac885ca264</parentGroupId><position><x>194.0</x><y>-27.0</y></position><config><bulletinLevel>ERROR</bulletinLevel><comments>1. Application will emit logs in JSON format through log4j.
2. The logs are sent through log4j UDP appender
3. This processor collects the logs in batch mode</comments><concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount><defaultConcurrentTasks><entry><key>TIMER_DRIVEN</key><value>1</value></entry><entry><key>EVENT_DRIVEN</key><value>0</value></entry><entry><key>CRON_DRIVEN</key><value>1</value></entry></defaultConcurrentTasks><defaultSchedulingPeriod><entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry><entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry></defaultSchedulingPeriod><descriptors><entry><key>Local Network Interface</key><value><description>The name of a local network interface to be used to restrict listening to a specific LAN.</description><displayName>Local Network Interface</displayName><dynamic>false</dynamic><name>Local Network Interface</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry><entry><key>Port</key><value><description>The port to listen on for communication.</description><displayName>Port</displayName><dynamic>false</dynamic><name>Port</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Receive Buffer Size</key><value><defaultValue>65507 B</defaultValue><description>The size of each buffer used to receive messages. Adjust this value appropriately based on the expected size of the incoming messages.</description><displayName>Receive Buffer Size</displayName><dynamic>false</dynamic><name>Receive Buffer Size</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Max Size of Message Queue</key><value><defaultValue>10000</defaultValue><description>The maximum size of the internal queue used to buffer messages being transferred from the underlying channel to the processor. 
Setting this value higher allows more messages to be buffered in memory during surges of incoming messages, but increases the total memory used by the processor.</description><displayName>Max Size of Message Queue</displayName><dynamic>false</dynamic><name>Max Size of Message Queue</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Max Size of Socket Buffer</key><value><defaultValue>1 MB</defaultValue><description>The maximum size of the socket buffer that should be used. This is a suggestion to the Operating System to indicate how big the socket buffer should be. If this value is set too low, the buffer may fill up before the data can be read, and incoming data will be dropped.</description><displayName>Max Size of Socket Buffer</displayName><dynamic>false</dynamic><name>Max Size of Socket Buffer</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Character Set</key><value><defaultValue>UTF-8</defaultValue><description>Specifies the character set of the received data.</description><displayName>Character Set</displayName><dynamic>false</dynamic><name>Character Set</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Max Batch Size</key><value><defaultValue>1</defaultValue><description>The maximum number of messages to add to a single FlowFile. 
If multiple messages are available, they will be concatenated along with the &lt;Message Delimiter&gt; up to this configured maximum number of messages</description><displayName>Max Batch Size</displayName><dynamic>false</dynamic><name>Max Batch Size</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Message Delimiter</key><value><defaultValue>\n</defaultValue><description>Specifies the delimiter to place between messages when multiple messages are bundled together (see &lt;Max Batch Size&gt; property).</description><displayName>Batching Message Delimiter</displayName><dynamic>false</dynamic><name>Message Delimiter</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Sending Host</key><value><description>IP, or name, of a remote host. Only Datagrams from the specified Sending Host Port and this host will be accepted. Improves Performance. May be a system property or an environment variable.</description><displayName>Sending Host</displayName><dynamic>false</dynamic><name>Sending Host</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry><entry><key>Sending Host Port</key><value><description>Port being used by remote host to send Datagrams. Only Datagrams from the specified Sending Host and this port will be accepted. Improves Performance. 
May be a system property or an environment variable.</description><displayName>Sending Host Port</displayName><dynamic>false</dynamic><name>Sending Host Port</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry></descriptors><lossTolerant>false</lossTolerant><penaltyDuration>30 sec</penaltyDuration><properties><entry><key>Local Network Interface</key></entry><entry><key>Port</key><value>9991</value></entry><entry><key>Receive Buffer Size</key><value>65507 B</value></entry><entry><key>Max Size of Message Queue</key><value>10000</value></entry><entry><key>Max Size of Socket Buffer</key><value>1 MB</value></entry><entry><key>Character Set</key><value>UTF-8</value></entry><entry><key>Max Batch Size</key><value>1</value></entry><entry><key>Message Delimiter</key><value>\n</value></entry><entry><key>Sending Host</key></entry><entry><key>Sending Host Port</key></entry></properties><runDurationMillis>0</runDurationMillis><schedulingPeriod>0 sec</schedulingPeriod><schedulingStrategy>TIMER_DRIVEN</schedulingStrategy><yieldDuration>1 sec</yieldDuration></config><name>listen to application logs</name><relationships><autoTerminate>false</autoTerminate><description>Messages received successfully will be sent out this 
relationship.</description><name>success</name></relationships><state>STOPPED</state><style/><supportsEventDriven>false</supportsEventDriven><supportsParallelProcessing>true</supportsParallelProcessing><type>org.apache.nifi.processors.standard.ListenUDP</type></processors><processors><id>2d16a5fe-c405-4c0e-9180-c3e01d79ae16</id><parentGroupId>65f617a2-b5b9-4167-898a-6aac885ca264</parentGroupId><position><x>194.5</x><y>361.0</y></position><config><bulletinLevel>ERROR</bulletinLevel><comments></comments><concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount><defaultConcurrentTasks><entry><key>TIMER_DRIVEN</key><value>1</value></entry><entry><key>EVENT_DRIVEN</key><value>0</value></entry><entry><key>CRON_DRIVEN</key><value>1</value></entry></defaultConcurrentTasks><defaultSchedulingPeriod><entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry><entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry></defaultSchedulingPeriod><descriptors><entry><key>Cluster Name</key><value><defaultValue>elasticsearch</defaultValue><description>Name of the ES cluster (for example, elasticsearch_brew). Defaults to 'elasticsearch'</description><displayName>Cluster Name</displayName><dynamic>false</dynamic><name>Cluster Name</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>ElasticSearch Hosts</key><value><description>ElasticSearch Hosts, which should be comma separated and colon for hostname/port host1:port,host2:port,.... For example testcluster:9300.</description><displayName>ElasticSearch Hosts</displayName><dynamic>false</dynamic><name>ElasticSearch Hosts</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>SSL Context Service</key><value><description>The SSL Context Service used to provide client certificate information for TLS/SSL connections. 
This service only applies if the Shield plugin is available.</description><displayName>SSL Context Service</displayName><dynamic>false</dynamic><identifiesControllerService>org.apache.nifi.ssl.SSLContextService</identifiesControllerService><name>SSL Context Service</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Shield Plugin Filename</key><value><description>Specifies the path to the JAR for the Elasticsearch Shield plugin. If the Elasticsearch cluster has been secured with the Shield plugin, then the Shield plugin JAR must also be available to this processor. Note: Do NOT place the Shield JAR into NiFi's lib/ directory, doing so will prevent the Shield plugin from being loaded.</description><displayName>Shield Plugin Filename</displayName><dynamic>false</dynamic><name>Shield Plugin Filename</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Username</key><value><description>Username to access the Elasticsearch cluster</description><displayName>Username</displayName><dynamic>false</dynamic><name>Username</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Password</key><value><description>Password to access the Elasticsearch cluster</description><displayName>Password</displayName><dynamic>false</dynamic><name>Password</name><required>false</required><sensitive>true</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>ElasticSearch Ping Timeout</key><value><defaultValue>5s</defaultValue><description>The ping timeout used to determine when a node is unreachable. For example, 5s (5 seconds). 
If non-local recommended is 30s</description><displayName>ElasticSearch Ping Timeout</displayName><dynamic>false</dynamic><name>ElasticSearch Ping Timeout</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Sampler Interval</key><value><defaultValue>5s</defaultValue><description>How often to sample / ping the nodes listed and connected. For example, 5s (5 seconds). If non-local recommended is 30s.</description><displayName>Sampler Interval</displayName><dynamic>false</dynamic><name>Sampler Interval</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Identifier Attribute</key><value><description>The name of the attribute containing the identifier for each FlowFile</description><displayName>Identifier Attribute</displayName><dynamic>false</dynamic><name>Identifier Attribute</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Index</key><value><description>The name of the index to insert into</description><displayName>Index</displayName><dynamic>false</dynamic><name>Index</name><required>true</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry><entry><key>Type</key><value><description>The type of this document (used by Elasticsearch for indexing and searching)</description><displayName>Type</displayName><dynamic>false</dynamic><name>Type</name><required>true</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry><entry><key>Character Set</key><value><defaultValue>UTF-8</defaultValue><description>Specifies the character set of the document data.</description><displayName>Character Set</displayName><dynamic>false</dynamic><name>Character Set</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry><entry><key>Batch 
Size</key><value><defaultValue>100</defaultValue><description>The preferred number of FlowFiles to put to the database in a single transaction</description><displayName>Batch Size</displayName><dynamic>false</dynamic><name>Batch Size</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry></descriptors><lossTolerant>false</lossTolerant><penaltyDuration>30 sec</penaltyDuration><properties><entry><key>Cluster Name</key><value>elasticsearch</value></entry><entry><key>ElasticSearch Hosts</key><value>elasticsearch:9300</value></entry><entry><key>SSL Context Service</key></entry><entry><key>Shield Plugin Filename</key></entry><entry><key>Username</key></entry><entry><key>Password</key></entry><entry><key>ElasticSearch Ping Timeout</key><value>5s</value></entry><entry><key>Sampler Interval</key><value>5s</value></entry><entry><key>Identifier Attribute</key><value>filename</value></entry><entry><key>Index</key><value>applog</value></entry><entry><key>Type</key><value>myapp</value></entry><entry><key>Character Set</key><value>UTF-8</value></entry><entry><key>Batch Size</key><value>100</value></entry></properties><runDurationMillis>0</runDurationMillis><schedulingPeriod>0 sec</schedulingPeriod><schedulingStrategy>TIMER_DRIVEN</schedulingStrategy><yieldDuration>1 sec</yieldDuration></config><name>Write log messages to elasticsearch</name><relationships><autoTerminate>true</autoTerminate><description>All FlowFiles that cannot be written to Elasticsearch are routed to this relationship</description><name>failure</name></relationships><relationships><autoTerminate>true</autoTerminate><description>A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed</description><name>retry</name></relationships><relationships><autoTerminate>true</autoTerminate><description>All FlowFiles that are written to Elasticsearch are routed to this 
relationship</description><name>success</name></relationships><state>STOPPED</state><style/><supportsEventDriven>true</supportsEventDriven><supportsParallelProcessing>true</supportsParallelProcessing><type>org.apache.nifi.processors.elasticsearch.PutElasticsearch</type></processors></contents><disabledCount>0</disabledCount><inactiveRemotePortCount>0</inactiveRemotePortCount><inputPortCount>0</inputPortCount><invalidCount>0</invalidCount><name>applog-collector</name><outputPortCount>0</outputPortCount><parent><id>3674ee34-a08f-44b7-b5a8-ce2a7e146fc2</id><name>NiFi Flow</name></parent><runningCount>0</runningCount><stoppedCount>3</stoppedCount></processGroups></snippet><timestamp>05/19/2016 23:10:01 GMT</timestamp></template>