# Flume
![Flume](https://flume.apache.org/_static/flume-logo.png)

- https://flume.apache.org

## Setup

- Download the binary release from https://flume.apache.org/download.html
- This notebook uses version 1.9.0

In [None]:
%%bash

# Install Apache Flume under /opt and register it in /opt/envvars.sh.
# The cell is safe to re-run: every step either overwrites or is skipped.

# Fail fast: a failed download must not be followed by tar/ln/cp on missing files
set -euo pipefail

FLUME_VERSION=1.9.0
FLUME_PKG="apache-flume-${FLUME_VERSION}-bin"

# Download package
# Superseded releases are removed from downloads.apache.org;
# archive.apache.org keeps every published version.
mkdir -p /opt/pkgs
cd /opt/pkgs
wget -q -c "https://archive.apache.org/dist/flume/${FLUME_VERSION}/${FLUME_PKG}.tar.gz"

# unpack file and create link
tar -zxf "/opt/pkgs/${FLUME_PKG}.tar.gz" -C /opt
# -f/-n: replace an existing link on re-run instead of failing or
# creating a second link inside the linked directory
ln -sfn "/opt/${FLUME_PKG}" /opt/flume

# update guava library on Flume
# (swaps the bundled guava 11 jar for the guava 27 jar shipped with Hadoop;
#  presumably to avoid a version clash with the Hadoop client - TODO confirm)
rm -f /opt/flume/lib/guava-11.0.2.jar
cp -f /opt/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /opt/flume/lib

# update envvars.sh - only once, so re-running does not append duplicates.
# \${...} is escaped so the literal text is written and expanded when loaded.
if ! grep -q '^export FLUME_HOME=' /opt/envvars.sh 2>/dev/null; then
cat >> /opt/envvars.sh << EOF
# Flume
export FLUME_HOME=/opt/flume
export PATH=\${PATH}:\${FLUME_HOME}/bin

EOF
fi

cat /opt/envvars.sh

In [None]:
# Load environment variables
# Enable the IPython extension that provides the %dotenv magic
%load_ext dotenv
# Read /opt/envvars.sh into this kernel's environment;
# -o overrides variables that are already set
%dotenv -o /opt/envvars.sh
# With no arguments, %env lists the environment so the result can be checked
%env

## Tailagent example

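- This example tails `/tmp/events` with an exec source and writes the events to HDFS through a memory channel.
- Reference: [Flume 1.9.0 User Guide](https://flume.apache.org/releases/content/1.9.0/FlumeUserGuide.html)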

In [None]:
%%writefile /opt/flume/conf/tailagent.conf
# Agent "tailagent": one exec source -> one memory channel -> one HDFS sink
tailagent.sources = execsource
tailagent.sinks = hdfssink
tailagent.channels = memchannel

# Source: runs the given command and reads its output (here: follow /tmp/events)
tailagent.sources.execsource.type = exec
tailagent.sources.execsource.command = tail -F /tmp/events
tailagent.sources.execsource.channels = memchannel

# Channel: in-memory buffer between the source and the sink
tailagent.channels.memchannel.type = memory

# Sink: writes to files named tailevents-* under /tmp in HDFS
tailagent.sinks.hdfssink.type = hdfs
tailagent.sinks.hdfssink.channel = memchannel
tailagent.sinks.hdfssink.hdfs.path = /tmp
tailagent.sinks.hdfssink.hdfs.filePrefix = tailevents-
tailagent.sinks.hdfssink.hdfs.fileType = DataStream

In [None]:
%%bash

# Launch the Flume agent as a background job from the Flume install directory
cd /opt/flume

agent_name=tailagent
pid_file="./${agent_name}.pid"

# stdout and stderr of the agent both go to ./tailagent.output
flume-ng agent --name "${agent_name}" --conf ./conf \
    --conf-file "./conf/${agent_name}.conf" > "./${agent_name}.output" 2>&1 &
# $! is the PID of the job that was just put in the background
echo $! > "${pid_file}"

# show the process entry for the recorded PID
ps -fp "$(cat "${pid_file}")"

In [None]:
%%bash

# run random generator in background
cd /opt/flume

# Write the generator script.
# - The quoted delimiter ('EOF') writes the body verbatim, so ${RANDOM} is
#   expanded by the generated script on every iteration, not by this cell.
# - The shebang is required because ${RANDOM} is a bash feature; without it
#   the script only works when the launching shell happens to be bash.
cat > randomgen.sh << 'EOF'
#!/bin/bash
# Append one random number per second to /tmp/events
while true
do
    echo ${RANDOM} >> /tmp/events
    sleep 1
done
EOF

chmod +x randomgen.sh
./randomgen.sh > /dev/null 2>&1 &
# remember the PID so the generator can be stopped later
echo $! > ./randomgen.pid

ps -fp "$(cat ./randomgen.pid)"

In [None]:
%%bash

# check files generated in HDFS
# The pattern is quoted so the local shell cannot expand it against the
# local /tmp directory; it is passed as-is and resolved by HDFS.
hdfs dfs -ls '/tmp/tailevents*'

In [None]:
%%bash

# cat files
# The pattern is quoted so the local shell cannot expand it against the
# local /tmp directory; it is passed as-is and resolved by HDFS.
hdfs dfs -cat '/tmp/tailevents*'

In [None]:
%%bash

cd /opt/flume

# Stop the process whose PID is stored in the given pid file, then delete the
# pid file. Does nothing if the file is missing; ignores an already-dead PID.
# Waits up to ~10 seconds for the process to exit after sending the signal.
stop_from_pidfile() {
    local pid_file=$1
    local pid

    [ -f "${pid_file}" ] || return 0
    pid=$(cat "${pid_file}")

    if kill "${pid}" 2>/dev/null; then
        for _ in $(seq 1 10); do
            # kill -0 only tests whether the process still exists
            kill -0 "${pid}" 2>/dev/null || break
            sleep 1
        done
    fi
    rm -f "${pid_file}"
}

# kill random generator
stop_from_pidfile randomgen.pid

# kill tailagent (waiting for it to exit gives it time to finish its open
# HDFS file before the files are removed below)
stop_from_pidfile tailagent.pid
rm -f tailagent.output

# remove files
# The pattern is quoted so HDFS, not the local shell, expands it;
# -f keeps the command quiet when nothing matches.
hdfs dfs -rm -f '/tmp/tailevents*'