Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
72 lines (65 sloc) 2.02 KB
# Specify server locations in a SOLR_LOCATOR variable; used later in
# variable substitutions:
SOLR_LOCATOR : {
# Name of solr collection
collection : reviews
# ZooKeeper ensemble
zkHost : "127.0.0.1:2181/solr"
}
# Specify an array of one or more morphlines, each of which defines an ETL
# transformation chain. A morphline consists of one or more (potentially
# nested) commands. A morphline is a way to consume records (e.g. Flume events,
# HDFS files or blocks), turn them into a stream of records, and pipe the stream
# of records through a set of easily configurable transformations on the way to
# a target application such as Solr.
morphlines : [
{
id : reviews
importCommands : ["org.kitesdk.**", "org.apache.solr.**"]
commands : [
{
readCSV {
separator : ","
columns : [business_id, cool, date, funny, id, stars, text, type, useful, user_id, name, city, full_address, latitude, longitude, neighborhoods, open, review_count, state]
quoteChar : "\""
}
}
{
if {
conditions : [
{
equals { id : [] }
}
]
then : [
{
dropRecord {}
}
]
}
}
# Consume the output record of the previous command and pipe another
# record downstream.
#
# Command that deletes record fields that are unknown to Solr
# schema.xml.
#
# Recall that Solr throws an exception on any attempt to load a document
# that contains a field that isn't specified in schema.xml.
{
sanitizeUnknownSolrFields {
# Location from which to fetch Solr schema
solrLocator : ${SOLR_LOCATOR}
}
}
# log the record at DEBUG level to SLF4J
{ logDebug { format : "output record: {}", args : ["@{}"] } }
# load the record into a Solr server or MapReduce Reducer
{
loadSolr {
solrLocator : ${SOLR_LOCATOR}
}
}
]
}
]