Skip to content
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
257 lines (218 sloc) 9.92 KB
<?xml version="1.0" encoding="UTF-8" ?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<!--
 This is a stripped down config file used for a simple example...
 It is *not* a good example to work from.
-->
<!-- Load every jar from the extraction contrib's lib directory, plus the
     jars in dist/ whose names match solr-cell-<digit>*.jar. Both paths are
     rooted at solr.install.dir, with ../../../.. as the inline default. -->
<lib regex=".*\.jar"
     dir="${solr.install.dir:../../../..}/contrib/extraction/lib" />
<lib regex="solr-cell-\d.*\.jar"
     dir="${solr.install.dir:../../../..}/dist/" />
<!-- DirectoryFactory used for index storage.
     * solr.StandardDirectoryFactory (the default) is filesystem based.
     * solr.RAMDirectoryFactory is memory based, not persistent, and
       doesn't work with replication.
     The class can be overridden through the solr.directoryFactory property. -->
<directoryFactory
    class="${solr.directoryFactory:solr.StandardDirectoryFactory}"
    name="DirectoryFactory" />
<!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:

     <schemaFactory class="ManagedIndexSchemaFactory">
       <bool name="mutable">true</bool>
       <str name="managedSchemaResourceName">managed-schema</str>
     </schemaFactory>

     When ManagedIndexSchemaFactory is specified, Solr will load the schema from
     the resource named in 'managedSchemaResourceName', rather than from schema.xml.
     Note that the managed schema resource CANNOT be named schema.xml. If the managed
     schema does not exist, Solr will create it after reading schema.xml, then rename
     'schema.xml' to 'schema.xml.bak'.

     Do NOT hand edit the managed schema - external modifications will be ignored and
     overwritten as a result of schema modification REST API calls.

     When ManagedIndexSchemaFactory is specified with mutable = true, schema
     modification REST API calls will be allowed; otherwise, error responses will be
     sent back for these requests.
-->
<!-- Active setting: the classic factory, i.e. the schema is read from schema.xml. -->
<schemaFactory class="ClassicIndexSchemaFactory"/>
<updateHandler class="solr.DirectUpdateHandler2">
<!-- Transaction log directory. The realtime get handler configured in this
     file relies on the updateLog feature being enabled.
     NOTE(review): the property reference was empty ("${}") and the updateLog
     wrapper was missing; both are restored from the stock Solr example
     config. Confirm against the deployed configuration. -->
<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
</updateLog>
<!-- realtime get handler, guaranteed to return the latest stored fields
of any document, without the need to commit or open a new searcher. The current
implementation relies on the updateLog feature being enabled. -->
<requestHandler name="/get" class="solr.RealTimeGetHandler">
<lst name="defaults">
<str name="omitHeader">true</str>
<!-- Replication handler, registered at /replication with lazy startup. -->
<requestHandler name="/replication"
                class="solr.ReplicationHandler"
                startup="lazy" />
<requestDispatcher handleSelect="true" >
<!-- Multipart and form-data upload limits are raised to 20480 KB to support
     the /update/speeches end point; remote streaming stays disabled. -->
<requestParsers enableRemoteStreaming="false"
                multipartUploadLimitInKB="20480"
                formdataUploadLimitInKB="20480" />
<requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
<lst name="defaults">
<str name="echoParams">all</str>
<str name="df">content_ocr</str>
<arr name="last-components">
<!-- Field analysis handler, registered at /analysis/field with lazy startup. -->
<requestHandler name="/analysis/field"
                class="solr.FieldAnalysisRequestHandler"
                startup="lazy" />
<!-- Update handler, registered at /update. -->
<requestHandler name="/update"
                class="solr.UpdateRequestHandler" />
<requestHandler name="/update/extract"
class="solr.extraction.ExtractingRequestHandler" >
<str name="parseContext.config">parseContext.xml</str>
<lst name="defaults">
<str name="lowernames">true</str>
<str name="uprefix">attr_</str>
<str name="multipartUploadLimitInKB">20480</str> <!-- Limit to 20 MB PDF -->
<requestHandler name="/update/speeches"
class="solr.extraction.ExtractingRequestHandler" >
<str name="parseContext.config">parseContext.xml</str>
<lst name="defaults">
<!-- capture link hrefs but ignore div attributes -->
<!--str name="captureAttr">true</str>
<str name="capture">div</str>
<str name="fmap.div">attr_div</str>
<str name="fmap.span">attr_span</str-->
<!--str name="fmap.div">ignored_</str-->
<!--str name="lowernames">true</str-->
<str name="uprefix">attr_</str>
<str name="multipartUploadLimitInKB">20480</str> <!--Limit to 20 MB PDF-->
<str name="update.chain">process-speech-from-extracted-text</str>
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
<lst name="invariants">
<str name="q">solrpingquery</str>
<lst name="defaults">
<str name="echoParams">all</str>
<updateRequestProcessorChain name="process-speech-from-extracted-text">
<processor class="solr.StatelessScriptUpdateProcessorFactory">
<arr name="script">
<str name="script">process-speech.js</str>
<!--str name="script"></str-->
<lst name="params">
<bool name="a_bool_value">true</bool>
<int name="and_int_value">3</int>
<processor class="solr.LogUpdateProcessorFactory" />
<processor class="solr.RunUpdateProcessorFactory" />
<!-- config for the admin interface -->
<!-- Registers the search component named "payload". -->
<searchComponent name="payload"
                 class="com.o19s.payloads.component.PayloadComponent" />
<!-- Highlighting Component -->
<searchComponent class="solr.HighlightComponent" name="highlight">
<!-- Configure the standard fragmenter -->
<!-- This could most likely be commented out in the "default" case -->
<fragmenter name="gap"
<lst name="defaults">
<int name="hl.fragsize">100</int>
<!-- A regular-expression-based fragmenter
(for sentence extraction) -->
<fragmenter name="regex"
<lst name="defaults">
<!-- slightly smaller fragsizes work better because of slop -->
<int name="hl.fragsize">70</int>
<!-- allow 50% slop on fragment sizes -->
<float name="hl.regex.slop">0.5</float>
<!-- a basic sentence pattern -->
<str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
<!-- Configure the standard formatter -->
<formatter name="html"
<!-- Standard encoder, registered under the name "html". -->
<encoder class="solr.highlight.HtmlEncoder" name="html" />
<!-- Configure the standard fragListBuilder -->
<fragListBuilder name="simple"
<!-- Configure the single fragListBuilder -->
<fragListBuilder name="single"
<!-- Configure the weighted fragListBuilder -->
<fragListBuilder name="weighted"
<!-- default tag FragmentsBuilder -->
<fragmentsBuilder name="default"
<lst name="defaults">
<str name="hl.multiValuedSeparatorChar">/</str>
<!-- multi-colored tag FragmentsBuilder -->
<fragmentsBuilder name="colored"
<lst name="defaults">
<!-- Opening tags for the multi-colored builder: a comma-separated list of
     <b> tags, each carrying a different background color.
     "lawngreen" replaces the original "lawgreen", which is not a CSS color name. -->
<str name="hl.tag.pre"><![CDATA[
<b style="background:yellow">,<b style="background:lawngreen">,
<b style="background:aquamarine">,<b style="background:magenta">,
<b style="background:palegreen">,<b style="background:coral">,
<b style="background:wheat">,<b style="background:khaki">,
<b style="background:lime">,<b style="background:deepskyblue">]]></str>
<!-- Closing tag paired with the <b ...> opening tags in hl.tag.pre.
     NOTE(review): the parameter name was empty; restored to hl.tag.post. -->
<str name="hl.tag.post"><![CDATA[</b>]]></str>
<boundaryScanner name="default"
<lst name="defaults">
<!-- NOTE(review): both parameter names were empty; restored from the stock
     Solr example config (scan distance and boundary characters) - confirm. -->
<str name="hl.bs.maxScan">10</str>
<str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
<boundaryScanner name="breakIterator"
<lst name="defaults">
<!-- NOTE(review): the three parameter names below were empty; restored from
     the stock Solr example config - confirm. -->
<!-- type should be one of CHARACTER, WORD (default), LINE and SENTENCE -->
<str name="hl.bs.type">WORD</str>
<!-- language and country are used when constructing the Locale object, -->
<!-- and that Locale object is used when getting an instance of BreakIterator -->
<str name="hl.bs.language">en</str>
<str name="hl.bs.country">US</str>
You can’t perform that action at this time.