## Preparation

### Installation

In [None]:
# Clone the FlexNeuART sources over HTTPS
# (the SSH remote only works when a GitHub SSH key is configured)
!git clone https://github.com/oaqa/FlexNeuART.git

In [None]:
# Run the build script located in the current working directory
!./build.sh

### Downloading demo data

In [None]:
# Download the demo data archive; -nc (no-clobber) skips the download when the
# file is already present, so re-running the cell does not create a duplicate copy
!wget -nc boytsov.info/datasets/flecsneurt-demo-2020-04-07.tar.bz2

In [None]:
# Unpack the downloaded archive (x: extract, j: bzip2, v: list files, f: archive name)
!tar jxvf flecsneurt-demo-2020-04-07.tar.bz2

In [None]:
# Create a Lucene index; "squad" is the argument passed to the indexing script
# (presumably the collection name, cf. the collections/squad paths used later — confirm)
!scripts/index/create_lucene_index.sh squad

In [None]:
# Creating a forward index for two fields:
# text is a parsed text field (declared as parsedText)
# text_unlemm is a raw text field (declared as raw) that keeps the text as is
# -clean removes all previous forward indices
!scripts/index/create_fwd_index.sh squad mapdb  \
                               "text:parsedText text_unlemm:raw" \
                               -clean

## API demo

In [1]:
# Import only the setup helpers this notebook calls instead of a wildcard import,
# so it stays clear where each name comes from
from scripts.pyflexneuart.setup import configure_classpath, create_featextr_resource_manager

In [2]:
# Put the Java JAR on the class path; "target" is the location handed to the helper
configure_classpath("target")

In [3]:
# Instantiate the resource manager, pointing it at the forward index directory
resource_manager = create_featextr_resource_manager(
    "collections/squad/forward_index"
)

### Retrieval

In [4]:
# Import exactly the names this notebook uses rather than a wildcard import
from scripts.pyflexneuart.cand_provider import (
    PROVIDER_TYPE_LUCENE,
    create_cand_provider,
    run_query,
)

# create a Lucene-backed candidate provider/generator
cand_prov = create_cand_provider(resource_manager, PROVIDER_TYPE_LUCENE, 'collections/squad/lucene_index')

In [6]:
# Query the candidate provider with a pre-tokenized query;
# 20 is the second argument (presumably the number of candidates to return)
run_query(cand_prov, 20, ["university", "notre", "dame", "student", "run"])

(1961,
 [CandidateEntry(doc_id='@4309', score=27.225019454956055),
  CandidateEntry(doc_id='@4323', score=27.155115127563477),
  CandidateEntry(doc_id='@2608', score=26.603111267089844),
  CandidateEntry(doc_id='@4310', score=26.364826202392578),
  CandidateEntry(doc_id='@4303', score=26.196977615356445),
  CandidateEntry(doc_id='@4350', score=25.074060440063477),
  CandidateEntry(doc_id='@4346', score=24.675006866455078),
  CandidateEntry(doc_id='@4316', score=24.361064910888672),
  CandidateEntry(doc_id='@4336', score=23.92790985107422),
  CandidateEntry(doc_id='@4345', score=23.00181007385254),
  CandidateEntry(doc_id='@4320', score=22.964710235595703),
  CandidateEntry(doc_id='@2607', score=22.55484390258789),
  CandidateEntry(doc_id='@4325', score=22.466575622558594),
  CandidateEntry(doc_id='@4330', score=21.723785400390625),
  CandidateEntry(doc_id='@4348', score=21.596769332885742),
  CandidateEntry(doc_id='@4321', score=21.47646713256836),
  CandidateEntry(doc_id='@4338', scor

#### Forward index demo

In [None]:
from scripts.pyflexneuart.fwd_index import get_forward_index

### First, let's play with a raw index that keeps only unparsed text

In [None]:
# Open the forward index of the text_unlemm field
raw_indx = get_forward_index(resource_manager, "text_unlemm")

In [None]:
# Display the is_raw flag of this index
# (expected to be set: text_unlemm was indexed as a raw field)
raw_indx.is_raw

In [None]:
# Fetch the raw entry stored for document '@4302'
raw_indx.get_doc_raw("@4302")

### A parsed index has more info

In [None]:
# Open the forward index of the text field
parsed_indx = get_forward_index(resource_manager, "text")

In [None]:
# Display the is_raw flag of this index
# (expected to be False: text was indexed as a parsed field)
parsed_indx.is_raw

In [None]:
# Fetch the parsed entry stored for document '@4302'
parsed_indx.get_doc_parsed("@4302")

In [None]:
# Look up the word with id 10063 together with its word entry
# (presumably the first word of the document fetched above — confirm)
(
    parsed_indx.get_word_by_id(10063),
    parsed_indx.get_word_entry_by_id(10063),
)