In [8]:
import pyfreeling as freeling

## -----------------------------------------------
## Do whatever is needed with analyzed sentences
## -----------------------------------------------
def ProcessSentences(ls):
    """Print a per-word report followed by per-sentence summary lists.

    For every word of every sentence in ``ls`` this prints the word form,
    all candidate (lemma, tag) analyses obtained by iterating the word, and
    the (lemma, tag) pair returned by the word's own accessors. A blank
    line is printed after each sentence.

    Afterwards four nested lists (one inner list per sentence) are printed,
    each preceded by a blank line: forms, lemmas, tags, and finally
    (form, lemma, tag) tuples, i.e. one feature vector per word.

    ``ls`` is traversed several times, so it must be re-iterable. Each
    sentence must be iterable over its words and also offer ``get_words()``.
    """
    # Detailed dump, one sentence after another.
    for sentence in ls:
        for word in sentence:
            print("word '" + word.get_form() + "'")

            # Every analysis attached to the word, rendered as " (lemma,tag)".
            candidates = "".join(
                "".join((" (", analysis.get_lemma(), ",", analysis.get_tag(), ")"))
                for analysis in word
            )
            print("  Possible analysis: {" + candidates + " }")

            # The analysis exposed directly by the word (tagger's choice).
            selected = "(" + word.get_lemma() + "," + word.get_tag() + ")"
            print("  Selected Analysis: " + selected)

        # Sentence separator.
        print("")

    # Summary views, printed in this order; the last one is the list of
    # feature vectors.
    views = (
        lambda word: word.get_form(),
        lambda word: word.get_lemma(),
        lambda word: word.get_tag(),
        lambda word: (word.get_form(), word.get_lemma(), word.get_tag()),
    )
    for view in views:
        print()
        print([[view(word) for word in sentence.get_words()] for sentence in ls])
    
    

## -----------------------------------------------
## Set desired options for morphological analyzer
## -----------------------------------------------
def my_maco_options(lang, lpath):
    """Build the option holder for the morphological analyzer.

    Creates ``freeling.maco_options(lang)`` and points each submodule at
    its data file. Every path is ``lpath`` with a file name appended, so
    ``lpath`` must already end with a path separator. Modules that will not
    be used do not need a file; the user-map file is left empty.

    Returns the populated options object.
    """
    options = freeling.maco_options(lang)

    # (option attribute, data file) pairs, applied in this order.
    resource_files = (
        ("UserMapFile", ""),
        ("LocutionsFile", lpath + "locucions.dat"),
        ("AffixFile", lpath + "afixos.dat"),
        ("ProbabilityFile", lpath + "probabilitats.dat"),
        ("DictionaryFile", lpath + "dicc.src"),
        ("NPdataFile", lpath + "np.dat"),
        # Punctuation data sits in the sibling "common" directory.
        ("PunctuationFile", lpath + "../common/punct.dat"),
    )
    for attribute, path in resource_files:
        setattr(options, attribute, path)

    return options


# Set up the locale through FreeLing before creating any component.
freeling.util_init_locale("default")

# Language and install prefix; data files are read from
# <ipath>/share/freeling/<lang>/.
lang = "en"
ipath = "/usr/local"
lpath = ipath + "/share/freeling/" + lang + "/"

# Tokenizer and sentence splitter, each loaded from its own data file.
tk = freeling.tokenizer(lpath + "tokenizer.dat")
sp = freeling.splitter(lpath + "splitter.dat")

# Morphological analyzer built from the option set prepared by
# my_maco_options().
morfo = freeling.maco(my_maco_options(lang, lpath))

# Submodule switches, listed in the positional order that
# set_active_options() receives them.
module_switches = (
    ("UserMap", False),
    ("NumbersDetection", True),
    ("PunctuationDetection", True),
    ("DatesDetection", True),
    ("DictionarySearch", True),
    ("AffixAnalysis", True),
    ("CompoundAnalysis", False),
    ("RetokContractions", True),
    ("MultiwordsDetection", True),
    ("NERecognition", True),
    ("QuantitiesDetection", False),
    ("ProbabilityAssignment", True),
)
morfo.set_active_options(*(enabled for _name, enabled in module_switches))

# Part-of-speech tagger.
tagger = freeling.hmm_tagger(lpath + "tagger.dat", True, 2)

# Sample input pushed through the whole chain:
# tokenize -> split into sentences -> morphological analysis -> tagging.
text = "This is a test. It's the first time I use freeling."
lw = tk.tokenize(text)
ls = tagger.analyze(morfo.analyze(sp.split(lw)))

ProcessSentences(ls)

"""
    To embed in our code:
        1- function that initializes an object with all components inside
        2- this function performs tokenizing, sentence splitting, morphological analysis and tagging
        3- generates the vector of features 
        4- then the BIO tagger works over that (adapt)
"""

word 'This'
  Possible analysis: { (this,DT) (this,RB) }
  Selected Analysis: (this,DT)
word 'is'
  Possible analysis: { (be,VBZ) }
  Selected Analysis: (be,VBZ)
word 'a'
  Possible analysis: { (a,DT) (a,NN) (1,Z) }
  Selected Analysis: (a,DT)
word 'test'
  Possible analysis: { (test,NN) (test,VB) (test,VBP) }
  Selected Analysis: (test,NN)
word '.'
  Possible analysis: { (.,Fp) }
  Selected Analysis: (.,Fp)

word 'It'
  Possible analysis: { (it,PRP) }
  Selected Analysis: (it,PRP)
word ''s'
  Possible analysis: { (be,VBZ) (have,VBZ) }
  Selected Analysis: (be,VBZ)
word 'the'
  Possible analysis: { (the,DT) }
  Selected Analysis: (the,DT)
word 'first'
  Possible analysis: { (1,JJ) (first,RB) (first,NN) }
  Selected Analysis: (1,JJ)
word 'time'
  Possible analysis: { (time,NN) (time,VB) (time,VBP) }
  Selected Analysis: (time,NN)
word 'I'
  Possible analysis: { (i,PRP) }
  Selected Analysis: (i,PRP)
word 'use'
  Possible analysis: { (use,NN) (use,VB) (use,VBP) }
  Selected Analysis: (us