In [1]:
from datetime import datetime
from elasticsearch import helpers, Elasticsearch
import csv
import csv2es



In [2]:
# Shared Elasticsearch client used by every cell below. No host is passed, so
# the client library's default connection target is used.
# NOTE(review): timeout=1000 is presumably in seconds — confirm against the
# elasticsearch-py docs; it looks deliberately generous for the bulk loads.
es = Elasticsearch(timeout=1000)

In [31]:
# Create index_2. The en_label field is indexed with a lowercase-only
# analyzer, while its search analyzer additionally applies the english_stop
# filter; the search-quote analyzer falls back to the lowercase-only one.
es.indices.create(
    index='index_2',
    body={
        "settings": {
            "analysis": {
                "analyzer": {
                    # standard tokenizer + lowercase
                    "my_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": ["lowercase"],
                    },
                    # standard tokenizer + lowercase + english_stop
                    "my_stop_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": ["lowercase", "english_stop"],
                    },
                },
                "filter": {
                    # stop filter configured with the "_english_" stopword set
                    "english_stop": {
                        "type": "stop",
                        "stopwords": "_english_",
                    },
                },
            },
        },
        "mappings": {
            "properties": {
                "en_label": {
                    "type": "text",
                    "analyzer": "my_analyzer",
                    "search_analyzer": "my_stop_analyzer",
                    "search_quote_analyzer": "my_analyzer",
                },
            },
        },
    },
    # Do not raise on an HTTP 400 response, so re-running the cell when the
    # index already exists does not abort the notebook.
    ignore=400,
)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'index_2'}

In [32]:
# Create index_3. The en_label field is indexed through an edge_ngram filter
# (gram lengths 1..20) so prefixes of each token are stored, and is searched
# with the plain "standard" analyzer.
es.indices.create(
    index='index_3',
    body={
        "settings": {
            "analysis": {
                "filter": {
                    "autocomplete_filter": {
                        "type": "edge_ngram",
                        "min_gram": 1,
                        "max_gram": 20,
                    },
                },
                "analyzer": {
                    # standard tokenizer + lowercase + autocomplete_filter
                    "autocomplete": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": ["lowercase", "autocomplete_filter"],
                    },
                },
            },
        },
        "mappings": {
            "properties": {
                "en_label": {
                    "type": "text",
                    "analyzer": "autocomplete",
                    "search_analyzer": "standard",
                },
            },
        },
    },
    # Do not raise on an HTTP 400 response, so re-running the cell when the
    # index already exists does not abort the notebook.
    ignore=400,
)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'index_3'}

In [33]:
# Bulk-load the tab-separated category file into index_1: the DictReader rows
# are handed to helpers.bulk as the actions iterable.
# newline='' is what the csv module requires of the file object it reads, and
# the encoding is explicit because the labels contain non-ASCII (Slovak /
# Hungarian) characters, so decoding must not depend on the platform locale.
# NOTE(review): assumes the TSV is UTF-8 encoded — confirm.
with open('./Data/parsed_categories.tsv', newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f, delimiter='\t')
    helpers.bulk(es, reader, index='index_1')

In [34]:
# Bulk-load the tab-separated category file into index_2: the DictReader rows
# are handed to helpers.bulk as the actions iterable.
# newline='' is what the csv module requires of the file object it reads, and
# the encoding is explicit because the labels contain non-ASCII (Slovak /
# Hungarian) characters, so decoding must not depend on the platform locale.
# NOTE(review): assumes the TSV is UTF-8 encoded — confirm.
with open('./Data/parsed_categories.tsv', newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f, delimiter='\t')
    helpers.bulk(es, reader, index='index_2')

In [35]:
# Bulk-load the tab-separated category file into index_3: the DictReader rows
# are handed to helpers.bulk as the actions iterable.
# newline='' is what the csv module requires of the file object it reads, and
# the encoding is explicit because the labels contain non-ASCII (Slovak /
# Hungarian) characters, so decoding must not depend on the platform locale.
# NOTE(review): assumes the TSV is UTF-8 encoded — confirm.
with open('./Data/parsed_categories.tsv', newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f, delimiter='\t')
    helpers.bulk(es, reader, index='index_3')

In [27]:
def search(index, query, showUrls, fields=None):
    """Run a query_string search and print every returned hit.

    Parameters
    ----------
    index : str
        Name of the Elasticsearch index to query.
    query : str
        Text passed as the ``query`` of a ``query_string`` query.
    showUrls : bool
        When truthy, the DBPedia / Wikipedia URI fields of each hit are
        printed in addition to the labels and broader categories.
    fields : list of str, optional
        Document fields the query is matched against. Defaults to the
        English, Hungarian and Slovak label fields.

    Returns
    -------
    None
        Results are printed, not returned.

    Raises
    ------
    KeyError
        If a hit's ``_source`` lacks one of the printed fields.
    """
    # Build the default per call instead of sharing one mutable list object
    # across all invocations.
    if fields is None:
        fields = ["en_label", "hu_label", "sk_label"]

    res = es.search(index=index, body={
        "query": {
            "query_string": {
                "query": query,
                "fields": fields
            }
        }
    })

    # (caption, _source key) pairs, in the order they are printed.
    label_rows = (
        ("English Label: ", 'en_label'),
        ("English Broader Categories: ", 'en_broader'),
        ("Hungarian Label: ", 'hu_label'),
        ("Hungarian Broader Categories: ", 'hu_broader'),
        ("Slovak Label: ", 'sk_label'),
        ("Slovak Broader Categories: ", 'sk_broader'),
    )
    url_rows = (
        ("English DBPedia Uri: ", 'en_db'),
        ("English Wikipedia Uri: ", 'en_wiki'),
        ("Hungarian DBPedia Uri: ", 'hu_db'),
        ("Hungarian Wikipedia Uri: ", 'hu_wiki'),
        ("Slovak DBPedia Uri: ", 'sk_db'),
        ("Slovak Wikipedia Uri: ", 'sk_wiki'),
    )
    rows = label_rows + url_rows if showUrls else label_rows

    # A dedicated counter keeps the `index` parameter from being rebound to
    # the result number.
    for rank, hit in enumerate(res['hits']['hits'], start=1):
        doc = hit['_source']
        print(rank, ". result")
        for caption, key in rows:
            print(caption, doc[key])
        print()

In [36]:
# Query index_1 on the Slovak label field only, printing the URI fields too.
search("index_1", "Finančná Matematika", showUrls=True, fields=["sk_label"])

1 . result
English Label:  Mathematical finance
English Broader Categories:  Applied mathematics, Fields of application of statistics, Fields of finance, Actuarial science, Mathematical science occupations
Hungarian Label:  
Hungarian Broader Categories:  
Slovak Label:  Finančná matematika
Slovak Broader Categories:  Aplikovaná matematika, Financie
English DBPedia Uri:  http://dbpedia.org/resource/Category:Mathematical_finance
English Wikipedia Uri:  http://en.wikipedia.org/wiki/Category:Mathematical_finance?oldid=546540754
Hungarian DBPedia Uri:  
Hungarian Wikipedia Uri:  
Slovak DBPedia Uri:  http://sk.dbpedia.org/resource/Kategória:Finančná_matematika
Slovak Wikipedia Uri:  http://sk.wikipedia.org/wiki/Kategória:Finančná_matematika?oldid=5413451

2 . result
English Label:  Mathematics
English Broader Categories:  Main topic classifications, Abstraction, Formal sciences, Structure
Hungarian Label:  Matematika
Hungarian Broader Categories:  Tudomány
Slovak Label:  Matematika
Slovak 