# Testing queries

source: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html

In [58]:
from elasticsearch import Elasticsearch

def connect_elasticsearch():
    _es = None
    _es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    if _es.ping():
        print('Connected')
    else:
        print('Warning! No connection!')
    return _es

def findNbest(result, n):
    bestLabels = []
    for i in range(n):
        bestLabels.append(result["hits"]["hits"][i]["_source"]["label"])
    return bestLabels

def printResults(query, n):
    es = connect_elasticsearch()
    res = es.search(index = "ontology_index", body = query)
    es.close()

    return findNbest(res, n)

In [24]:
article_name = "Big Data Analytics for Advanced Viticulture"

In [76]:
query = {'query': {
    "match": {"label" : article_name}
    }
}

printResults(query, 10)

Connected


[['Big data'],
 ['Data mining big data'],
 ['Big data data analysis'],
 ['Big data management'],
 ['Big data architecture'],
 ['Big data processing'],
 ['Big spatial data'],
 ['Geospatial big data'],
 ['Multimedia big data'],
 ['Big data security']]

In [60]:
query = {'query': {
    "more_like_this": {"fields" : ["label", "related", "broader"],
                       "like" : article_name,
                       "min_term_freq" : 1,
                       "max_query_terms" : 12}
    }
}

printResults(query, 10)

Connected


[['Small data'],
 ['Business analytics'],
 ['Prescriptive analytics'],
 ['Data analysis'],
 ['Behavioral analytics'],
 ['Data science'],
 ['Analytics'],
 ['Competitive intelligence'],
 ['Data discovery'],
 ['Data visualization']]

In [61]:
query = {
  "query": {
    "multi_match" : {
      "query":    article_name, 
      "fields" : ["label", "related", "broader"]
    }
  }
}

printResults(query, 10)

# najlepszy wynik ma  'broader': ['Big data', 'Analytics']

Connected


[['Urban analytics'],
 ['Healthcare analytics'],
 ['Real time analytics'],
 ['Small data'],
 ['Prescriptive analytics'],
 ['Business analytics'],
 ['Data analysis'],
 ['Behavioral analytics'],
 ['Data science'],
 ['Analytics']]

In [77]:
body = {
  "query": {
    "multi_match" : {
      "query":    article_name, 
      "fields" : ["label^3", "related", "broader"]
    }
  }
}

printResults(query, 10)

Connected


[['Big data'],
 ['Data mining big data'],
 ['Big data data analysis'],
 ['Big data management'],
 ['Big data architecture'],
 ['Big data processing'],
 ['Big spatial data'],
 ['Geospatial big data'],
 ['Multimedia big data'],
 ['Big data security']]

In [63]:
query = {
  "query": {
    "multi_match" : {
      "query":      article_name,
      "type":       "best_fields",
      "fields":     ["label^3", "related", "broader"],
      "tie_breaker": 0.3
    }
  }
}

printResults(query, 10)

Connected


[['Big data'],
 ['Big data data analysis'],
 ['Big data architecture'],
 ['Multimedia big data'],
 ['Big data mining'],
 ['Data mining big data'],
 ['Big data management'],
 ['Big data processing'],
 ['Big spatial data'],
 ['Geospatial big data']]

In [67]:
query = {
  "query": {
    "multi_match" : {
      "query":      article_name,
      "type":       "most_fields",
      "fields":     ["label^3", "related", "broader"]
    }
  }
}

printResults(query, 10)

Connected


[['Prescriptive analytics'],
 ['Big data'],
 ['Analytics'],
 ['Social analytics'],
 ['Learning analytics'],
 ['Big data data analysis'],
 ['Cultural analytics'],
 ['Big data architecture'],
 ['Multimedia big data'],
 ['Big data mining']]

In [74]:
query = {
  "query": {
    "multi_match" : {
      "query":      article_name,
      "type":       "bool_prefix",
      "fields":     ["label^4", "related", "broader^2"]
    }
  }
}

printResults(query, 10)

Connected


[['Prescriptive analytics'],
 ['Big data data analysis'],
 ['Big data'],
 ['Urban analytics'],
 ['Healthcare analytics'],
 ['Big data architecture'],
 ['Multimedia big data'],
 ['Big data mining'],
 ['Social analytics'],
 ['Social data analytics']]

## Searching using also an abstract

In [98]:
#article_name = "Brain Tumor Detection from MRI using Adaptive Thresholding and Histogram based Techniques"
#abstract = "This paper depicts a computerized framework that can distinguish brain tumor and investigate the diverse highlights of the tumor. Brain tumor segmentation means to isolated the unique tumor tissues, for example, active cells, edema and necrotic center from ordinary mind tissues of WM, GM, and CSF. However, manual segmentation in magnetic resonance data is a timeconsuming task. We present a method of automatic tumor segmentation in magnetic resonance images which consists of several steps. The recommended framework is helped by image processing based technique that gives improved precision rate of the cerebrum tumor location along with the computation of tumor measure. In this paper, the location of brain tumor from MRI is recognized utilizing adaptive thresholding with a level set and a morphological procedure with histogram. Automatic brain tumor stage is performed by using ensemble classification. Such phase classifies brain images into tumor and non-tumors using Feed Forwarded Artificial neural network based classifier. For test investigation, continuous MRI images gathered from 200 people are utilized. The rate of fruitful discovery through the proposed procedure is 97.32 percentage accurate."


# article_name = "Introduction to the Special Issue on Role of Scalable Computing and Data Analytics in Evolution of Internet of Things"
# abstract = """The evolution of Internet of Things has given way to a Smart World where there is an improved integration of devices, systems and processes in humans through all pervasive connectivity. Anytime, anywhere connection and transaction is the motto of the Internet of things which brings comfort to the users and sweeps the problem of physical boundary out of the way. Once it has come into the purview of developers, new areas have been identified and new applications have been introduced. Small wearables which can track your health to big automated vehicles which can move from one place to another self navigating without human intervention are the order of the day. This has also brought into existence a new technology called cloud, since with IoT comes a large number of devices connected to the internet continuously pumping data into the cloud for storage and processing. Another area benefited from the evolution of IoT is the wireless and wired connectivity through a wide range of connectivity standards.
# As with any technology, it has also created a lot of concerns regarding the security, privacy and ethics.
# Data protection issues created by new technologies are a threat which has been recognized by developers, public and also the governing body long back. The complexity of the system arises because of the various sensors and technologies which clearly tell the pattern of the activities of the individual as well an organization making us threat prone. Moreover, the volume of the data in the cloud makes it too difficult to recognize the privacy requirement of the data or to segregate open data from private data. Data analytics is another technology which supposedly increases the opportunity of increasing business by studying this private data collected from IoT and exploring ways to monetize them. It also helps the individual by recognizing their priorities and narrowing their search. But the data collected are real world data and aggregation of this data in the cloud is an open invitation to the hackers to study about the behaviors of the individuals.
# The special issues of Scalable Computing has attract related to the Role of Scalable Computing and Data Analytics in Evolution of Internet of Things has attracted 28 submissions from which were selected 12."""


# article_name = "An Ensemble Integrated Security System with Cross Breed Algorithm"
# abstract = """Blockchain and IoT are two technologies are most widely popular in present scenario, but technologies are more complicated. The blockchain used to transforms storage and data analysis. In recent years, the blockchain is at the heart of computer technologies. It is a cryptographically secure distributed database technology for storing and transmitting information. Various attacks are done in many networks. Many research articles discussed about the security issues over the IoT based secure using block chain technology. In this paper, an Ensemble Integrated Security System (EISS) is introduced to improve the security for the heterogeneous network which consists of normal and abnormal nodes which is processed with the block chain, IoT. Results
# show the performance of the OUATH-2 and EISS algorithm."""

article_name = "Background Modelling using a Q-Tree Based Foreground Segmentation"
abstract = """Background modelling is an empirical part in the procedure of foreground mining of idle and moving objects. The foreground object detection has become a challenging phenomenon due to intermittent objects, intensity variation, image artefact and dynamic background in the video analysis and video surveillance applications. In the video surveillances application, a large amount of data is getting processed by everyday basis. Thus it needs an efficient background modelling technique which could process those larger sets of data which promotes effective foreground detection. In this paper, we presented a renewed background modelling method for foreground segmentation. The main objective of the work is to perform the foreground extraction only in
the intended region of interest using proposed Q-Tree algorithm. At most all the present techniques consider their updates to the pixels of the entire frame which may result in inefficient foreground detection with a quick update to slow moving objects. The proposed method contract these defect by extracting the foreground object by controlling the region of interest (the region only where the background subtraction is to be performed) and thereby reducing the false positive and false negative. The extensive experimental results and the evaluation parameters of the proposed approach with the state of art method were compared against the most recent background subtraction approaches. Moreover, we use challenge change detection dataset and the efficiency of our method is analyzed in different environmental conditions (indoor, outdoor) from the CDnet2014 dataset and additional real time videos. The experimental results were satisfactorily verified the strengths and weakness of proposed method against the existing state-of-the-art background modelling methods."""

In [99]:
# compare with only title

query = {
  "query": {
    "multi_match" : {
      "query":    article_name, 
      "fields" : ["label^3", "related", "broader"]
    }
  }
}

printResults(query, 10)

Connected


[['Foreground-background'],
 ['Minimum spanning tree-based segmentation'],
 ['Tree based'],
 ['(a,b)-tree'],
 ['Foreground detection'],
 ['Graph based segmentation'],
 ['Model based segmentation'],
 ['Tree based regression'],
 ['Q band'],
 ['Q-Warrior']]

In [101]:
query = {
  "query": {
    "dis_max": {
      "queries": [
        {
          "multi_match" : {
          "query":    article_name, 
          "fields" : ["label^3", "related", "broader"]
    }
        },
        {
          "multi_match" : {
          "query":    abstract, 
          "fields" : ["label^2", "related", "broader^2"]
          }
        }
      ]
    }
  }
}

printResults(query, 10)

Connected


[['Programming in the large and programming in the small'],
 ['The Intersect'],
 ['The Internet'],
 ['The Blob'],
 ['The Vanguard Method'],
 ['Law of the instrument'],
 ['Wisdom of the crowd'],
 ['March of the Machines'],
 ['Outline of the Internet'],
 ['Confusion of the inverse']]

In [102]:
query = {
  "query": {
    "dis_max": {
      "queries": [
        {
          "multi_match" : {
          "query":      article_name,
          "type":       "best_fields",
          "fields":     ["label^3", "related", "broader"],
          "tie_breaker": 0.3
         }
        },
        {
          "multi_match" : {
          "query":      abstract,
          "type":       "best_fields",
          "fields":     ["label^3", "related", "broader"],
          "tie_breaker": 0.3
         }
        }
      ]
    }
  }
}

printResults(query, 10)

Connected


[['Confusion of the inverse'],
 ['Suicide and the Internet'],
 ['Abundances of the elements'],
 ['Programming in the large and programming in the small'],
 ['Sociology of the Internet'],
 ['The Internet'],
 ['Outline of the Internet'],
 ['The Intersect'],
 ['The Blob'],
 ['The Vanguard Method']]

In [103]:
query = {
  "query": {
    "dis_max": {
      "queries": [
        {
          "multi_match" : {
          "query":      article_name,
          "type":       "most_fields",
          "fields":     ["label^3", "related", "broader"],
          "tie_breaker": 0.5
         }
        },
        {
          "multi_match" : {
          "query":      abstract,
          "type":       "most_fields",
          "fields":     ["label^3", "related", "broader"],
          "tie_breaker": 0.5
         }
        }
      ]
    }
  }
}

printResults(query, 10)

Connected


[['Suicide and the Internet'],
 ['Confusion of the inverse'],
 ['Abundances of the elements'],
 ['The Victorian Internet'],
 ['Sociology of the Internet'],
 ['Programming in the large and programming in the small'],
 ['Outline of the Internet'],
 ['Model in the loop'],
 ['Software in the loop'],
 ['The Internet']]