#### Search Elasticsearch

### see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html

In [1]:
import requests
import json

#see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html

url = "http://13.52.183.126:9255/glassdoor_reviews/_search"

# Seconds to wait for the cluster before giving up. requests applies no timeout
# by default, so without this an unreachable host blocks the kernel forever.
REQUEST_TIMEOUT_SECONDS = 10

# multi_match runs one query string against several fields. With the default
# "best_fields" type, "operator": "and" is applied per field, so a review matches
# only when both terms occur together in `pros` or together in `cons`.
payload = json.dumps({
  "query": {
    "multi_match": {
      "query": "diversity inclusion",
      "fields": ["pros", "cons"],
      "operator": "and"
    }
  }
})
headers = {
  'Content-Type': 'application/json'
}

# A timeout or connection failure raises here and surfaces as a traceback,
# which is preferable to a silently hung cell.
response = requests.post(
    url,
    headers=headers,
    data=payload,
    timeout=REQUEST_TIMEOUT_SECONDS,
)

if response.status_code == 200:
    data = response.json()
    # 'total' is the number of matching documents; the 'hits' list below is only
    # the first page of them.
    # NOTE(review): page size is the Elasticsearch default unless `size` is added
    # to the request body — confirm if all matches are needed.
    print('Total hits: ' + str(data['hits']['total']['value']))
    print('-------------------------')
    for item in data['hits']['hits']:
        print(item['_source'])
        print('')
else:
    # Non-200 answer: show the server's body so the failure can be diagnosed.
    print('error fetching data')
    print(response.text)

Total hits: 160
-------------------------
{'company': 'Blackrock', 'rating': '4.0', 'pros': 'They focus on career growth of their employees and welfare\r\nGood benefits', 'cons': 'diversity and inclusion not much', 'adviceManagement': '', 'dateReviewed': '5 May 2021', 'authorInfo': 'Associate', 'authorLocation': ''}

{'company': 'Facebook', 'rating': '4.0', 'pros': 'Stellar benefits\r\nHigh visibility work\r\nAwesome co-workers', 'cons': 'Management struggles with diversity and inclusion', 'adviceManagement': '', 'dateReviewed': '11 May 2021', 'authorInfo': 'Product Manager', 'authorLocation': 'Seattle, WA'}

{'company': 'Facebook', 'rating': '2.0', 'pros': 'Salary is quite high for this role.', 'cons': 'Diversity / Inclusion is a real problem.', 'adviceManagement': '', 'dateReviewed': '8 May 2019', 'authorInfo': 'Anonymous Employee', 'authorLocation': ''}

{'company': 'Facebook', 'rating': '4.0', 'pros': 'Culture, diversity & inclusion, benefits', 'cons': 'Internal moves are harder th

### Load the saved word2vec model (the models are saved in the root directory)

In [7]:
from gensim.models import Word2Vec 
# Load a previously saved gensim Word2Vec model from "word2vec.model_50_3_1";
# the path is relative, so it is resolved against the current working directory.
# NOTE(review): the "_50_3_1" suffix presumably encodes training settings — confirm.
# NOTE(review): gensim's load() unpickles the file, so only load model files you trust.
model = Word2Vec.load("word2vec.model_50_3_1")

### This is how you get similar (thesaurus-style) words

In [8]:
# Ten nearest neighbours of 'diversity' in the embedding space, shown as
# (word, similarity) pairs with the most similar word first.
model.wv.most_similar('diversity', topn=10)

[('inclusion', 0.7531678676605225),
 ('teamwork', 0.7366514205932617),
 ('empowerment', 0.7191998362541199),
 ('transparency', 0.7103386521339417),
 ('professionalism', 0.7089841961860657),
 ('collaboration', 0.6810402274131775),
 ('cultural', 0.6662629842758179),
 ('genuine', 0.6645629405975342),
 ('empathy', 0.6524288654327393),
 ('openness', 0.6520794034004211)]

### Get the related words by walking a search tree up to depth 2

In [24]:
from collections import defaultdict
def get_related_words(word, size=10, depth=2, wv=None):
    """Collect the words reachable from `word` by following nearest neighbours.

    Starting at `word`, take its `size` most similar words, then the `size`
    most similar words of each of those, and so on for `depth` levels. Each
    word is reported once, in the order it was first reached (a neighbour is
    listed before that neighbour's own neighbours). The seed word itself is
    included only if it shows up as a neighbour of one of the words reached.

    Args:
        word: Seed word to expand.
        size: Number of neighbours requested for every expanded word
            (passed as `topn` to `most_similar`).
        depth: Number of levels to expand. The default of 2 gives neighbours
            plus neighbours-of-neighbours; values below 1 return an empty list.
        wv: Object providing `most_similar(term, topn=...)` that returns
            (word, similarity) pairs. Defaults to the notebook-level
            `model.wv`, so existing calls keep working unchanged.

    Returns:
        List of unique related words in first-seen order.

    Raises:
        Whatever `wv.most_similar` raises for a term it cannot look up.
    """
    if wv is None:
        # Fall back to the globally loaded Word2Vec model.
        wv = model.wv

    related = {}   # dict used as an insertion-ordered set of words found
    expanded = {}  # term -> deepest remaining depth it has been expanded with

    def _expand(term, remaining):
        # Skip when out of depth, or when this term was already expanded with at
        # least as much depth left: a repeat could not discover anything new.
        if remaining <= 0 or expanded.get(term, 0) >= remaining:
            return
        expanded[term] = remaining
        for neighbour, _similarity in wv.most_similar(term, topn=size):
            related.setdefault(neighbour, 1)
            _expand(neighbour, remaining - 1)

    _expand(word, depth)
    return list(related)
# Expand the seed word 'diversity' using the function's default of 10 neighbours per word.
all_words=get_related_words('diversity')

In [25]:
# Show the expanded word list as the cell output.
all_words

['inclusion',
 'openness',
 'diversity',
 'empowerment',
 'cultural',
 'inexcusable',
 'equality',
 'sustainability',
 'honesty',
 'teamwork',
 'morals',
 'collaboration',
 'camaraderie',
 'cooperation',
 'mentorship',
 'ambience',
 'transparency',
 'mentors',
 'engagement',
 'professionalism',
 'involvement',
 'autonomy',
 'dedication',
 'intellectual',
 'accountability',
 'consistency',
 'empathy',
 'oversight',
 'communications',
 'micro-management',
 'clarity',
 'communication',
 'integrity',
 're-abuse',
 'mnanagers',
 'strutcure',
 'infrastructure',
 'alignment',
 'informal',
 'functionally',
 'partnership',
 'presence',
 'diversified',
 'multi',
 'ethos',
 '-goodies',
 'genuine',
 'thoughtful',
 'compassionate',
 'caring',
 'sincere',
 'responsive',
 'humble',
 'seasoned',
 'polite',
 'conscious',
 'considerate',
 'compassion',
 'thereof',
 'appreciation',
 'gender',
 'uniform',
 'knowledge-sharing',
 'inclusivity',
 '.thanks',
 'inclusiveness',
 'experimentation']