In [4]:
import nltk
from nltk.corpus import wordnet
import requests
import json

nltk.download('wordnet')

def get_synonyms(word):
    synonyms = []
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.append(lemma.name())
    return set(synonyms)

def get_synonyms_datamuse(word):
    base_url = "https://api.datamuse.com/words"
    params = {"ml": word}
    
    response = requests.get(base_url, params=params)
    results = response.json()

    synonyms = [result["word"] for result in results]
    return set(synonyms)

def process_information_needs(information_needs):
    result = []

    for info_need in information_needs:
        query = info_need.get("query", "")
        words = query.split()
        print(words)
        for word in words:
            synonyms_for_word = {}
            synonyms_1 = list(get_synonyms(word))
            synonyms_2 = list(get_synonyms_datamuse(word))[:10]
            if synonyms_1 and synonyms_2:
                synonyms_for_word[word] = synonyms_1 + synonyms_2
                # sort alphabetically
                synonyms_for_word[word].sort()

            result.append(synonyms_for_word)

    return result

with open("queries.json", "r") as json_file:
    data = json.load(json_file)

information_needs = data.get("information_needs", [])

merged_synonyms = process_information_needs(information_needs)
merged_synonyms


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\rnrib\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


['artificial', 'intelligence']
['fight', 'fights', 'against', 'racism', 'discrimination']
['novel', 'AND', 'post-apocalyptic']
['american', 'civil', 'war']


[{'artificial': ['artificial',
   'bleached',
   'celluloid',
   'colored',
   'contrived',
   'faux',
   'hokey',
   'legal',
   'moral',
   'nonnatural',
   'stilted',
   'stilted',
   'stylized',
   'synthetic',
   'unreal']},
 {'intelligence': ['brainpower',
   'csis',
   'espionage',
   'information',
   'intellectual',
   'intelligence',
   'intelligence_activity',
   'intelligence_agency',
   'intelligence_information',
   'intelligence_operation',
   'intelligence_service',
   'intelligents',
   'intelligentsia',
   'news',
   'run-up',
   'security',
   'tidings',
   'tidings',
   'word']},
 {'fight': ['agitate',
   'argue',
   'battle',
   'beaten',
   'breach',
   'campaign',
   'combat',
   'competitiveness',
   'conflict',
   'conquer',
   'contend',
   'crusade',
   'defend',
   'drive',
   'engagement',
   'fight',
   'fight_back',
   'fight_down',
   'fighting',
   'oppose',
   'press',
   'push',
   'push',
   'scrap',
   'scuffle',
   'strife',
   'struggle',
   'stru

In [5]:
# output merged synonyms to txt file
with open("synonyms_short.txt", "w") as f:
    for synonyms_for_word in merged_synonyms:
        for word in synonyms_for_word.values():
            f.write(", ".join(word))
            f.write("\n")