In [43]:
import nltk
from nltk.corpus import wordnet
import requests
import json

nltk.download('wordnet')

def get_synonyms(word):
    synonyms = []
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.append(lemma.name())
    return set(synonyms)

def get_synonyms_datamuse(word):
    base_url = "https://api.datamuse.com/words"
    params = {"ml": word}
    
    response = requests.get(base_url, params=params)
    results = response.json()

    synonyms = [result["word"] for result in results]
    return set(synonyms)

def process_information_needs(information_needs):
    result = []

    for info_need in information_needs:
        query = info_need.get("query", "")
        words = query.split()
        print(words)
        for word in words:
            synonyms_for_word = {}
            synonyms_1 = list(get_synonyms(word))
            synonyms_2 = list(get_synonyms_datamuse(word))
            if synonyms_1 and synonyms_2:
                synonyms_for_word[word] = synonyms_1 + synonyms_2
                # sort alphabetically
                synonyms_for_word[word].sort()

            result.append(synonyms_for_word)

    return result

with open("queries.json", "r") as json_file:
    data = json.load(json_file)

information_needs = data.get("information_needs", [])

merged_synonyms = process_information_needs(information_needs)
merged_synonyms


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\rnrib\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


['artificial', 'intelligence']
['fight', 'fights', 'against', 'racism', 'discrimination']
['novel', 'AND', 'apocalyptic']
['american', 'civil', 'war']


[{'artificial': ['affected',
   'anthropogenic',
   'arbitrary',
   'arranged',
   'artifical',
   'artifically',
   'artifice',
   'artificial',
   'artificials',
   'artisanal',
   'bioengineered',
   'biogenetic',
   'biological',
   'bionic',
   'biotechnical',
   'bleached',
   'bogus',
   'cardboard',
   'celluloid',
   'chemical',
   'colored',
   'constructed',
   'contrivable',
   'contrived',
   'contrived',
   'conventional',
   'conventionalized',
   'crt',
   'cybernetic',
   'dummy',
   'dyed',
   'engineered',
   'ersatz',
   'extraneous',
   'factitious',
   'fake',
   'false',
   'faux',
   'fictitious',
   'formula',
   'hokey',
   'human',
   'human-made',
   'humanmade',
   'illusional',
   'illusionary',
   'illusory',
   'imitation',
   'impractical',
   'inartificial',
   'induced',
   'industrial',
   'industrialized',
   'inorganic',
   'intrinsic',
   'labor',
   'labour',
   'legal',
   'lifelike',
   'machine',
   'made-up',
   'man-made',
   'manmade',
   '

In [45]:
# output merged synonyms to txt file
with open("synonyms.txt", "w") as f:
    for synonyms_for_word in merged_synonyms:
        for word in synonyms_for_word.values():
            f.write(", ".join(word))
            f.write("\n")