In [3]:
import nltk
nltk.download('wordnet', "/home/zampinojosh/src/z/src/server/wordnet")

[nltk_data] Downloading package wordnet to
[nltk_data]     /home/zampinojosh/src/z/src/server/wordnet...


True

In [58]:
from nltk.corpus import wordnet as wn
import string

token_map = {
    'i': '1',
    'me': '1',
    'you': '2',
    'question': '3',
    'less': '4',
    'more': '5',
    'answer': '6',
    'them': '7',
    'measure': '8'    
    # Additional mappings can be added here
}

punctuation_map = {
    '?': '3',  # Token for "question"
    '!': '9',  # Token for "emphasis"
    # You can add more mappings if other punctuation marks carry specific meanings
}

# def expand_synonyms(word):
#     """Generate synonyms for a given word using WordNet, including the original word."""
#     synonyms = {word}  # Start with the original word in the set
#     for synset in wn.synsets(word):
#         synonyms.update(lemma.name().replace('_', ' ') for lemma in synset.lemmas())
#     return synonyms

# def expand_synonyms(word, depth=1):
#     """Generate synonyms for a given word using WordNet, including the original word and second-order synonyms."""
#     def get_synonyms(term, current_depth):
#         if current_depth > depth:
#             return set()
#         synonyms = {term}
#         for synset in wn.synsets(term):
#             for lemma in synset.lemmas():
#                 synonym = lemma.name().replace('_', ' ')
#                 synonyms.add(synonym)
#                 if current_depth < depth:
#                     synonyms.update(get_synonyms(synonym, current_depth + 1))
#         return synonyms

#     return get_synonyms(word, 0)

def expand_synonyms(word, depth=3):
    """Generate synonyms for a given word using WordNet, including the original word and, conditionally, second-order synonyms."""
    def get_synonyms(term, current_depth):
        if current_depth > depth:
            return set()
        synonyms = set()
        has_expanded = False  # Flag to check if we have expanded to second-order
        for synset in wn.synsets(term):
            for lemma in synset.lemmas():
                synonym = lemma.name().replace('_', ' ')
                synonyms.add(synonym)
                # Expand further only if no synonyms were found at the first level (and we are not at max depth)
                if not has_expanded and current_depth < depth:
                    additional_synonyms = get_synonyms(synonym, current_depth + 1)
                    if additional_synonyms:
                        has_expanded = True  # Update flag when expansion occurs
                    synonyms.update(additional_synonyms)
        return synonyms

    return get_synonyms(word, 0)





def build_translation_dict():
    """Build a translation dictionary using predefined tokens and WordNet synonyms."""
    translation_dict = {}
    for word, token in token_map.items():
        for synonym in expand_synonyms(word):
            translation_dict[synonym.lower()] = token
            
    return translation_dict

translation_dict = build_translation_dict()

def translate_to_z(sentence):
    # Create a list to store translated tokens
    translated = []
    # Remove and translate punctuation using a modified approach
    words = sentence.split()
    for word in words:
        # Strip punctuation from the current word
        stripped_word = word.strip(string.punctuation).lower()
        # Translate the stripped word
        if stripped_word:
            translated.append(translation_dict.get(stripped_word, ''))
        # Check for trailing punctuation in the original word and translate if mapped
        for char in word:
            if char in punctuation_map:
                translated.append(punctuation_map[char])

    return ' '.join(translated)

# Example usage

sentences = [
    "I have a question for you.",
    "You have a question for me.",
    "Do you know?",
    "Question them.",
    "Do they know?",
    "ask.",
    "meet me and them soon"
]

# Function to format and print each sentence with its translation
def print_translations(sentences):
    for i, sentence in enumerate(sentences, 1):
        z_code = translate_to_z(sentence).replace(" ", "")
        print(f"{i}) Original: '{sentence}' -> Z code: '{z_code}'")

print_translations(sentences)


expanded_synonyms = " ".join(expand_synonyms("measure"))
print(f"Expanded synonyms for 'measure': {expanded_synonyms}")

1) Original: 'I have a question for you.' -> Z code: '13'
2) Original: 'You have a question for me.' -> Z code: '31'
3) Original: 'Do you know?' -> Z code: '63'
4) Original: 'Question them.' -> Z code: '3'
5) Original: 'Do they know?' -> Z code: '63'
6) Original: 'ask.' -> Z code: ''
7) Original: 'meet me and them soon' -> Z code: '1'
Expanded synonyms for 'measure': measure out amount assess appraise measuring stick beat cadence quantify evaluate quantity measuring mensurate value metre meter measuring rod step measure bar valuate touchstone bill standard mensuration criterion measurement
