In [1]:
import os

# Change this to the directory where this notebook is located.
# The working directory is switched before the parser imports below,
# presumably so the local parser modules can be found — confirm for your setup.
drive_path = '<path_to_this_notebook>'
os.chdir(drive_path)
# from EnglishDictionary import EnglishDictionary
from HardwordParser import HardwordParser
from HeuristicSoftwordParser import HeuristicParser
from SemanticSoftwordParser import SemanticSoftwordParser

# Splits an identifier name into softwords.
hard_word_parser = HardwordParser()

# NOTE: the variable name is misspelled ("heruristic"); it is kept as-is
# because the pipeline cell further down refers to it under this name.
heruristic_parser = HeuristicParser()

# Prefer the OPENAI_API_KEY environment variable so the secret does not have to
# be saved inside the notebook; the placeholder remains as a fallback for users
# who still want to paste their key here.
my_openai_apikey = os.environ.get("OPENAI_API_KEY", "<your_openai_apikey>")
semantic_parser = SemanticSoftwordParser(my_openai_apikey)

# Poor man's pipeline

In [2]:
# Identifier name to analyse; later cells read `variable_name` and `softword_list`.
variable_name = "exampleVarName_syslogckpt"
# Break the identifier into softwords with the hardword parser. In the recorded
# output below, 'syslogckpt' comes back as one unsplit softword.
softword_list = hard_word_parser.parse_hard_word(variable_name)
print(softword_list)

['example', 'Var', 'Name', 'syslogckpt']


In [3]:
# The context sentence depends only on the identifier name, so build it once.
context = f'This word is used in the Python programming identifier name "{variable_name}".'

split = []
for softword in softword_list:
    # Heuristic parser first to save money; `or` falls through to the semantic
    # parser only when the heuristic result is falsy (e.g. None).
    interpretation = heruristic_parser.parse(softword) or semantic_parser.parse(softword, context)

    # Append every piece of this softword's split to the overall result.
    split += interpretation['interpretation']['split']

split

[{'softword': 'example', 'type': 'dictionary', 'expansion': 'example'},
 {'softword': 'Var', 'type': 'abbreviation', 'expansion': 'variable'},
 {'softword': 'Name', 'type': 'dictionary', 'expansion': 'Name'},
 {'softword': 'sys', 'type': 'abbreviation', 'expansion': 'system'},
 {'softword': 'log', 'type': 'dictionary', 'expansion': 'log'},
 {'softword': 'ckpt', 'type': 'abbreviation', 'expansion': 'checkpoint'}]

# Rich man's pipeline

In [4]:
# Second example: only one underscore, followed by a long run of lowercase
# letters with no separators. Rebinds `variable_name` and `softword_list`.
variable_name = "another_exampleformyglouriouskinglbj"
# The hardword parser leaves the long run as a single softword (see the
# recorded output below); splitting it is left to the next cell.
softword_list = hard_word_parser.parse_hard_word(variable_name)
print(softword_list)

['another', 'exampleformyglouriouskinglbj']


In [5]:
# The context sentence is the same for every softword, so build it once.
context = f'This word is used in the Python programming identifier name "{variable_name}".'

split = []
for softword in softword_list:
    # No heuristic pass here: every softword goes straight to the semantic parser.
    interpretation = semantic_parser.parse(softword, context)
    split += interpretation['interpretation']['split']

split

[{'softword': 'another', 'type': 'dictionary', 'expansion': 'another'},
 {'softword': 'example', 'type': 'dictionary', 'expansion': 'example'},
 {'softword': 'for', 'type': 'dictionary', 'expansion': 'for'},
 {'softword': 'my', 'type': 'dictionary', 'expansion': 'my'},
 {'softword': 'glourious', 'type': 'typo', 'expansion': 'glorious'},
 {'softword': 'king', 'type': 'dictionary', 'expansion': 'king'},
 {'softword': 'lbj', 'type': 'abbreviation', 'expansion': 'lbj'}]

# Additional features

In [6]:
# The hardword parser can also classify the naming convention of an identifier name.
# The inner string literal uses single quotes: reusing the f-string's own double
# quotes inside the replacement field is a SyntaxError before Python 3.12.
print(f"NamingConvention: {hard_word_parser.classify_naming_convention('camelCaseExample')}")

NamingConvention: camelCase


In [7]:
# The semantic parser can show the reasoning process behind a parse. In the
# recorded output below it describes "exampleformyglouriouskinglbj", the last
# softword parsed above — presumably it always reflects the most recent
# `parse` call; confirm against SemanticSoftwordParser.
print(semantic_parser.get_reasoning_process())

The hardword "exampleformyglouriouskinglbj" is quite long and appears to be a concatenation of multiple semantic components. Let's break it down:

- "example" is a dictionary word.
- "for" is a dictionary word.
- "my" is a dictionary word.
- "glourious" appears to be a typo of the dictionary word "glorious".
- "king" is a dictionary word.
- "lbj" is most likely an abbreviation or acronym. LBJ commonly refers to Lyndon B. Johnson, a former US president, so it's an abbreviation.

Thus, the most likely interpretation is: "example" + "for" + "my" + "glorious" (corrected from typo) + "king" + "lbj".

Final interpreted phrase: "example for my glorious king lbj".

This seems like a meaningful phrase that makes sense as an identifier name.

I will now call the interpret_hardword function with these components.
