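### Test notebook for the option-shortening workflow

Runs each helper step of the workflow on a sample MCQ, then runs the end-to-end `check_and_shorten_long_option` pipeline.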

In [1]:
from src.general import extract_mcq_components, extract_correct_answer_letter

In [2]:
from src.option_shortening_helper import identify_longer_options, syntactic_analysis, calculate_length_range, generate_candidate_short_options, select_best_candidate

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sample MCQ used by the step-by-step cells and the end-to-end run below:
# a question stem followed by four lettered options, with one blank line
# between consecutive parts.
mcq = "\n\n".join(
    [
        "How do the id, ego, and superego interact in Freud's personality model?",
        "A) The ego prioritizes the fulfillment of the id's desires.",
        "B) The superego suppresses the ego entirely to enforce social rules.",
        "C) The id harmonizes completely with the superego to reduce anxiety.",
        "D) The ego balances the instinctive drives of the id with the moral standards of the superego.",
    ]
)


In [4]:
# Correct answer for `mcq`, written as the full option line; the text is
# identical to option D above.
mcq_answer = "D) The ego balances the instinctive drives of the id with the moral standards of the superego."

In [5]:
# Split the raw MCQ string into two parts; `question` is later passed as the
# question stem and `options` as the answer options to the helper calls.
question, options = extract_mcq_components(mcq)

In [6]:
# Ask the helper which option counts as the "longer" one. It returns two values:
# an index and the option text (in the recorded run, the text of option D
# without its "D) " prefix).
longer_option_index, longer_option_text = identify_longer_options(options)

In [7]:
# Display the option text that will be handed to the shortening steps.
longer_option_text

'The ego balances the instinctive drives of the id with the moral standards of the superego.'

In [8]:
# Stop the run here when there is nothing to shorten.
#
# * The check uses the option text rather than the index: a truthiness test on
#   an index reports "nothing found" when the long option sits at position 0.
#   NOTE(review): assumes identify_longer_options yields an empty/None text when
#   no long option exists -- confirm against the helper.
# * SystemExit is raised instead of calling exit(): inside a Jupyter kernel
#   exit() shuts the kernel down and throws away all state, whereas SystemExit
#   only aborts this cell (and a "Run All") while keeping the kernel alive.
if longer_option_text:
    print("The question contains longer options that may need shortening")
else:
    print("No longer options detected, no shortening needed")
    raise SystemExit("No longer options detected; stopping the notebook run.")

The question contains longer options that may need shortening


In [9]:
# Identifier passed as `invocation_id` to each of the step-by-step helper calls below.
invocation_id = "test20250910"

In [10]:
identified_rule_meta = await syntactic_analysis(
    invocation_id=invocation_id,
    model="gpt-4o",
    temperature=0.3,
    question_stem=question,
    options=options)

INFO:root:Table 'syntactic_analysis_metadata' already exists.
DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': '[ROLE]:\nYou are a linguist and expert at syntactic analysis of educational content. \n\n[TASK]:\nThe user provides a multiple choice question, including the question stem and four answer options (A, B, C, D).\nYour task is to analyze the structural patterns in the options and identify common syntactic rules that can be used for option generation and modification.\n\n[GUIDELINES]:\nAnalyze the syntactic structure of these options and identify the common pattern that most options follow. Focus on:\n1. Sentence structure (Subject-Verb-Object patterns)\n2. Common starting words or phrases\n3. Parallel construction patterns\n4. Grammatical elements that appear consistently\n\nProvide a generalized syntactic rule that describes the common structure, similar to these exa

In [11]:
# Pull the rule text out of the metadata (empty string if the key is missing)
# and show it.
identified_rule = identified_rule_meta.get("syntactic_rule", "")
print(f"Identified syntactic rule: {identified_rule}")


Identified syntactic rule: [Component noun phrase] + [verb] + [complement detailing relationship/interaction]


In [12]:
# Lower/upper length bounds derived from the options; they are passed as the
# min/max target to the shortening helpers below (7 and 18 in the recorded run).
min_length, max_length = calculate_length_range(options)

In [13]:
# Show the computed bounds.
print(f"Length range for options: min_length={min_length}, max_length={max_length}")

Length range for options: min_length=7, max_length=18


In [14]:
shortened_options_candidates_meta = await generate_candidate_short_options(
        invocation_id=invocation_id,
        model="gpt-4o",
        options=options,
        option_to_shorten=longer_option_text,
        syntactic_rule=identified_rule,
        min_target=min_length,  
        max_target=max_length,
        table_name="candidate_shortening_metadata",
        database_file= '../database/mcq_metadata.db'
    )  # List of lists, each inner list contains candidates for one longer option

INFO:root:Table 'candidate_shortening_metadata' already exists.
DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': '[ROLE]:\nYou are an expert educational assessment specialist. You are excellent at shortening multiple-choice question options while preserving their original meaning and following specific syntactic rules.\n\n[TASK]:\nThe user provides a multiple-choice question option that needs to be shortened, along with a syntactic rule that the shortened option must follow, and the target word length range.  \nYour task is to generate 5 different shortened versions of the option, each following the specified syntactic rule and within a target word range. Each version should preserve the original meaning but use different shortening strategies.\n\n[GUIDELINES]:\nPlease generate 5 shortened candidates fllowing these steps:\n\nSTEP 1: MEANING ANALYSIS\nWhat is the core meaning

In [14]:
# Extract the candidates from the metadata.
# NOTE(review): the fallback here is a list, but in the recorded run the stored
# value displays as one JSON-encoded string -- confirm which type
# select_best_candidate expects for `candidates`.
shortened_options_candidates = shortened_options_candidates_meta.get("candidates", [])

In [15]:
# Display the raw candidates value.
shortened_options_candidates

'["The ego mediates between id\'s drives and superego\'s morals.", "The ego balances id\'s impulses with superego\'s ethics.", "The ego negotiates id\'s urges and superego\'s standards.", "The ego integrates id\'s instincts and superego\'s values.", "The ego aligns id\'s drives with superego\'s moral framework."]'

In [17]:
candidate_selection_meta = await select_best_candidate(
    invocation_id=invocation_id,
    model="gpt-4o",
    options=options,
    option_to_shorten=longer_option_text,
    syntactic_rule=identified_rule,
    min_target=min_length,
    max_target=max_length,
    candidates=shortened_options_candidates,
    table_name="candidate_selection_metadata",
    database_file= '../database/mcq_metadata.db'
)  # Returns the best candidate or None    

INFO:root:Table 'candidate_selection_metadata' created successfully.
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/main/modules.json HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /api/resolve-cache/models/sentence-transformers/all-MiniLM-L6-v2/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 2197960168480 on C:\Users\ytian126\.cache\huggingface\hub\.locks\models--sentence-transformers--all-MiniLM-L6-v2\952a9b81c0bfd99800fabf352f69c7ccd46c5e43.lock
DEBUG:filelock:Lock 2197960168480 acquired on C:\Users\ytian126\.cache\huggingface\hub\.locks\models--sentence-transformers--all-

In [18]:
# Chosen shortened option, or None when the metadata has no "best_candidate" key.
best_candidate = candidate_selection_meta.get("best_candidate", None)

In [19]:
# Display the selected shortened option.
best_candidate

"The ego balances id's impulses with superego's ethics."

In [1]:
# test the whole pipeline:
from src.option_shortener_workflow import check_and_shorten_long_option 


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
updated_mcq, updated_mcq_answer, token_usage = await check_and_shorten_long_option(
    invocation_id="test20250910",
    mcq=mcq,    
    mcq_answer=mcq_answer,
    model="gpt-4o",
    )

INFO:root:Table 'syntactic_analysis_metadata' already exists.
DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': '[ROLE]:\nYou are a linguist and expert at syntactic analysis of educational content. \n\n[TASK]:\nThe user provides a multiple choice question, including the question stem and four answer options (A, B, C, D).\nYour task is to analyze the structural patterns in the options and identify common syntactic rules that can be used for option generation and modification.\n\n[GUIDELINES]:\nAnalyze the syntactic structure of these options and identify the common pattern that most options follow. Focus on:\n1. Sentence structure (Subject-Verb-Object patterns)\n2. Common starting words or phrases\n3. Parallel construction patterns\n4. Grammatical elements that appear consistently\n\nProvide a generalized syntactic rule that describes the common structure, similar to these exa

In [8]:
# Prints "True" when token usage is non-empty and "False" otherwise.
print(bool(token_usage))

True


In [9]:
# Print each label followed by its value, one item per line.
print(
    "Updated MCQ:",
    updated_mcq,
    "Updated MCQ Answer:",
    updated_mcq_answer,
    "Token usage:",
    token_usage,
    sep="\n",
)

Updated MCQ:
How do the id, ego, and superego interact in Freud's personality model?

A) The ego prioritizes the fulfillment of the id's desires.

B) The superego suppresses the ego entirely to enforce social rules.

C) The id harmonizes completely with the superego to reduce anxiety.

D) The ego balances id impulses and superego norms.
Updated MCQ Answer:
D) The ego balances id impulses and superego norms.
Token usage:
{'input_tokens': 2612, 'output_tokens': 594}


In [7]:
# Second sample MCQ for the end-to-end pipeline.
# The parts are joined with exactly one blank line, the same layout as `mcq`.
# The earlier triple-quoted literal mixed real line breaks, four-space
# indentation and "\n" escapes, which put whitespace-only lines (not blank
# lines) between the stem and the options.
mcq2 = "\n\n".join(
    [
        "What function did early exhibitors serve in the showcasing of movies in theaters?",
        "A) They created and pre-recorded the content displayed in theaters.",
        "B) They determined the arrangement of different elements in the film program.",
        "C) They frequently participated in live performances.",
        "D) They offered guidance to filmmakers regarding suitable movie content.",
    ]
)

In [12]:
mcq_updated_2, token_usage_2 = await check_and_shorten_long_option(
    invocation_id="test20250910",
    mcq=mcq2,    
    model="gpt-4o",
    )

In [13]:
# Prints "True" when token usage is non-empty and "False" otherwise.
print(bool(token_usage_2))

False
