Skip to content
153 changes: 93 additions & 60 deletions app/context/physical_quantity.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,73 +476,106 @@ def feedback_procedure_generator(parameters_dict):
graphs.update({label: graph})
return graphs


def preprocess_legacy(expr, parameters):
    """
    Rewrite the spacing and multiplication signs around unit and prefix names
    in ``expr`` following the conventions of the "legacy" strictness mode.

    The rewriting steps are applied in this order, each one repeated until
    its pattern no longer matches:

      1. "*UNIT" (but not "**UNIT") becomes " UNIT" for long-form names,
         e.g. "newton*metre" ---> "newton metre".
      2. "PREFIX UNIT" becomes "PREFIXUNIT" for long-form names,
         e.g. "kilo metre" ---> "kilometre".
      3. A space is inserted before a prefix/unit short form that directly
         follows a digit, "*", "(" or ")", e.g. "100Pa" ---> "100 Pa".
      4. The space after a prefix short form is removed,
         e.g. "100 m Pa" ---> "100 mPa" (see the review note at step 4).
      5. "* " after a prefix short form is removed,
         e.g. "100 m* Pa" ---> "100 mPa" (see the review note at step 4).
      6. "* " before a unit short form is replaced by spaces,
         e.g. "100* Pa" ---> "100 Pa".

    ``parameters`` is accepted for signature compatibility; it is not read
    by this function.

    Returns the rewritten expression string.
    """
    # Only index 0 (long name), index 1 (short form) and index 3 (alternative
    # names) of each prefix entry are used below; index 2 is replaced by an
    # empty tuple when the set is rebuilt.
    prefix_data = {(p[0], p[1], tuple(), p[3]) for p in set_of_SI_prefixes}
    prefixes = []
    for prefix in prefix_data:
        prefixes.append(prefix[0])
        prefixes.extend(prefix[-1])
    prefix_short_forms = [prefix[1] for prefix in prefix_data]

    unit_data = set_of_SI_base_unit_dimensions \
        | set_of_derived_SI_units_in_SI_base_units \
        | set_of_common_units_in_SI \
        | set_of_very_common_units_in_SI \
        | set_of_imperial_units
    # Copy so that extending the long-form list does not also grow `prefixes`.
    unit_long_forms = list(prefixes)
    for unit in unit_data:
        unit_long_forms.append(unit[0])
        unit_long_forms.extend(unit[-2])
        unit_long_forms.extend(unit[-1])
    unit_short_forms = [u[1] for u in unit_data]

    # NOTE(review): names are inserted into the patterns without re.escape;
    # this presumably relies on the unit tables containing no regex
    # metacharacters -- confirm against the table definitions.
    long_form_group = "(" + "|".join(unit_long_forms) + ")"
    prefix_group = "(" + "|".join(prefixes) + ")"
    short_form_group = "(" + "|".join(list(set(prefix_short_forms + unit_short_forms))) + ")"
    prefix_short_group = "(" + "|".join(prefix_short_forms) + ")"
    unit_short_group = "(" + "|".join(unit_short_forms) + ")"

    # Step 1: rewrite any expression on the form "*UNIT" (but not "**UNIT") as " UNIT"
    # Example: "newton*metre" ---> "newton metre"
    # The search skips the first character of expr, so every span is shifted by one.
    search_string = r"(?<!\*)\* *" + long_form_group
    match_content = re.search(search_string, expr[1:])
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[0:start + 1] + match_content.group().replace("*", " ") + expr[end + 1:]
        match_content = re.search(search_string, expr[1:])

    # Step 2: rewrite any expression on the form "PREFIX UNIT" as "PREFIXUNIT"
    # Example: "kilo metre" ---> "kilometre"
    search_string = prefix_group + " " + long_form_group
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[0:start] + " " + "".join(match_content.group().split()) + expr[end:]
        match_content = re.search(search_string, expr)

    # Step 3: add space before short forms of prefixes or unit names if they
    # are preceded by numbers or multiplication
    # Example: "100Pa" ---> "100 Pa"
    search_string = r"[0-9\*\(\)]" + short_form_group
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start = match_content.span()[0]
        expr = expr[0:start + 1] + " " + expr[start + 1:]
        match_content = re.search(search_string, expr)

    # Step 4: remove space after prefix short forms if they are preceded by
    # numbers, multiplication or space
    # Example: "100 m Pa" ---> "100 mPa"
    # NOTE(review): here and in step 5 the character preceding the prefix is
    # emitted twice (once by the slice up to start + 1, once as the first
    # character of the matched group). Kept as-is to preserve legacy output.
    search_string = r"[0-9\*\(\) ]" + prefix_short_group + " "
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[0:start + 1] + match_content.group()[0:-1] + expr[end:]
        match_content = re.search(search_string, expr)

    # Step 5: remove multiplication and space after prefix short forms if they
    # are preceded by numbers, multiplication or space
    # Example: "100 m* Pa" ---> "100 mPa"
    # Raw string: "\*" is not a valid escape sequence in a normal string literal.
    search_string = r"[0-9\*\(\) ]" + prefix_short_group + r"\* "
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[0:start + 1] + match_content.group()[0:-2] + expr[end:]
        match_content = re.search(search_string, expr)

    # Step 6: replace multiplication followed by space before unit short forms
    # with only spaces if they are preceded by numbers or space
    # Example: "100* Pa" ---> "100 Pa"
    search_string = r"[0-9\(\) ]\* " + unit_short_group
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[0:start] + match_content.group().replace("*", " ") + expr[end:]
        match_content = re.search(search_string, expr)

    return expr

def transform_prefixes_to_standard(expr):
    """
    Replace alternative prefix spellings in ``expr`` with the standard prefix name.

    Only the alternative spellings listed in ``set_of_SI_prefixes`` are
    rewritten. An alternative is replaced when it is not preceded by a word
    character and is followed, optionally after whitespace, by one of the
    characters accepted by the lookahead below. The replacement is the
    standard prefix name followed by exactly one space.

    After all replacements, runs of two or more whitespace characters are
    collapsed to a single space and surrounding whitespace is stripped.
    """
    for standard_name, _symbol, _power, spellings in set_of_SI_prefixes:
        replacement = standard_name + ' '
        # Empty alternatives are skipped.
        for spelling in filter(None, spellings):
            # Start-of-word guard, the escaped spelling, any whitespace, then
            # a lookahead for the first character of whatever follows.
            pattern = r'(?<!\w)' + re.escape(spelling) + r'\s*(?=[A-Za-zµΩ])'
            expr = re.sub(pattern, replacement, expr)

    # Normalize spacing (no multiple spaces)
    collapsed = re.sub(r'\s{2,}', ' ', expr)
    return collapsed.strip()

def expression_preprocess(name, expr, parameters):
    """
    Preprocess ``expr`` according to the "strictness" entry of ``parameters``.

    When ``parameters["strictness"]`` is "legacy" the expression is passed
    through ``preprocess_legacy``; for any other value (the default is
    "natural") it is passed through ``transform_prefixes_to_standard``.

    ``name`` is not used by this function.

    Returns the tuple ``(True, processed_expr, None)``.
    """
    use_legacy = parameters.get("strictness", "natural") == "legacy"
    if use_legacy:
        processed = preprocess_legacy(expr, parameters)
    else:
        processed = transform_prefixes_to_standard(expr)
    return True, processed, None


def feedback_string_generator(tags, graph, parameters_dict):
Expand Down
Empty file added app/tests/__init__.py
Empty file.
19 changes: 19 additions & 0 deletions app/tests/physical_quantity_evaluation_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,5 +374,24 @@ def test_answer_zero_value(self):
result = evaluation_function(res, ans, params, include_test_data=True)
assert result["is_correct"] is False

@pytest.mark.parametrize(
    "ans,res",
    [
        ("10 ohm", "10 Ω"),
        ("10 micro A", "10 μA"),
        ("10 micro A", "10 μ A"),
        ("30 degree", "30 °"),
    ]
)
def test_greek_letter_units(self, ans, res):
    """A response written with Greek/unit symbols must match the spelled-out answer."""
    params = dict(
        strict_syntax=False,
        physical_quantity=True,
        elementary_functions=True,
    )
    outcome = evaluation_function(res, ans, params)
    assert outcome["is_correct"] is True


if __name__ == "__main__":
    # Run this file's tests directly, passing the same pytest options as before.
    pytest.main([
        '-xk not slow',
        "--no-header",
        os.path.abspath(__file__),
    ])
84 changes: 84 additions & 0 deletions app/tests/symbolic_evaluation_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2009,5 +2009,89 @@ def test_abstract_integral(self):
result = evaluation_function(response, answer, params)
assert result["is_correct"] is True

@pytest.mark.parametrize("unicode_char,letter_name", [
    ("Α", "Alpha"), ("α", "alpha"), ("Β", "Beta"), ("β", "beta"),
    ("Γ", "Gamma"), ("γ", "gamma"), ("Δ", "Delta"), ("δ", "delta"),
    ("Ε", "Epsilon"), ("ε", "epsilon"), ("Ζ", "Zeta"), ("ζ", "zeta"),
    ("Η", "Eta"), ("η", "eta"), ("Θ", "Theta"), ("θ", "theta"),
    ("Ι", "Iota"), ("ι", "iota"), ("Κ", "Kappa"), ("κ", "kappa"),
    ("Λ", "Lambda"), ("λ", "lambda"),
    ("Μ", "Mu"), ("μ", "mu"), ("Ν", "Nu"), ("ν", "nu"),
    ("Ξ", "Xi"), ("ξ", "xi"), ("Ο", "Omicron"), ("ο", "omicron"),
    ("Π", "Pi"), ("π", "pi"), ("Ρ", "Rho"), ("ρ", "rho"),
    ("Σ", "Sigma"), ("σ", "sigma"), ("Τ", "Tau"), ("τ", "tau"),
    ("Υ", "Upsilon"), ("υ", "upsilon"), ("Φ", "Phi"), ("φ", "phi"),
    ("Χ", "Chi"), ("χ", "chi"), ("Ψ", "Psi"), ("ψ", "psi"),
    ("Ω", "Omega"), ("ω", "omega")
])
def test_greek_unicode_letters(self, unicode_char, letter_name):
    """A single Greek unicode character must match the spelled-out letter name."""
    params = dict(strict_syntax=False, elementary_functions=False)
    # The unicode character is the response, the spelled-out name the answer.
    outcome = evaluation_function(unicode_char, letter_name, params)
    assert outcome["is_correct"] is True

@pytest.mark.parametrize("unicode_expr,letter_expr", [
    # Simple expressions using common variables
    ("α + β", "alpha + beta"),
    ("2μ + 3", "2*mu + 3"),
    ("π*r^2", "pi*r^2"),
    ("θ/2", "theta/2"),
    ("σ^2", "sigma^2"),

    # Chi must not be confused with Latin x/X
    ("χ + x", "chi + x"),
    ("Χ + X", "Chi + X"),
    ("χ*x", "chi*x"),
    ("x^2 + χ", "x^2 + chi"),
    ("χ^2 + x^2", "chi^2 + x^2"),

    # Xi must not be confused with Latin x/X
    ("ξ + x", "xi + x"),
    ("Ξ*X", "Xi*X"),

    # Rho must not be confused with Latin p/P
    ("ρ + p", "rho + p"),
    ("Ρ*P", "Rho*P"),

    # Nu must not be confused with Latin v/V
    ("ν + v", "nu + v"),
    ("Ν*V", "Nu*V"),

    # Omicron must not be confused with Latin o/O
    ("ο + o", "omicron + o"),
    ("Ο*O", "Omicron*O"),

    # Several Greek letters mixed with Latin variables
    ("α*x + β*y", "alpha*x + beta*y"),
    ("μ*σ^2 + ν", "mu*sigma^2 + nu"),
    ("γ*t + δ*s", "gamma*t + delta*s"),
    ("Λ*x + μ*y + ν*z", "Lambda*x + mu*y + nu*z"),
    ("λ*x + μ*y + ν*z", "lambda*x + mu*y + nu*z"),

    # More involved expressions
    ("sin(θ) + cos(φ)", "sin(theta) + cos(phi)"),
    ("e^(iπ)", "e^(i*pi)"),

    # Edge cases: similar-looking letters, function-call syntax, subscripts
    ("ω*t + φ", "omega*t + phi"),
    ("Ψ(x) + ψ(y)", "Psi(x) + psi(y)"),
    ("Δx/Δt", "Delta*x/Delta*t"),
    ("ε_0*μ_0", "epsilon_0*mu_0"),
])
def test_greek_letters_in_expressions(self, unicode_expr, letter_expr):
    """An expression written with Greek unicode letters must match its spelled-out form."""
    params = dict(strict_syntax=False, elementary_functions=False)
    # The unicode expression is the response, the spelled-out one the answer.
    outcome = evaluation_function(unicode_expr, letter_expr, params)
    assert outcome["is_correct"] is True


if __name__ == "__main__":
    # Run this file's tests directly, passing the same pytest options as before.
    pytest.main([
        '-xk not slow',
        "--tb=line",
        '--durations=10',
        os.path.abspath(__file__),
    ])
Loading