lambda-feedback · m-messer · Nov 3, 2025 · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025
diff --git a/app/context/physical_quantity.py b/app/context/physical_quantity.py
@@ -476,73 +476,106 @@ def feedback_procedure_generator(parameters_dict):
         graphs.update({label: graph})
     return graphs
 
-
-def expression_preprocess(name, expr, parameters):
-    if parameters.get("strictness", "natural") == "legacy":
-        prefix_data = {(p[0], p[1], tuple(), p[3]) for p in set_of_SI_prefixes}
-        prefixes = []
-        for prefix in prefix_data:
-            prefixes = prefixes+[prefix[0]] + list(prefix[-1])
-        prefix_short_forms = [prefix[1] for prefix in prefix_data]
-        unit_data = set_of_SI_base_unit_dimensions \
-            | set_of_derived_SI_units_in_SI_base_units \
-            | set_of_common_units_in_SI \
-            | set_of_very_common_units_in_SI \
-            | set_of_imperial_units
-        unit_long_forms = prefixes
-        for unit in unit_data:
-            unit_long_forms = unit_long_forms+[unit[0]] + list(unit[-2]) + list(unit[-1])
-        unit_long_forms = "("+"|".join(unit_long_forms)+")"
-        # Rewrite any expression on the form "*UNIT" (but not "**UNIT") as " UNIT"
-        # Example: "newton*metre" ---> "newton metre"
-        search_string = r"(?<!\*)\* *"+unit_long_forms
+def preprocess_legacy(expr, parameters):
+    """Rewrite ``expr`` with the "legacy" strictness spacing rules and return it.
+
+    Each regex rewrite below is repeated until it no longer matches, in the order
+    written. ``parameters`` is accepted but is not read by this function.
+    """
+    prefix_data = {(p[0], p[1], tuple(), p[3]) for p in set_of_SI_prefixes}
+    prefixes = [form for prefix in prefix_data for form in (prefix[0], *prefix[-1])]
+    prefix_short_forms = [prefix[1] for prefix in prefix_data]
+    unit_data = (set_of_SI_base_unit_dimensions
+                 | set_of_derived_SI_units_in_SI_base_units
+                 | set_of_common_units_in_SI
+                 | set_of_very_common_units_in_SI
+                 | set_of_imperial_units)
+    unit_long_forms = prefixes + [form for unit in unit_data for form in (unit[0], *unit[-2], *unit[-1])]
+    unit_long_forms = "(" + "|".join(unit_long_forms) + ")"
+    # Rewrite any expression of the form "*UNIT" (but not "**UNIT") as " UNIT"
+    # Example: "newton*metre" ---> "newton metre"; the search skips expr[0], hence the +1 offsets
+    search_string = r"(?<!\*)\* *" + unit_long_forms
+    match_content = re.search(search_string, expr[1:])
+    while match_content is not None:
+        start, end = match_content.span()
+        expr = expr[0:start + 1] + match_content.group().replace("*", " ") + expr[end + 1:]
         match_content = re.search(search_string, expr[1:])
-        while match_content is not None:
-            expr = expr[0:match_content.span()[0]+1]+match_content.group().replace("*", " ")+expr[match_content.span()[1]+1:]
-            match_content = re.search(search_string, expr[1:])
-        prefixes = "("+"|".join(prefixes)+")"
-        # Rewrite any expression on the form "PREFIX UNIT" as "PREFIXUNIT"
-        # Example: "kilo metre" ---> "kilometre"
-        search_string = prefixes+" "+unit_long_forms
+    prefixes = "(" + "|".join(prefixes) + ")"
+    # Rewrite any expression of the form "PREFIX UNIT" as " PREFIXUNIT"
+    # Example: "kilo metre" ---> " kilometre" (a space is inserted in front of the joined word)
+    search_string = prefixes + " " + unit_long_forms
+    match_content = re.search(search_string, expr)
+    while match_content is not None:
+        start, end = match_content.span()
+        joined = "".join(match_content.group().split())
+        expr = expr[0:start] + " " + joined + expr[end:]
         match_content = re.search(search_string, expr)
-        while match_content is not None:
-            expr = expr[0:match_content.span()[0]]+" "+"".join(match_content.group().split())+expr[match_content.span()[1]:]
-            match_content = re.search(search_string, expr)
-        unit_short_forms = [u[1] for u in unit_data]
-        short_forms = "("+"|".join(list(set(prefix_short_forms+unit_short_forms)))+")"
-        # Add space before short forms of prefixes or unit names if they are preceded by numbers or multiplication
-        # Example: "100Pa" ---> "100 Pa"
-        search_string = r"[0-9\*\(\)]"+short_forms
+    unit_short_forms = [u[1] for u in unit_data]
+    short_forms = "(" + "|".join(set(prefix_short_forms + unit_short_forms)) + ")"
+    # Add a space before a prefix or unit short form that directly follows a digit, "*", "(" or ")"
+    # Example: "100Pa" ---> "100 Pa"
+    search_string = r"[0-9\*\(\)]" + short_forms
+    match_content = re.search(search_string, expr)
+    while match_content is not None:
+        expr = expr[0:match_content.start() + 1] + " " + expr[match_content.start() + 1:]
         match_content = re.search(search_string, expr)
-        while match_content is not None:
-            expr = expr[0:match_content.span()[0]+1]+" "+expr[match_content.span()[0]+1:]
-            match_content = re.search(search_string, expr)
-        # Remove space after prefix short forms if they are preceded by numbers, multiplication or space
-        # Example: "100 m Pa" ---> "100 mPa"
-        prefix_short_forms = "("+"|".join(prefix_short_forms)+")"
-        search_string = r"[0-9\*\(\) ]"+prefix_short_forms+" "
+    # Remove the space after a prefix short form preceded by a digit, "*", "(", ")" or space
+    # Example: "100 m Pa" ---> "100  mPa" (the preceding character is kept twice: slice + group)
+    prefix_short_forms = "(" + "|".join(prefix_short_forms) + ")"
+    search_string = r"[0-9\*\(\) ]" + prefix_short_forms + " "
+    match_content = re.search(search_string, expr)
+    while match_content is not None:
+        expr = expr[0:match_content.start() + 1] + match_content.group()[0:-1] + expr[match_content.end():]
         match_content = re.search(search_string, expr)
-        while match_content is not None:
-            expr = expr[0:match_content.span()[0]+1]+match_content.group()[0:-1]+expr[match_content.span()[1]:]
-            match_content = re.search(search_string, expr)
-        # Remove multiplication and space after prefix short forms if they are preceded by numbers, multiplication or space
-        # Example:  "100 m* Pa" ---> "100 mPa"
-        search_string = r"[0-9\*\(\) ]"+prefix_short_forms+"\* "
+    # Remove "* " after a prefix short form preceded by a digit, "*", "(", ")" or space
+    # Example: "100 m* Pa" ---> "100  mPa" (same doubled leading character as the previous step)
+    search_string = r"[0-9\*\(\) ]" + prefix_short_forms + r"\* "
+    match_content = re.search(search_string, expr)
+    while match_content is not None:
+        expr = expr[0:match_content.start() + 1] + match_content.group()[0:-2] + expr[match_content.end():]
         match_content = re.search(search_string, expr)
-        while match_content is not None:
-            expr = expr[0:match_content.span()[0]+1]+match_content.group()[0:-2]+expr[match_content.span()[1]:]
-            match_content = re.search(search_string, expr)
-        # Replace multiplication followed by space before unit short forms with only spaces if they are preceded by numbers or space
-        # Example:  "100* Pa" ---> "100 Pa"
-        unit_short_forms = "("+"|".join(unit_short_forms)+")"
-        search_string = r"[0-9\(\) ]\* "+unit_short_forms
+    # Replace "* " in front of a unit short form with spaces when a digit, "(", ")" or space precedes it
+    # Example: "100* Pa" ---> "100  Pa" (the "*" itself becomes a space)
+    unit_short_forms = "(" + "|".join(unit_short_forms) + ")"
+    search_string = r"[0-9\(\) ]\* " + unit_short_forms
+    match_content = re.search(search_string, expr)
+    while match_content is not None:
+        start, end = match_content.span()
+        expr = expr[0:start] + match_content.group().replace("*", " ") + expr[end:]
         match_content = re.search(search_string, expr)
-        while match_content is not None:
-            expr = expr[0:match_content.span()[0]]+match_content.group().replace("*", " ")+expr[match_content.span()[1]:]
-            match_content = re.search(search_string, expr)
 
-    success = True
-    return success, expr, None
+    return expr
+
+def transform_prefixes_to_standard(expr):
+    """
+    Replace alternative SI prefix spellings in ``expr`` with the standard prefix name.
+
+    An alternative is rewritten only where no word character precedes it and, after
+    optional whitespace, an ASCII letter, 'µ' or 'Ω' follows; the match (including that
+    whitespace) becomes the prefix name plus one space. Finally, runs of two or more
+    whitespace characters collapse to one space and the result is stripped.
+    """
+    # Tail shared by every pattern: optional whitespace, then a lookahead for the next symbol.
+    unit_follows = r'\s*(?=[A-Za-zµΩ])'
+    for standard_name, _symbol, _power, spellings in set_of_SI_prefixes:
+        # Empty alternatives are skipped: they would match in front of every such symbol.
+        for spelling in filter(None, spellings):
+            token = re.compile(r'(?<!\w)' + re.escape(spelling) + unit_follows)
+            expr = token.sub(standard_name + ' ', expr)
+
+    # Normalize spacing (no multiple spaces), then trim both ends.
+    collapsed = re.sub(r'\s{2,}', ' ', expr)
+
+    return collapsed.strip()
+
+def expression_preprocess(name, expr, parameters):
+    """Preprocess ``expr`` according to ``parameters["strictness"]``; returns ``(True, expr, None)``."""
+    legacy_mode = parameters.get("strictness", "natural") == "legacy"
+    if legacy_mode:
+        processed = preprocess_legacy(expr, parameters)
+    else:
+        processed = transform_prefixes_to_standard(expr)
+    return True, processed, None
 
 
 def feedback_string_generator(tags, graph, parameters_dict):

diff --git a/app/tests/__init__.py b/app/tests/__init__.py
diff --git a/app/tests/physical_quantity_evaluation_tests.py b/app/tests/physical_quantity_evaluation_tests.py
@@ -374,5 +374,24 @@ def test_answer_zero_value(self):
         result = evaluation_function(res, ans, params, include_test_data=True)
         assert result["is_correct"] is False
 
+    @pytest.mark.parametrize(
+        "ans,res",
+        [
+            ("10 ohm", "10 Ω"),
+            ("10 micro A", "10 μA"),
+            ("10 micro A", "10 μ A"),
+            ("30 degree", "30 °"),
+        ]
+    )
+    def test_greek_letter_units(self, ans, res):
+        """A response written with a unit symbol character must match the spelled-out answer."""
+        # Every case runs with strict_syntax off and physical_quantity mode on.
+        settings = dict(
+            strict_syntax=False, physical_quantity=True, elementary_functions=True,
+        )
+        outcome = evaluation_function(res, ans, settings)
+        assert outcome["is_correct"] is True
+
+
 if __name__ == "__main__":
     pytest.main(['-xk not slow', "--no-header", os.path.abspath(__file__)])
diff --git a/app/tests/symbolic_evaluation_tests.py b/app/tests/symbolic_evaluation_tests.py
@@ -2009,5 +2009,89 @@ def test_abstract_integral(self):
         result = evaluation_function(response, answer, params)
         assert result["is_correct"] is True
 
+    @pytest.mark.parametrize("unicode_char,letter_name", [
+        ("Α", "Alpha"), ("α", "alpha"), ("Β", "Beta"), ("β", "beta"),
+        ("Γ", "Gamma"), ("γ", "gamma"), ("Δ", "Delta"), ("δ", "delta"),
+        ("Ε", "Epsilon"), ("ε", "epsilon"), ("Ζ", "Zeta"), ("ζ", "zeta"),
+        ("Η", "Eta"), ("η", "eta"), ("Θ", "Theta"), ("θ", "theta"),
+        ("Ι", "Iota"), ("ι", "iota"), ("Κ", "Kappa"), ("κ", "kappa"),
+        ("Λ", "Lambda"), ("λ", "lambda"),
+        ("Μ", "Mu"), ("μ", "mu"), ("Ν", "Nu"), ("ν", "nu"),
+        ("Ξ", "Xi"), ("ξ", "xi"), ("Ο", "Omicron"), ("ο", "omicron"),
+        ("Π", "Pi"), ("π", "pi"), ("Ρ", "Rho"), ("ρ", "rho"),
+        ("Σ", "Sigma"), ("σ", "sigma"), ("Τ", "Tau"), ("τ", "tau"),
+        ("Υ", "Upsilon"), ("υ", "upsilon"), ("Φ", "Phi"), ("φ", "phi"),
+        ("Χ", "Chi"), ("χ", "chi"), ("Ψ", "Psi"), ("ψ", "psi"),
+        ("Ω", "Omega"), ("ω", "omega")
+    ])
+    def test_greek_unicode_letters(self, unicode_char, letter_name):
+        """A single Unicode Greek letter as the response must equal its spelled-out name."""
+        # The Unicode character is the response; the spelled-out name is the answer.
+        settings = dict(
+            strict_syntax=False,
+            elementary_functions=False,
+        )
+        outcome = evaluation_function(unicode_char, letter_name, settings)
+        assert outcome["is_correct"] is True
+
+    @pytest.mark.parametrize("unicode_expr,letter_expr", [
+        # Basic expressions with common variables
+        ("α + β", "alpha + beta"),
+        ("2μ + 3", "2*mu + 3"),
+        ("π*r^2", "pi*r^2"),
+        ("θ/2", "theta/2"),
+        ("σ^2", "sigma^2"),
+
+        # Chi vs X confusion tests (CRITICAL)
+        ("χ + x", "chi + x"),
+        ("Χ + X", "Chi + X"),
+        ("χ*x", "chi*x"),
+        ("x^2 + χ", "x^2 + chi"),
+        ("χ^2 + x^2", "chi^2 + x^2"),
+
+        # Xi vs X confusion tests
+        ("ξ + x", "xi + x"),
+        ("Ξ*X", "Xi*X"),
+
+        # Rho vs P confusion tests
+        ("ρ + p", "rho + p"),
+        ("Ρ*P", "Rho*P"),
+
+        # Nu vs V confusion tests
+        ("ν + v", "nu + v"),
+        ("Ν*V", "Nu*V"),
+
+        # Omicron vs O confusion tests
+        ("ο + o", "omicron + o"),
+        ("Ο*O", "Omicron*O"),
+
+        # Multiple Greek letters with Latin variables
+        ("α*x + β*y", "alpha*x + beta*y"),
+        ("μ*σ^2 + ν", "mu*sigma^2 + nu"),
+        ("γ*t + δ*s", "gamma*t + delta*s"),
+        ("Λ*x + μ*y + ν*z", "Lambda*x + mu*y + nu*z"),
+        ("λ*x + μ*y + ν*z", "lambda*x + mu*y + nu*z"),
+
+        # Complex expressions
+        ("sin(θ) + cos(φ)", "sin(theta) + cos(phi)"),
+        ("e^(iπ)", "e^(i*pi)"),
+
+        # Edge cases with similar-looking letters
+        ("ω*t + φ", "omega*t + phi"),
+        ("Ψ(x) + ψ(y)", "Psi(x) + psi(y)"),
+        ("Δx/Δt", "Delta*x/Delta*t"),
+        ("ε_0*μ_0", "epsilon_0*mu_0"),
+    ])
+    def test_greek_letters_in_expressions(self, unicode_expr, letter_expr):
+        """An expression typed with Unicode Greek letters must equal its spelled-out form."""
+        # The Unicode expression is the response; the spelled-out expression is the answer.
+        settings = dict(
+            strict_syntax=False,
+            elementary_functions=False,
+        )
+        outcome = evaluation_function(unicode_expr, letter_expr, settings)
+        assert outcome["is_correct"] is True
+
+
 if __name__ == "__main__":
     pytest.main(['-xk not slow', "--tb=line", '--durations=10', os.path.abspath(__file__)])