Skip to content

Commit

Permalink
Merge pull request #149 from lambda-feedback/tr129-experiment-with-sy…
Browse files Browse the repository at this point in the history
…ntactical-equivalence

Tr129 experiment with syntactical equivalence
  • Loading branch information
KarlLundengaard committed Apr 24, 2024
2 parents dc1002c + 67cd412 commit fbd68de
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 9 deletions.
75 changes: 66 additions & 9 deletions app/symbolic_comparison_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from sympy import Abs, Equality, latex, pi, Symbol, Add, Pow, Mul
from sympy.printing.latex import LatexPrinter
from copy import deepcopy
import re

from .expression_utilities import (
substitute_input_symbols,
Expand Down Expand Up @@ -144,22 +145,68 @@ def evaluation_node_internal(unused_input):
def criterion_equality_node(criterion, parameters_dict, label=None):
    """Build the criteria graph that evaluates an equality criterion ``lhs = rhs``.

    The graph first checks mathematical equivalence (via ``check_equality``).
    When that check succeeds, follow-up nodes are evaluated:

    * syntactical equivalence -- only added when the criterion compares
      ``response`` with ``answer`` (in either order) and the original answer
      string is a plain number; it checks that the raw response string is
      identical to the raw answer string;
    * same symbols -- checks that both sides, parsed without simplification,
      have the same set of free symbols.

    Parameters:
        criterion: parsed criterion; ``criterion.children[0]`` and
            ``criterion.children[1]`` are the two sides of the equality.
        parameters_dict: evaluation parameters. This function reads the keys
            ``"original_input"``, ``"reserved_expressions"``,
            ``"parsing_params"`` and, optionally, ``"local_substitutions"``.
        label: name used for the evaluation node and as prefix for every
            feedback tag; defaults to ``criterion.content_string()``.

    Returns:
        The ``CriteriaGraph`` describing the evaluation.
    """
    if label is None:
        label = criterion.content_string()

    true_label = label+"_TRUE"
    false_label = label+"_FALSE"
    syntax_label = label+"_SYNTACTICAL_EQUIVALENCE"
    symbols_label = label+"_SAME_SYMBOLS"

    def mathematical_equivalence(unused_input):
        # Identity comparison is kept on purpose: only the literal ``True``
        # counts as equal, any other return value is treated as not equal.
        result = check_equality(criterion, parameters_dict)
        if result is True:
            return {true_label}
        return {false_label}

    graph = CriteriaGraph(label)
    END = CriteriaGraph.END
    graph.add_node(END)
    lhs = criterion.children[0].content_string()
    rhs = criterion.children[1].content_string()

    def syntactical_equivalence(unused_input):
        # Compares the raw input strings, not the parsed expressions.
        original_input = parameters_dict["original_input"]
        if original_input["answer"] == original_input["response"]:
            return {syntax_label+"_TRUE"}
        return {syntax_label+"_FALSE"}

    def same_symbols(unused_input):
        local_substitutions = parameters_dict.get("local_substitutions", [])
        stored_expressions = parameters_dict["reserved_expressions"]
        # Substitute the "*_original" versions of answer and response
        # (NOTE(review): presumably the unsimplified parses -- confirm
        # against the caller) so that symbols cancelled by simplification
        # are still visible in ``free_symbols``.
        reserved_expressions = []
        for name, expression in stored_expressions.items():
            if name == "answer":
                expression = stored_expressions["answer_original"]
            elif name == "response":
                expression = stored_expressions["response_original"]
            reserved_expressions.append((name, expression))
        # Work on a copy so the shared parsing parameters are not mutated.
        parsing_params = dict(parameters_dict["parsing_params"])
        parsing_params.update({"simplify": False})
        lsym = parse_expression(lhs, parsing_params).subs(reserved_expressions).subs(local_substitutions)
        rsym = parse_expression(rhs, parsing_params).subs(reserved_expressions).subs(local_substitutions)
        if lsym.free_symbols == rsym.free_symbols:
            return {symbols_label+"_TRUE"}
        return {symbols_label+"_FALSE"}

    def is_number(string):
        # Raw string: the pattern contains backslash escapes meant for the
        # regex engine, not for the Python string literal.
        match_content = re.fullmatch(r'^-?(0|[1-9]\d*)?(\.\d+)?(?<=\d)(e-?(0|[1-9]\d*))?', string)
        return match_content is not None and len(match_content.group(0)) > 0

    lhs_text = str(lhs)
    rhs_text = str(rhs)
    compares_response_with_answer = (
        (lhs == "response" and rhs == "answer")
        or (lhs == "answer" and rhs == "response")
    )
    # The answer is only inspected when the criterion actually compares
    # response and answer (short-circuit), exactly as before.
    check_syntax = compares_response_with_answer and is_number(parameters_dict["original_input"]["answer"])

    # Mathematical equivalence is always the entry point of the graph.
    graph.add_evaluation_node(label, summary=label, details="Checks if "+lhs_text+"="+rhs_text+".", evaluate=mathematical_equivalence)
    graph.attach(label, true_label, summary=lhs_text+"="+rhs_text, details=lhs_text+" is equal to "+rhs_text+".")

    # Attached before the symbol check so the order of attach calls matches
    # the original numeric-answer branch.
    if check_syntax:
        graph.attach(true_label, syntax_label, summary="response is written like answer", details="Checks if "+lhs_text+" is written exactly the same as "+rhs_text+".", evaluate=syntactical_equivalence)
        graph.attach(syntax_label, syntax_label+"_TRUE", summary="response is written like answer", details=lhs_text+" is written exactly the same as "+rhs_text+".")
        graph.attach(syntax_label+"_TRUE", END.label)
        graph.attach(syntax_label, syntax_label+"_FALSE", summary="response is not written like answer", details=lhs_text+" is not written exactly the same as "+rhs_text+".")
        graph.attach(syntax_label+"_FALSE", END.label)

    graph.attach(true_label, symbols_label, summary=lhs_text+" has the same symbols as "+rhs_text, details=lhs_text+" has the same (free) symbols as "+rhs_text+".", evaluate=same_symbols)
    graph.attach(symbols_label, symbols_label+"_TRUE", summary=lhs_text+" has the same symbols as "+rhs_text, details=lhs_text+" has the same (free) symbols as "+rhs_text+".")
    graph.attach(symbols_label+"_TRUE", END.label)
    graph.attach(symbols_label, symbols_label+"_FALSE", summary=lhs_text+" does not have the same symbols as "+rhs_text, details=lhs_text+" does not have the same (free) symbols as "+rhs_text+".")
    graph.attach(symbols_label+"_FALSE", END.label)

    # "=\\=" yields the same text as the previous "=\=" without relying on
    # an invalid escape sequence.
    graph.attach(label, false_label, summary=lhs_text+"=\\="+rhs_text, details=lhs_text+" is not equal to "+rhs_text+".")
    graph.attach(false_label, END.label)
    return graph

def find_coords_for_node_type(expression, node_type):
Expand Down Expand Up @@ -473,21 +520,30 @@ def symbolic_comparison(response, answer, params, eval_response) -> dict:
eval_response.add_feedback(("NOTATION_WARNING_FACTORIAL", symbolic_comparison_internal_messages["NOTATION_WARNING_FACTORIAL"]))

# Safely try to parse answer and response into symbolic expressions
parsing_params_original = {**parsing_params}
parsing_params_original.update({"rationalise": False, "simplify": False})
try:
res = parse_expression(response, parsing_params)
res_original = parse_expression(response, parsing_params_original)
except Exception as e:
eval_response.is_correct = False
eval_response.add_feedback(("PARSE_ERROR", symbolic_comparison_internal_messages["PARSE_ERROR"](response)))
return eval_response

try:
ans = parse_expression(answer, parsing_params)
ans_original = parse_expression(answer, parsing_params_original)
except Exception as e:
raise Exception(f"SymPy was unable to parse the answer: {answer}.") from e

criteria_parser = generate_criteria_parser()
parsing_params["unsplittable_symbols"] += ("response", "answer", "where")
reserved_expressions = {"response": res, "answer": ans}
reserved_expressions = {
"response": res,
"answer": ans,
"response_original": res_original,
"answer_original": ans_original,
}
criteria_string = substitute_input_symbols(params.get("criteria", "answer=response"), params)[0]
criteria_parsed = create_criteria_list(criteria_string, criteria_parser, parsing_params)

Expand Down Expand Up @@ -521,6 +577,7 @@ def symbolic_comparison(response, answer, params, eval_response) -> dict:
"reference_criteria_strings": reference_criteria_strings,
"symbolic_comparison_criteria": symbolic_comparison_criteria,
"eval_response": eval_response,
"original_input": {"answer": answer, "response": response},
"disabled_evaluation_nodes": params.get("disabled_evaluation_nodes", set())
}
criteria_graphs = create_criteria_graphs(criteria_parsed, parameters_dict)
Expand Down
3 changes: 3 additions & 0 deletions app/symbolic_comparison_evaluation_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1107,6 +1107,9 @@ def test_no_reserved_keywords_in_old_format_input_symbol_alternatives(self):
("15", "x/y+1", "response=answer where x=2; y=3", False, ["response=answer where x=2; y=3_ONE_EXPONENT_FLIP"], {}), #NOTE: Sympy represents input as (x+y)/y so flipping the exponent gives (x+y)*y instead of x*y+1
("-1/3", "x/y+1", "response=answer where x=2; y=3", False, ["response=answer where x=2; y=3_ONE_ADDITION_TO_SUBTRACTION"], {}),
("13", "x+y*z-1", "response=answer where x=2; y=3; z=4", True, [], {}),
("2", "2", "response=answer", True, ["response=answer_SYNTACTICAL_EQUIVALENCE_TRUE", "response=answer_SAME_SYMBOLS_TRUE"], {}),
("4/2", "2", "answer=response", True, ["answer=response_SYNTACTICAL_EQUIVALENCE_FALSE"], {}),
("2+x-x", "2", "answer=response", True, ["answer=response_SAME_SYMBOLS_FALSE"], {}),
]
)
def test_criteria_based_comparison(self, response, answer, criteria, value, feedback_tags, additional_params):
Expand Down

0 comments on commit fbd68de

Please sign in to comment.