Skip to content

Commit

Permalink
Merge pull request #23 from openstax/question-lookup-fixes
Browse files Browse the repository at this point in the history
move qid lookup to app object, so it is updated by ecosystem import; …
  • Loading branch information
reedstrm committed Oct 28, 2019
2 parents 9d25685 + 5b9149a commit 1d94bdd
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 17 deletions.
4 changes: 4 additions & 0 deletions validator/app.py
Expand Up @@ -42,6 +42,10 @@ def create_app(**kwargs):

app.df = df

app.qids = {}
for idcol in ("uid", "qid"):
app.qids[idcol] = set(df["questions"][idcol].values.tolist())

app.register_blueprint(read_api.bp)
app.register_blueprint(write_api.bp)
app.register_blueprint(validate_api.bp)
Expand Down
35 changes: 18 additions & 17 deletions validator/validate_api.py
Expand Up @@ -26,7 +26,6 @@

VALIDITY_FEATURE_DICT = {}
PARSER_DEFAULTS = {}
qids = {}
parser = None
common_vocab = set()

Expand All @@ -35,19 +34,14 @@

@bp.record_once
def setup_parse_and_data(setup_state):
global VALIDITY_FEATURE_DICT, PARSER_DEFAULTS, qids, parser, common_vocab
global VALIDITY_FEATURE_DICT, PARSER_DEFAULTS, parser, common_vocab

PARSER_DEFAULTS = setup_state.app.config["PARSER_DEFAULTS"]
SPELLING_CORRECTION_DEFAULTS = setup_state.app.config[
"SPELLING_CORRECTION_DEFAULTS"
]
VALIDITY_FEATURE_DICT = setup_state.app.config["VALIDITY_FEATURE_DICT"]

df = setup_state.app.df
qids = {}
for idcol in ("uid", "qid"):
qids[idcol] = set(df["questions"][idcol].values.tolist())

# Create the parser, initially assign default values
# (these can be overwritten during calls to process_string)
parser = StaxStringProc(
Expand All @@ -72,21 +66,28 @@ def setup_parse_and_data(setup_state):

def get_question_data_by_key(key, val):
df = current_app.df
first_q = df["questions"][df["questions"][key] == val].iloc[0]
module_id = first_q.cvuid
uid = first_q.uid
has_numeric = df["questions"][df["questions"][key] == val].iloc[0].contains_number
# FIXME - should use all the questions and combine associated pages
# FIXME - using the last match (last_q) works better for now, because some dirty
# data gets through that has innovation pages but not the exact book those pages are from
last_q = df["questions"][df["questions"][key] == val].iloc[-1]
module_id = last_q.cvuid
uid = last_q.uid
has_numeric = last_q.contains_number
innovation_vocab = (
df["innovation"][df["innovation"]["cvuid"] == module_id]
.iloc[0]
.innovation_words
)
vuid = module_id.split(":")[0]
domain_vocab = df["domain"][df["domain"]["vuid"] == vuid].iloc[0].domain_words
domain_vocab_df = df["domain"][df["domain"]["vuid"] == vuid]
if domain_vocab_df.empty:
domain_vocab = set()
else:
domain_vocab = domain_vocab_df.iloc[-1].domain_words

# A better way . . . pre-process and then just do a lookup
question_vocab = first_q["stem_words"]
mc_vocab = first_q["mc_words"]
question_vocab = last_q["stem_words"]
mc_vocab = last_q["mc_words"]
vocab_dict = OrderedDict(
{
"stem_word_count": question_vocab,
Expand All @@ -105,9 +106,9 @@ def get_question_data_by_key(key, val):
def get_question_data(uid):
if uid is not None:
qid = uid.split("@")[0]
if uid in qids["uid"]:
if uid in current_app.qids["uid"]:
return get_question_data_by_key("uid", uid)
elif qid in qids["qid"]:
elif qid in current_app.qids["qid"]:
return get_question_data_by_key("qid", qid)
# no uid, or not in data sets
default_vocab_dict = OrderedDict(
Expand Down Expand Up @@ -254,7 +255,7 @@ def validate_response(
return_dictionary["tag_numeric_input"] = tag_numeric_input
return_dictionary["spelling_correction"] = spelling_correction
return_dictionary["uid_used"] = uid_used
return_dictionary["uid_found"] = uid_used in qids["uid"]
return_dictionary["uid_found"] = uid_used in current_app.qids["uid"]
return_dictionary["lazy_math_evaluation"] = lazy_math_mode

# If lazy_math_mode, do a lazy math check and update valid accordingly
Expand Down
4 changes: 4 additions & 0 deletions validator/write_api.py
Expand Up @@ -52,6 +52,10 @@ def update_fixed_data(df_domain_, df_innovation_, df_questions_):
df["innovation"] = df["innovation"].append(df_innovation_, sort=False)
df["questions"] = df["questions"].append(df_questions_, sort=False)

# Update qid sets - for shortcutting question lookup
for idcol in ("uid", "qid"):
current_app.qids[idcol] = set(df["questions"][idcol].values.tolist())

# Finally, write the updated dataframes to disk and declare victory
data_dir = current_app.config["DATA_DIR"]
write_fixed_data(df["domain"], df["innovation"], df["questions"], data_dir)
Expand Down

0 comments on commit 1d94bdd

Please sign in to comment.