In [1]:
import random
import pandas as pd
pd.set_option('display.max_rows', None)

NUMBER_OF_QUESTIONS = 2
VALIDATION_SET = True

# Question ids normally start at #1. The validation set is numbered
# differently: it starts at question #5, so the offset becomes 5 there.
data_offset = 5 if VALIDATION_SET else 1

# Pick the input files from the same flag that picks the offset, so the two
# can never disagree (previously the file names had to be swapped by hand).
if VALIDATION_SET:
    questions_file = 'Questions_data_prj3_validation - Question_data.csv'
    answers_file = 'Answers_data_prj3_validation - Answer_data.csv'
else:
    questions_file = 'Questions_data_prj3.csv'
    answers_file = 'Answers_data_prj3_update2.csv'

# import questions
questions_df = pd.read_csv(questions_file)

# import answers
answers_df = pd.read_csv(answers_file)

# Columns filled in later, one row at a time; they start out empty.
# add question text to final df
answers_df['Question_text'] = pd.Series(dtype='str')

# add feedback text to each question
answers_df['feedback_text'] = pd.Series(dtype='str')

# mark answer rows as correct or incorrect
answers_df['is_correct'] = pd.Series(dtype='Int64')

# generate a specific question's answers
def generate_question_answers(answers_df, question_id):
    """Pick one correct answer and up to six incorrect answers for a question.

    Args:
        answers_df: DataFrame with at least the columns 'Question_id' and
            'is_correct' (1 for correct rows, 0 for incorrect rows).
        question_id: value of 'Question_id' whose rows are sampled.

    Returns:
        A list of row Series: first one randomly chosen correct answer, then
        the randomly chosen incorrect answers.

    Raises:
        ValueError: if the question has no row marked as correct.
    """
    rows_by_question_id = answers_df[answers_df['Question_id'] == question_id]
    correct_answers = rows_by_question_id[rows_by_question_id['is_correct'] == 1]
    incorrect_answers = rows_by_question_id[rows_by_question_id['is_correct'] == 0]

    if len(correct_answers) == 0:
        raise ValueError(
            "no correct answers found for question {}".format(question_id)
        )

    # the incorrect answer options should be 3 * 2 usually, if a student wants to retake the problem
    # so they have 3 new options on the second retake
    # however, if there are not enough answers in the incorrect_answers df, then it chooses as many as it can
    number_of_incorrect_answers_to_select = min(6, len(incorrect_answers))
    incorrect_answer_indices = random.sample(
        range(len(incorrect_answers)), number_of_incorrect_answers_to_select
    )

    # randrange's upper bound is exclusive, so len(...) (not len(...) - 1)
    # is what makes every correct answer, including the last one, eligible
    # and keeps the single-correct-answer case from being an empty range.
    correct_position = random.randrange(len(correct_answers))

    partial_answer_options_array = [correct_answers.iloc[correct_position]]
    partial_answer_options_array.extend(
        incorrect_answers.iloc[position] for position in incorrect_answer_indices
    )
    return partial_answer_options_array

# lowercase the first letter so the text can follow "and " mid-sentence
def process_multiple_correct_answers(choice):
    """Return *choice* with only its first character lowercased.

    The rest of the string is left untouched. An empty string is returned
    unchanged instead of raising IndexError.
    """
    # Slicing (rather than indexing) is safe on an empty string.
    return choice[:1].lower() + choice[1:]

# Fill in 'Question_text', 'feedback_text' and 'is_correct' for every answer row.
for index, row in answers_df.iterrows():
    # setup: the question row(s) matching this answer's Question_id
    corresponding_question_row = questions_df.loc[questions_df['Question_id'] == row['Question_id']]
    # Spaces are stripped so "A, C" and "A,C" compare equal.
    correct_answer = str(corresponding_question_row['Correct_answer_choice'].iloc[0]).replace(" ", "")
    student_answer = str(row['Student_choice_on_question']).replace(" ", "")

    # add question text
    # Positional access on the filtered row: no dependence on how the
    # questions file's row index happens to line up with the question ids.
    answers_df.at[index, 'Question_text'] = corresponding_question_row['Question_text'].iloc[0]

    # parse feedback for all options: join the text of every correct choice
    # with "and ", lowercasing the first letter of each choice after the first
    feedback_text = ""
    for choice_letter in "ABCD":
        if choice_letter not in correct_answer:
            continue
        choice_text = corresponding_question_row['Choice_' + choice_letter + '_text'].iloc[0]
        if feedback_text:
            feedback_text += "and "
            choice_text = process_multiple_correct_answers(choice_text)
        feedback_text += choice_text + " "

    # Upper-case only the first character. str.capitalize() would also
    # lowercase everything after it (e.g. turning "A/B" into "a/b").
    feedback_text = feedback_text[:1].upper() + feedback_text[1:]
    answers_df.at[index, 'feedback_text'] = feedback_text

    # mark answer correct or incorrect
    answers_df.at[index, 'is_correct'] = 1 if student_answer == correct_answer else 0

# Collect the sampled answer rows of every question, in question-id order.
answer_options_array = []
for question_id in range(data_offset, data_offset + NUMBER_OF_QUESTIONS):
    answer_options_array.extend(generate_question_answers(answers_df, question_id))

# One row per sampled answer, with the same columns as answers_df.
column_names = list(answers_df.columns.values)
answer_options_df = pd.DataFrame(answer_options_array, columns=column_names)
display(answer_options_df)

# export to json
answer_options_df.to_json('export_dataframe.json')

# export to csv
# answer_options_df.to_csv('export_dataframe.csv')


Unnamed: 0,Answer_text,Question_id,Student_score_on_question,Student_choice_on_question,Quiz_score,Average_quizzes_score,Question_text,feedback_text,is_correct
6,increased attention Generate their own knowled...,5,1.0,"A,C",8.75,7.8,"In a math class activity, students are trying ...",Students will better select the right formula ...,1
3,"by following the 'contrasting' path, students ...",5,0.5,"B,C",9.5,8.1,"In a math class activity, students are trying ...",Students will better select the right formula ...,0
1,Students are likely to gain the ability to con...,5,0.75,"A,B,C",9.75,8.1,"In a math class activity, students are trying ...",Students will better select the right formula ...,0
16,Contrasting cases are helpful for convergent t...,5,0.5,"A,B,C,D",8.0,6.7,"In a math class activity, students are trying ...",Students will better select the right formula ...,0
9,Students will remember the formulas longer tha...,5,0.75,"A,B,C",7.25,6.8,"In a math class activity, students are trying ...",Students will better select the right formula ...,0
19,Students would get deeper knowledge of how are...,5,0.5,"B,C",8.25,6.8,"In a math class activity, students are trying ...",Students will better select the right formula ...,0
17,Students will perceive a better learning of th...,5,0.75,"A,B,C",7.5,6.8,"In a math class activity, students are trying ...",Students will better select the right formula ...,0
43,Test both to see which one works better becaus...,6,1.0,A,7.5,7.0,The “Stable-pairing” design pattern suggests t...,Conduct a controlled test (or a/b test) to see...,1
40,I would choose a design pattern that could yie...,6,0.0,B,8.75,6.8,The “Stable-pairing” design pattern suggests t...,Conduct a controlled test (or a/b test) to see...,0
39,Stable pairing can detract from diversity in l...,6,0.0,C,6.75,7.1,The “Stable-pairing” design pattern suggests t...,Conduct a controlled test (or a/b test) to see...,0
