Skip to content
Switch branches/tags
Go to file
Cannot retrieve contributors at this time
179 lines (150 sloc) 6.67 KB
Functions for aggregating and analyzing exam-related data, such as calculating
student exam performance.
import csv
import os
import pandas as pd
def grade_scantron(
input_scantron, correct_answers, drops=[], item_value=1, incorrect_threshold=0.5
Calculate student grades from scantron data.
Compiles data collected from a scantron machine (5-option multiple choice
exam) and calculates grades for each student. Also provides descriptive
statistics of exam performance, as well as a list of the questions "most"
students got incorrect, and saves the distribution of answers for those
poorly performing questions.
This function receives 1 scantron text file and produces 2 output files.
Splitting of the scantron data is specific to each scantron machine. The
indices used in this function are correct for the scantron machine in the
UBC Psychology department as of 2015. Indices need to be adjusted for
different machines.
Scantron exams can be finicky. Students who incorrectly fill out scantrons
need to be considered. Make sure to manually inspect the text file output
by the scantron machine for missing answers before running this. This
function does not correct for human error when filling out the scantron.
input_scantron : string
Path to the .txt file produced by the scantron machine.
correct_answers : list
A list of strings containing the *correct* exam answers. For example:
["A", "E", "D", "A", "B"]. The order must match the order of
presentation on the exam (i.e. the first list item must correspond
to the first exam question).
drops : list, optional
List of integers containing question numbers that should be excluded
from calculation of grades. For example: [1, 5] will not include
questions 1 and 5 when calculating exam scores.
item_value : int, optional
Integer representing how many points each exam question is worth.
incorrect_threshold : float between [0., 1.], optional
Poorly performing questions are those where few students got the
correct answer. This parameter sets the threshold at which an item is
considered poor. For example, a threshold of 0.4 means that a poor
item is considered to be one where fewer than 40% of students
chose the correct answer.
# Start and end locations of various pieces of information in the scantron text file.
# These need to be adjusted for different scantron machines.
# Currently set for the machine used in UBC Psychology
surname_idx = (0, 12)
first_name_idx = (12, 21)
student_num_idx = (21, 30)
answers_idx = 30
# output directory
directory, filename = os.path.split(input_scantron)
filename = os.path.splitext(filename)[0]
# calculate total number of points available on the exam
total_points = (len(correct_answers) * item_value) - len(drops)
# create a pandas dataframe to hold the scantron data
summary = ["surname", "first_name", "student_number", "points", "percent"]
questions = ["Q-{}".format(i + 1) for i in range(len(correct_answers))]
df = pd.DataFrame(columns=summary + questions)
# calculate grades
with open(input_scantron, "r") as f:
scantron_data = csv.reader(f)
# loop through every row (student) in the input scantron file
for row in scantron_data:
surname = row[0][surname_idx[0] : surname_idx[1]].lstrip().rstrip()
first_name = row[0][first_name_idx[0] : first_name_idx[1]].lstrip().rstrip()
student_num = (
row[0][student_num_idx[0] : student_num_idx[1]].lstrip().rstrip()
answers = row[0][answers_idx : (answers_idx + len(correct_answers))]
points = 0
for i, pair in enumerate(zip(answers, correct_answers)):
if i + 1 not in drops:
if pair[0] == pair[1]:
points += item_value
df_summary = {
"surname": surname,
"first_name": first_name,
"student_number": student_num,
"points": points,
"percent": (points / total_points) * 100,
df_questions = {"Q-{}".format(i + 1): a for i, a in enumerate(answers)}
df = df.append([{**df_summary, **df_questions}], ignore_index=True)
os.path.join(directory, "{}.xls".format(filename + "_grades")),
# write summary statistics
with open(
os.path.join(directory, "{}.txt".format(filename + "_summary")), "w"
) as f:
# calculate descriptive statistics
N = df.shape[0]
mean_percent = df["percent"].mean()
mean_points = df["points"].mean()
std_points = df["points"].std()
range_points = (df["points"].min(), df["points"].max())
"Descriptive Statistics: \n\n",
"N: {}\n".format(N),
"Mean %: {:.2f}%\n".format(mean_percent),
"Mean score (out of {} points): {:.2f}\n".format(
total_points, mean_points
"Score SD: {:.2f}\n".format(std_points),
"Range: {} (Min: {}, Max: {})\n\n\n".format(
range_points[1] - range_points[0], range_points[0], range_points[1]
if len(drops) > 0:
["Dropped questions: {}\n\n\n".format(", ".join(map(str, drops)))]
"Problem Items (questions that less "
"than {}% of students got correct):\n\n".format(
incorrect_threshold * 100
problems = False
for i, item in enumerate(questions):
cur_q = df[item]
if len(cur_q[cur_q == correct_answers[i]]) < (N * incorrect_threshold):
problems = True
distribution = cur_q.value_counts()
"{} (A: {}, B: {}, C: {}, D: {}, E: {})\n".format(,
if not problems: