In [29]:
import pandas as pd
import os
import logging
import re
import numpy as np
logger = logging.getLogger()

In [56]:
def process_task2(f):
    """Split the lines of a class file into valid and invalid records.

    A line is valid when it holds exactly 26 comma-separated values and its
    first value (the student ID) is the letter ``N`` followed by exactly
    eight digits. Every invalid line is echoed to stdout together with the
    reason it was rejected.

    Parameters
    ----------
    f : iterable of str
        The lines to check, e.g. an open text file.

    Returns
    -------
    tuple[list[str], int]
        The valid lines (unmodified, in input order) and the total number of
        lines read, valid or not.
    """
    valid_lines = []
    # Raw string: "\d" inside a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions. No "^" anchor is needed
    # because fullmatch() already anchors at both ends.
    id_pattern = re.compile(r"N\d{8}")
    total_lines_cnt = 0
    for line in f:
        total_lines_cnt += 1
        # Split once and reuse the fields for both checks.
        fields = line.split(",")
        if len(fields) != 26:
            print("Invalid line of data: does not contain exactly 26 values:")
            print(line)
        elif id_pattern.fullmatch(fields[0]) is None:
            print("Invalid line of data: N# is invalid")
            print(line)
        else:
            valid_lines.append(line)

    return valid_lines, total_lines_cnt

In [60]:
def process_task3(lines):
    """Grade validated answer lines and find the most skipped / most missed questions.

    Each line has the form ``student_id,answer1,...,answer25``. A correct
    answer earns 4 points, a blank answer 0 points, and a wrong answer costs
    1 point.

    Parameters
    ----------
    lines : list of str
        Lines holding a student ID followed by one answer per question.
        Whitespace around each value is ignored.

    Returns
    -------
    tuple[dict, str, str]
        ``scores`` maps each student ID to its total score. The two strings
        describe, respectively, the most-skipped and the most-incorrectly
        answered question(s) as ``"<question> - <count> - <rate>"`` entries
        joined by ``", "``. Ties are all listed; ``rate`` is the count divided
        by the number of lines, rounded to 3 decimals. Both strings are empty
        when ``lines`` is empty.

    Raises
    ------
    IndexError
        If a line holds fewer answers than the answer key.
    """
    answer_key = "B,A,D,D,C,B,D,A,C,C,D,B,A,B,A,C,B,D,A,C,A,A,B,D,D".split(",")

    # Per-question tallies, keyed by 1-based question number.
    scores = {}
    skip_ones = {question: 0 for question in range(1, len(answer_key) + 1)}
    wrong_ones = {question: 0 for question in range(1, len(answer_key) + 1)}

    for line in lines:
        student_id, *selections = [s.strip() for s in line.split(",")]
        score = 0
        for question, correct in enumerate(answer_key, start=1):
            chosen = selections[question - 1]
            if chosen == '':
                skip_ones[question] += 1
            elif chosen != correct:
                wrong_ones[question] += 1
                score -= 1
            else:
                score += 4
        scores[student_id] = score

    most_skip_ones = _format_top_questions(skip_ones, len(lines))
    most_wrong_ones = _format_top_questions(wrong_ones, len(lines))

    return scores, most_skip_ones, most_wrong_ones


def _format_top_questions(counts, total):
    """Render the question(s) with the highest count as "number - count - rate" entries."""
    # With no graded lines there is no rate to compute (it would divide by zero).
    if total == 0:
        return ""
    top = max(counts.values())
    return ", ".join(
        f"{question} - {count} - {round(count / total, 3)}"
        for question, count in counts.items()
        if count == top
    )

In [69]:
# Interactive grading loop: repeatedly ask for a class file, validate and grade
# it, write "<class>_grades.csv" to the working directory and print a report.
# Entering "q" ends the loop.
folder_path = "../data"

while True:
    try:
        file_name = input("Enter a class file to grade (i.e. class1 for class1.txt): ").strip()
        if file_name == 'q':
            break
        # The prompt invites a bare class name ("class1"), so add the ".txt"
        # extension when the user did not type one.
        if not os.path.splitext(file_name)[1]:
            file_name = f"{file_name}.txt"
        file_path = f"{folder_path}/{file_name}"
        with open(file_path) as f:
            print(f"Successfully opened {file_name}")
            print("**** ANALYZING ****")

            # task 2: validation is the only step that needs the open file
            valid_lines, total_lines_cnt = process_task2(f)

        if len(valid_lines) == total_lines_cnt:
            print("No errors found!")

        print("**** REPORT ****")
        print(f"Total valid lines of data: {len(valid_lines)}")
        print(f"Total invalid lines of data: {total_lines_cnt - len(valid_lines)}")

        # Without any valid line there are no scores to summarise; max()/min()
        # and the per-question rates would fail on empty data.
        if not valid_lines:
            print("No valid lines of data: nothing to grade.")
            continue

        # task 3
        scores, most_skip_ones, most_wrong_ones = process_task3(valid_lines)
        score_values = list(scores.values())
        highest_score = max(score_values)
        lowest_score = min(score_values)
        range_score = highest_score - lowest_score
        mean_score = np.mean(score_values)
        median_score = np.median(score_values)

        # task 4: one "student_id,score" row per student, no header and no index
        scores_df = pd.DataFrame(scores.items(), columns=['student_id', 'score'])
        scores_df.to_csv(f"{file_name.split('.')[0]}_grades.csv", index=False, header=False)

        # report
        print(f"Mean (average) score: {mean_score}")
        print(f"Highest score: {highest_score}")
        print(f"Lowest score: {lowest_score}")
        print(f"Range of scores: {range_score}")
        print(f"Median score: {median_score}")
        print(f"Question that most people skip: {most_skip_ones}")
        print(f"Question that most people answer incorrectly: {most_wrong_ones}")

    except FileNotFoundError as e:
        # Report the missing file and prompt again instead of stopping.
        logger.exception(e)
        print(e)

Enter a class file to grade (i.e. class1 for class1.txt): class1.txt
Successfully opened class1.txt
**** ANALYZING ****
No errors found!
**** REPORT ****
Total valid lines of data: 20
Total invalid lines of data: 0
Mean (average) score: 75.6
Highest score: 91
Lowest score: 59
Range of scores: 32
Median score: 73.0
Question that most people skip: 3 - 4 - 0.2, 5 - 4 - 0.2, 23 - 4 - 0.2
Question that most people answer incorrectly: 10 - 4 - 0.2, 14 - 4 - 0.2, 16 - 4 - 0.2, 19 - 4 - 0.2, 22 - 4 - 0.2
Enter a class file to grade (i.e. class1 for class1.txt): q


In [62]:
# Display the student-ID -> score mapping left in the kernel by the grading loop.
# NOTE(review): this cell only works after the grading cell has run, and the
# saved output (21 students, top score 100) does not match the report shown for
# class1.txt (20 valid lines, highest 91) — it appears to be stale output from
# a different run; confirm and re-run top to bottom.
scores

{'N00000021': 68,
 'N00000022': 76,
 'N00000024': 73,
 'N00000026': 72,
 'N00000028': 73,
 'N00000029': 87,
 'N00000030': 82,
 'N00000031': 76,
 'N00000032': 87,
 'N00000033': 77,
 'N00000034': 69,
 'N00000036': 77,
 'N00000037': 75,
 'N00000038': 73,
 'N00000039': 66,
 'N00000040': 73,
 'N00000041': 91,
 'N00000042': 100,
 'N00000043': 86,
 'N00000044': 90,
 'N00000045': 67}

In [66]:
# Preview the scores as a two-column table (student_id, score), built with the
# same expression the grading cell uses for the frame it writes to the CSV.
# NOTE(review): depends on `scores` still being in the kernel namespace.
pd.DataFrame(scores.items(), columns=['student_id', 'score'])

Unnamed: 0,student_id,score
0,N00000021,68
1,N00000022,76
2,N00000024,73
3,N00000026,72
4,N00000028,73
5,N00000029,87
6,N00000030,82
7,N00000031,76
8,N00000032,87
9,N00000033,77
