/
grade_utils.py
164 lines (133 loc) · 5 KB
/
grade_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import os
import sentence_tokenizer
import word_order
import essay_utils
from math import floor
from cmd_utils import log
# Grade category names. Their order here is the column order used when
# indexing a row of ``grades`` (see ``cols.index`` in correct_essay_grade).
cols = ('1a', '1b', '1c', '1d', '2a', '2b', '3a')
# Currently identical to ``cols``; not referenced by the code visible here.
implemented_grades = ('1a', '1b', '1c', '1d', '2a', '2b', '3a')

# Reference grades loaded once at import time from data/grades.txt.
# The file is read inside ``with`` so the handle is closed deterministically.
with open(os.path.join("data/grades.txt")) as _grades_file:
    _grade_lines = _grades_file.readlines()

# The lines are reversed and then the last five of the reversed list are
# dropped, i.e. the first five lines of the file are skipped (presumably a
# header -- confirm against the data file) and rows end up in reverse file
# order. The first whitespace-separated token of every remaining line is
# discarded and the rest are parsed as floats.
grades = [[float(n) for n in l.split()[1:]] for l in _grade_lines[::-1][:-5]]
def correct_essay_grade(essay_index, grade_type):
    """Return the reference grade stored for one essay in one category.

    ``essay_index`` is used directly as an index into the module-level
    ``grades`` table, and ``grade_type`` selects the column through its
    position in ``cols``.

    Raises ``Exception`` when ``grade_type`` is not one of ``cols``.
    """
    if grade_type in cols:
        essay_row = grades[essay_index]
        return essay_row[cols.index(grade_type)]
    message = '%s is not a valid grade category' % grade_type
    raise Exception(message)
def grade_text(text, grade_type):
    """Grade ``text`` in the category named by ``grade_type``.

    The category is validated against ``cols`` and the call is forwarded to
    the matching ``grade_<category>`` function in this module, whose result
    is returned unchanged.

    Raises ``Exception`` when ``grade_type`` is not one of ``cols``.
    """
    if grade_type not in cols:
        message = '%s is not a valid grade category' % grade_type
        raise Exception(message)

    # One grader per category; replaces a chain of equality tests.
    graders = {
        '1a': grade_1a,
        '1b': grade_1b,
        '1c': grade_1c,
        '1d': grade_1d,
        '2a': grade_2a,
        '2b': grade_2b,
        '3a': grade_3a,
    }
    grader = graders.get(grade_type)
    if grader is None:
        # A category listed in ``cols`` without a grader yields None.
        return None
    return grader(text)
def grade_1a(text):
    """Score category 1a from the number of issues found across all sentences.

    The text is split with ``sentence_tokenizer.parse`` and every sentence is
    passed to ``word_order.issues_in_sentence``; the lengths of the returned
    collections are added up and mapped to a grade:

        0-1 issues -> 5, 2 -> 4, 3-4 -> 3, 5-6 -> 2, more -> 1
    """
    issue_total = sum(
        len(word_order.issues_in_sentence(sentence))
        for sentence in sentence_tokenizer.parse(text)
    )

    # issue_total is a non-negative integer, so the upper bounds below are
    # enough to separate the grade bands.
    if issue_total <= 1:
        return 5
    if issue_total == 2:
        return 4
    if issue_total <= 4:
        return 3
    if issue_total <= 6:
        return 2
    return 1
def grade_1b(text):
    """Score category 1b from the counts returned by ``agreement_utils.parse``.

    The parse result is unpacked as three counts (named agreements,
    non-agreements and unsure in this function). The grade is the floor of
    five times the share of agreements among all three counts. When the
    counts add up to zero, a message is logged and 0 is returned.
    """
    import agreement_utils

    counts = agreement_utils.parse(text)
    agreed, _not_agreed, _unsure = counts
    tested = sum(counts)

    if not tested:
        log("No possible agreements found in text", 2)
        return 0

    log("Sub Scores: %s" % (counts,), 2)
    ratio = float(agreed) / tested
    log("%d/%d -> %f" % (agreed, tested, ratio), 2)
    return floor(ratio * 5)
def grade_1c(text):
    """Return the fixed score 3.2 for category 1c; ``text`` is not inspected."""
    constant_score = 3.2
    return constant_score
def grade_1d(text):
    """Score category 1d from the share of sentences without reported problems.

    ``syntactic_formation.parse`` is expected to return one problem count per
    sentence (assumed from how the result is used here -- confirm against
    that module). The grade is the floor of five times the fraction of
    entries whose count is not greater than zero, never below 0.

    Returns 0 when the parse result is empty, because there is nothing to
    assess and the fraction would otherwise divide by zero.
    """
    import syntactic_formation
    import math

    sentence_problems = syntactic_formation.parse(text)
    num_sentences = len(sentence_problems)
    if num_sentences == 0:
        # Empty input: avoid ZeroDivisionError in the ratio below.
        return 0

    num_sentences_with_problems = sum(1 for count in sentence_problems if count > 0)
    clean_ratio = 1 - (float(num_sentences_with_problems) / num_sentences)
    return max(0, math.floor(clean_ratio * 5))
def grade_2a(text):
    """Score category 2a from the pronoun data returned by ``text_coherence.parse``.

    Each entry of the parse result is indexed at 0, 1 and 2; those items are
    collected into first-, second- and third-person lists (names taken from
    this function's variables). The raw score starts at 2 and then:

    * every second-person item adds -1;
    * every third-person item adds -2 when its element at index 1 equals -1,
      .5 when it equals .5, and 1 otherwise;
    * every entry of the parse result adds .2.

    The result is clamped to the range 1..5.
    """
    import text_coherence

    # weights
    second_person_weight = -1
    third_person_weight_good = 1
    third_person_weight_bad = -2
    third_person_weight_avg = .5
    per_sentence_weight = .2

    def third_person_weight(marker):
        # Pick the weight that corresponds to a third-person item's marker.
        if marker == -1:
            return third_person_weight_bad
        if marker == .5:
            return third_person_weight_avg
        return third_person_weight_good

    parsed_sentences = text_coherence.parse(text)

    first_person, second_person, third_person = [], [], []
    for entry in parsed_sentences:
        first_person.extend(entry[0])
        second_person.extend(entry[1])
        third_person.extend(entry[2])

    third_person_score = sum(third_person_weight(item[1]) for item in third_person)

    raw_score = (
        2
        + (second_person_weight * len(second_person))
        + third_person_score
        + (len(parsed_sentences) * per_sentence_weight)
    )
    return max(1, min(5, raw_score))
def grade_2b(text):
    """Score category 2b from the hit counts returned by ``topic_coherence.parse``.

    The parse result is unpacked as ``(family_hits, work_hits, nouns)``
    (assumed to be numeric counts -- confirm against that module). Family
    hits are weighted 1 and work hits .5; each is scaled by 5 and divided by
    ``nouns - 1``. The sum is capped at 5 and at ``nouns`` and then rounded
    up with ``ceil``.

    The divisor is never allowed to drop below 1, so a text with exactly one
    noun no longer raises ZeroDivisionError and a text with no nouns no
    longer flips the sign of the score. Results for two or more nouns are
    unchanged.
    """
    import topic_coherence
    from math import ceil

    family_weight = 1
    work_weight = .5

    family_hits, work_hits, nouns = topic_coherence.parse(text)

    # Clamp so that nouns == 1 (zero divisor) and nouns == 0 (negative
    # divisor) are handled; the cap at ``nouns`` below keeps such scores low.
    divisor = max(nouns - 1, 1)

    score = float(family_hits * (5 * family_weight)) / divisor
    score += float(work_hits * (5 * work_weight)) / divisor
    return ceil(min(5, score, nouns))
def grade_3a(text):
    """Score category 3a from the number of sentences in ``text``.

    Sentences are counted with ``sentence_tokenizer.parse``. Six or more
    sentences give 5; fewer give the count minus one, with a minimum of 1.
    """
    sentence_count = len(sentence_tokenizer.parse(text))
    # Clamping (count - 1) into 1..5 reproduces both branches of the rule.
    return min(5, max(sentence_count - 1, 1))
if __name__ == '__main__':
    # Command-line driver: compare computed grades with the reference grades,
    # either for one essay (--essay N, 1-based) or for every essay.
    import cmd_utils

    # A single category from cmd_utils.cmd_test() if it returns something
    # truthy, otherwise the default list (which leaves out '1c').
    tests = cmd_utils.cmd_test()
    tests = [tests] if tests else ('1a', '1b', '1d', '2a', '2b', '3a')

    # --essay defaults to 0, which becomes -1 here and selects "all essays".
    essay_index = int(cmd_utils.cmd_arg('--essay', 0)) - 1

    for test in tests:
        if essay_index >= 0:
            essay_text = "\n".join(essay_utils.essays[essay_index])
            received_grade = grade_text(essay_text, test)
            log("Expect %s score: %d" % (test, correct_essay_grade(essay_index, test)), 0)
            log("Received %s score: %d" % (test, received_grade), 0)
        else:
            # Single-argument print(...) behaves the same under Python 2's
            # print statement and Python 3's print function.
            print("Values for %s" % (test,))
            print("-------------")
            for i, essay in enumerate(essay_utils.essays):
                essay_text = "\n".join(essay)
                received_grade = grade_text(essay_text, test)
                expected_grade = correct_essay_grade(i, test)
                diff = received_grade - expected_grade
                # Columns: essay number | expected | received | diff | |diff|
                row = [(i + 1), expected_grade, received_grade, diff, abs(diff)]
                print(" | ".join(str(s) for s in row))
            # NOTE(review): blank-line separator emitted once after each
            # category's table -- confirm this matches the intended nesting.
            print("\n\n")