-
Notifications
You must be signed in to change notification settings - Fork 0
/
questions-auxiliaries.py
58 lines (39 loc) · 1.33 KB
/
questions-auxiliaries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 27 23:52:51 2015
@author: Kirill
"""
import sys
#from nltk.parse import stanford
import essayclasses.allessaysfile
import essayclasses.anessay
import essayclasses.asentence
# Path of the CSV export that holds every essay, relative to the working dir.
exportedDB = 'data/all-essays.csv'

# Surface forms counted as auxiliary verbs.
auxiliaries = {'be', 'am', 'are', 'is', 'was', 'were', 'being',
               'do', 'did', 'does', 'doing', 'have', 'had',
               'has', 'having'}

# Modal verbs; defined for reference but not used by the statistics below.
modals = {'can', 'could', 'may', 'might', 'must', 'shall', 'should',
          'will', 'would'}


def countAuxiliaries(question, auxWords=None):
    """Count the whitespace-separated tokens of a question found in a word set.

    question -- object with a ``split()`` method (a string in practice).
    auxWords -- collection of words to look for; defaults to the module-level
                ``auxiliaries`` set.

    Returns the number of tokens that are members of ``auxWords``. A token
    that occurs several times is counted each time.

    NOTE(review): matching is exact and case-sensitive, so 'Is' or 'is?' are
    not counted unless the questions are already lower-cased and stripped of
    punctuation upstream -- confirm against getQuestions().
    """
    if auxWords is None:
        auxWords = auxiliaries
    return sum(1 for aWord in question.split() if aWord in auxWords)


def auxPerQuestion(auxCount, questionCount):
    """Return auxCount / questionCount, or 0.0 when questionCount is zero.

    The zero guard keeps the report from raising ZeroDivisionError when one
    of the two essay groups contributes no questions at all.
    """
    if questionCount == 0:
        return 0.0
    return auxCount / questionCount


def main():
    """Print the average number of auxiliaries per question for each group.

    Loads every essay from ``exportedDB``, splits the essays into those for
    which ``isArabic()`` is true and the rest, and for each group prints the
    total auxiliary count divided by the number of questions.
    """
    allEssaysFile = essayclasses.allessaysfile.AllEssaysFile(exportedDB)
    allEssays = allEssaysFile.essaysList()

    arAux = 0    # auxiliaries found in questions of Arabic essays
    arQ = 0      # questions found in Arabic essays
    nArAux = 0   # auxiliaries found in questions of non-Arabic essays
    nArQ = 0     # questions found in non-Arabic essays

    for anEssay in allEssays:
        thisEssay = essayclasses.anessay.AnEssay(anEssay)
        for aQuestion in thisEssay.getQuestions():
            # Evaluate the group once per question instead of once per
            # matching word; assumes isArabic() is deterministic.
            arabic = thisEssay.isArabic()
            found = countAuxiliaries(aQuestion, auxiliaries)
            if arabic:
                arQ += 1
                arAux += found
            else:
                nArQ += 1
                nArAux += found

    print ('Arabic Aux per Question: ', auxPerQuestion(arAux, arQ))
    print ('Non-Arabic Aux per Question: ', auxPerQuestion(nArAux, nArQ))


if __name__ == '__main__':
    main()