-
Notifications
You must be signed in to change notification settings - Fork 5
/
postprocess.py
49 lines (38 loc) · 1.08 KB
/
postprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import sys
import os
import numpy as np
from string import punctuation
file1 = sys.argv[1]
def process_class_names(instance):
try:
words = instance
words = words[:10]
clss = ''
start = words.index('class')
end = words.index('(')
clss = words[start+1]
for i in range(start+2, end):
clss = clss + ' ' + words[i]
original_clss = clss
clss = strip_punctuation(clss)
clss = " ".join(clss.split())
words = clss.split(' ')
clss = ''
for word in words:
if word[0].isupper():
clss = clss + ' ' + word
else:
clss = clss + word
clss = clss.strip()
instance = " ".join(instance).replace(original_clss, clss, 1).split(" ")
except:
pass
return instance
def strip_punctuation(s):
return ''.join(c for c in s if c not in punctuation)
def post_process_HS():
fileptr = open(file1, 'r')
for instance in fileptr:
process_class_names(instance)
if __name__ == "__main__":
post_process_HS()