-
Notifications
You must be signed in to change notification settings - Fork 0
/
Document.java
168 lines (135 loc) · 5.03 KB
/
Document.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.regex.*;
/**
 * A PDF document whose embedded images and first-page unit information are
 * extracted with the bundled command-line tools under {@code 3rdbinaries}.
 *
 * <p>All artefacts are written into an export folder derived from the document
 * path; {@link #deleteLeftovers()} removes that folder again.
 */
public class Document {
    /** Matches any run of two or more whitespace characters. */
    private static final Pattern WIDE_GAP = Pattern.compile("\\s{2,}");

    private final String path;
    private final String outputFolder;
    private final String outputFile;
    private final ArrayList<DataImage> badges;
    private String unitTitle;
    private UnitCodeHelper unitCodeHelper;
    private String department;
    private int levelofstudy;
    private String programme;
    private String semester;

    /**
     * Creates a document for the given PDF path with placeholder unit information.
     *
     * @param path location of the PDF file
     */
    public Document(String path) {
        this.path = path;
        // NOTE(review): every '.' in the path is replaced, including dots in parent
        // directory names (e.g. ".."); kept as-is because callers may rely on the
        // resulting folder name.
        this.outputFolder = (path + "-exports").replace(".", "-");
        this.outputFile = this.outputFolder + "\\exported";
        this.badges = new ArrayList<DataImage>();
        this.unitTitle = "GenericUnitTitle";
        this.unitCodeHelper = new UnitCodeHelper();
        this.department = "GenericDepartment";
        this.levelofstudy = 0;
        this.programme = "GenericProgramme";
        this.semester = "GenericSemester";
    }

    /**
     * Creates the export folder, reads the unit information from the document and
     * then runs {@code pdfimages.exe} on it, using the export file prefix as the
     * image root. The tool's exit code is not inspected.
     *
     * @throws IOException          if the export folder cannot be created, a tool
     *                              cannot be started or the extracted text cannot
     *                              be read
     * @throws InterruptedException if the thread is interrupted while waiting for a tool
     */
    void getImages() throws IOException, InterruptedException {
        File exportDir = new File(this.outputFolder);
        // mkdirs() also returns false when the directory already exists, so only
        // fail when there is still no directory afterwards.
        if (!exportDir.mkdirs() && !exportDir.isDirectory()) {
            throw new IOException("Could not create output directory: " + this.outputFolder);
        }
        getDocInfo();
        runAndWait("3rdbinaries\\pdfimages.exe", this.path, this.outputFile);
    }

    /**
     * Runs {@code pdftotext.exe} with {@code -l 1} on the document and parses unit
     * title, unit code and semester out of the resulting text file. When the text
     * is blank the current values are left untouched.
     */
    private void getDocInfo() throws InterruptedException, IOException {
        String textFile = this.outputFile + ".txt";
        runAndWait("3rdbinaries\\pdftotext.exe", "-q", "-l", "1", "-raw", "-table", "-lineprinter",
                "-linespacing", "1", this.path, textFile);

        String content = readFile(textFile, StandardCharsets.UTF_8).trim();
        if (content.isEmpty()) {
            // Nothing usable was extracted (e.g. only a page break); keep the defaults.
            return;
        }

        // Replace every wide gap with a character that is highly unlikely to occur
        // in the text so it can serve as a field delimiter.
        content = WIDE_GAP.matcher(content).replaceAll("█");

        // The delimiter lets us capture the whole unit title without knowing the
        // next field; parsing errors occur when the title itself contains a wide gap.
        String title = between(content, "UNIT TITLE█", "█");
        if (title != null) {
            // Only overwrite the placeholder when a title was actually found.
            this.unitTitle = title;
        }

        // NOTE(review): findUnitCode may return null here; the helper is assumed to
        // cope with that - confirm against UnitCodeHelper.
        this.unitCodeHelper = new UnitCodeHelper(findUnitCode(content, "UNIT CODE"));
        // The remaining fields are derived from the unit code for data uniformity.
        this.department = this.unitCodeHelper.getDepartment();
        this.levelofstudy = this.unitCodeHelper.getLevelofstudy();
        this.programme = this.unitCodeHelper.getProgramme();
        this.semester = findSemester(content, "SEMESTER/SESSION");
    }

    /**
     * Starts an external command and blocks until it has finished.
     *
     * <p>Arguments are passed as a list so paths containing spaces need no manual
     * quoting, and the merged stdout/stderr is drained so the child can never
     * stall on a full pipe buffer.
     */
    private static void runAndWait(String... command) throws IOException, InterruptedException {
        ProcessBuilder builder = new ProcessBuilder(command);
        builder.redirectErrorStream(true);
        Process process = builder.start();
        try (InputStream output = process.getInputStream()) {
            byte[] sink = new byte[4096];
            while (output.read(sink) != -1) {
                // Output is intentionally discarded.
            }
        }
        process.waitFor();
    }

    /** Reads the whole file at {@code path} and decodes it with {@code encoding}. */
    private String readFile(String path, Charset encoding) throws IOException {
        return new String(Files.readAllBytes(Paths.get(path)), encoding);
    }

    /**
     * Returns the shortest text enclosed by the literal markers {@code a} and
     * {@code b} in {@code original}, or {@code null} when the pair does not occur.
     */
    private String between(String original, String a, String b) {
        // Markers are quoted so that they are always matched literally.
        Pattern pattern = Pattern.compile(Pattern.quote(a) + "(.*?)" + Pattern.quote(b), Pattern.DOTALL);
        Matcher matcher = pattern.matcher(original);
        if (matcher.find()) {
            return matcher.group(1);
        }
        return null;
    }

    /**
     * Returns the first word following the literal {@code word} in {@code str},
     * skipping any non-word characters in between, or {@code null} if absent.
     */
    private String firstWordAfter(String str, String word) {
        Matcher m = Pattern.compile(Pattern.quote(word) + "\\W+(\\w+)").matcher(str);
        return m.find() ? m.group(1) : null;
    }

    /**
     * Extracts the semester that follows {@code word}. "Fall" (any case) is
     * reported as "Autumn"; a missing value is reported as "Whole Session".
     */
    private String findSemester(String str, String word) {
        String found = firstWordAfter(str, word);
        if (found == null) {
            return "Whole Session";
        }
        if (found.equalsIgnoreCase("Fall")) {
            return "Autumn";
        }
        return found;
    }

    /** Extracts the unit code that follows {@code word}, or {@code null} if absent. */
    private String findUnitCode(String str, String word) {
        return firstWordAfter(str, word);
    }

    /** Returns the folder into which all exported artefacts are written. */
    public String getOutputFolder() {
        return outputFolder;
    }

    /** Adds {@code x} to the badges recorded for this document. */
    public void addToBadges(DataImage x) {
        this.badges.add(x);
    }

    /** Prints the number of recorded badges and the badges themselves to standard output. */
    public void showBadges() {
        System.out.println("Totally " + getNumberOfBadges() + " badges in this document: " + this.badges.toString());
    }

    /** Prints the unit information currently held for this document to standard output. */
    public void showUnitInfo() {
        System.out.println("\n-----\nUnit Information\n-----\n");
        System.out.println("Unit Title: " + this.unitTitle);
        System.out.println("Unit Code: " + this.unitCodeHelper.getUnitCode());
        System.out.println("Department: " + this.department);
        System.out.println("Level of Study: " + this.levelofstudy);
        System.out.println("Programme: " + this.programme);
        System.out.println("Semester/Session: " + this.semester);
        System.out.println();
    }

    /** Returns how many badges have been recorded for this document. */
    public int getNumberOfBadges() {
        return this.badges.size();
    }

    /** Deletes the export folder together with everything inside it. */
    public void deleteLeftovers() {
        deleteDirectory(new File(this.outputFolder));
    }

    /**
     * Recursively deletes {@code dir}, stopping at the first entry that cannot be
     * removed.
     *
     * @return {@code true} if {@code dir} and all of its contents were deleted
     */
    private boolean deleteDirectory(File dir) {
        if (dir.isDirectory()) {
            File[] children = dir.listFiles();
            if (children == null) {
                // listFiles() yields null on an I/O error or if the directory
                // vanished in the meantime; report failure instead of crashing.
                return false;
            }
            for (File child : children) {
                if (!deleteDirectory(child)) {
                    return false;
                }
            }
        }
        return dir.delete();
    }
}