Permalink
Browse files

Use a better strategy for separating transcripts from SOBI files.

  • Loading branch information...
1 parent 11c9e83 commit b12893c25200232b7338258c8e3c7f7ca465563b @GraylinKim GraylinKim committed Apr 12, 2012
Showing with 7 additions and 13 deletions.
  1. +7 −13 src/main/java/gov/nysenate/openleg/scripts/Collate.java
@@ -54,22 +54,16 @@ public static void main(String[] args) {
BufferedReader br = null;
try {
- br = new BufferedReader(new StringReader(FileUtils.readFileToString(file, "UTF-8")));
- // Sort the different file parts to their destination folders
- in = br.readLine();
-
- if (in==null) {
- br.close();
- logger.error("Moving empty file: "+file);
- FileUtils.moveFileToDirectory(file, destDirectory, false);
-
- } else if (!in.startsWith("<?xml")) {
- br.close();
+ if (!file.getName().startsWith("SOBI")) {
logger.info("Moving transcript: "+file);
FileUtils.moveFileToDirectory(file, transcripts, false);
} else {
- do {
+ br = new BufferedReader(new StringReader(FileUtils.readFileToString(file, "UTF-8")));
+ // Sort the different file parts to their destination folders
+ in = br.readLine();
+
+ while((in = br.readLine()) != null) {
if(in.matches("<sencalendar.+")) {
File calendar = new File(calendars, file.getName()+"-calendar-"+inc+".xml");
logger.info("Extracting calendar: "+calendar);
@@ -88,7 +82,7 @@ else if(in.matches("<senannotated.+")) {
write(getXml("</senannotated.+", in, br), annotation);
inc++;
}
- } while((in = br.readLine()) != null);
+ }
br.close();
logger.info("Moving bill: "+file);

0 comments on commit b12893c

Please sign in to comment.