Skip to content

Commit

Permalink
added some exception handling
Browse files Browse the repository at this point in the history
  • Loading branch information
sagnik committed Jul 20, 2016
1 parent 55112de commit 5956ad4
Show file tree
Hide file tree
Showing 15 changed files with 53 additions and 34 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ This will produce five PNGs of the form `<*-page-*>-chars,words,lines,paragraphs

### How to Use
------------
Put `"edu.psu.sagnik.research" %% "pdsimplifyparser" % "<version-number>" exclude("javax.jms", "jms") exclude("com.sun.jdmk", "jmxtools") exclude("com.sun.jmx", "jmxri")` where <version-number> is the latest version number from `Build.sbt`. Current is `0.0.4` (subject to change).
Put `"edu.psu.sagnik.research" %% "pdsimplifyparser" % "<version-number>" exclude("javax.jms", "jms") exclude("com.sun.jdmk", "jmxtools") exclude("com.sun.jmx", "jmxri")` in your dependencies, where `<version-number>` is the latest version number from `build.sbt`. The current version is `0.0.5` (subject to change).

Also make sure you have `https://oss.sonatype.org/content/repositories/releases/` in your resolvers.

Expand Down
4 changes: 2 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
version in ThisBuild := {
val major = 0
val minor = 0
val patch = 4
val patch = 5
s"$major.$minor.$patch"
} //added patch for rasters that we were not getting before.
} //added some error handling for font information.

scalaVersion in ThisBuild := "2.11.8"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import edu.psu.sagnik.research.pdsimplify.raster.impl.ProcessRaster
import edu.psu.sagnik.research.pdsimplify.text.impl.ProcessText
import org.apache.pdfbox.pdmodel.{PDDocument, PDPage}

import scala.util.Try

/**
* Created by schoudhury on 7/1/16.
*/
Expand All @@ -29,12 +31,14 @@ object ProcessDocument {
paragraphs=paragraphs,
gPaths=pdGraphicsPaths,
rasters=rasters,
bb=Rectangle(
page.getBBox.getLowerLeftX,
page.getBBox.getHeight-page.getBBox.getUpperRightY,
page.getBBox.getUpperRightX,
page.getBBox.getUpperRightY
)
bb=
Rectangle(
page.getBBox.getLowerLeftX,
page.getBBox.getHeight-page.getBBox.getUpperRightY,
page.getBBox.getUpperRightX,
page.getBBox.getUpperRightY
)

)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import edu.psu.sagnik.research.pdsimplify.path.model.PDPath
import edu.psu.sagnik.research.pdsimplify.raster.model.PDRasterImage
import edu.psu.sagnik.research.pdsimplify.text.model.PDParagraph

import scala.util.Try

/**
* Created by schoudhury on 7/1/16.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,37 @@
package edu.psu.sagnik.research.pdsimplify.text.impl

import java.util.logging.{Level, Logger}

import edu.psu.sagnik.research.pdsimplify.path.impl.CreatePathStyle
import edu.psu.sagnik.research.pdsimplify.text.model.{PDChar, PDCharStyle, PDFontInfo}
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState
import org.apache.pdfbox.text.TextPosition

import scala.util.{Failure, Success, Try}

/**
* Created by schoudhury on 6/30/16.
*/
object CreateTextStyle {
lazy val logger = Logger.getLogger("pdsimplify.text.impl.CreateTextStyle")
logger.setLevel(Level.ALL)
//TODO: change these to proper default values
val DEFAULT_FONT_NAME="times new roman"
val DEFAULT_FONT_SIZE=10f
val DEFAULT_FONT_FAMILY="times"
val DEFAULT_IS_BOLD=false
val DEFAULT_IS_ITALIC=false
val DEFAULT_FONT_WEIGHT=100f
val DEFAULT_ROTATION=0f

def apply(x:TextPosition,gs:PDGraphicsState):PDCharStyle=PDCharStyle(
font=PDFontInfo(
fontName= x.getFont.getName,
fontSize = x.getFontSizeInPt,
fontFamily = x.getFont.getFontDescriptor.getFontFamily,
isBold=x.getFont.getFontDescriptor.isForceBold, //TODO: text can be made look bold or italic by modifying the text or the text line matrix
isItalic=x.getFont.getFontDescriptor.isItalic,
fontWeight=x.getFont.getFontDescriptor.getFontWeight
fontName= Try(x.getFont.getName) match {case Success(v) => v; case Failure(e) => logger.warning(e.getMessage); DEFAULT_FONT_NAME },
fontSize = Try(x.getFontSizeInPt) match {case Success(v) => v; case Failure(e) => logger.warning(e.getMessage); DEFAULT_FONT_SIZE },
fontFamily = Try(x.getFont.getFontDescriptor.getFontFamily) match {case Success(v) => v; case Failure(e) => logger.warning(e.getMessage); DEFAULT_FONT_FAMILY },
isBold=Try(x.getFont.getFontDescriptor.isForceBold) match {case Success(v) => v; case Failure(e) => logger.warning(e.getMessage); DEFAULT_IS_BOLD }, //TODO: text can be made look bold or italic by modifying the text or the text line matrix
isItalic=Try(x.getFont.getFontDescriptor.isItalic) match {case Success(v) => v; case Failure(e) => logger.warning(e.getMessage); DEFAULT_IS_ITALIC },
fontWeight=Try(x.getFont.getFontDescriptor.getFontWeight) match {case Success(v) => v; case Failure(e) => logger.warning(e.getMessage); DEFAULT_FONT_WEIGHT }
),
fill = if (gs.getTextState.getRenderingMode.isFill)
CreatePathStyle.getHexRGB(gs.getNonStrokingColor)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,16 @@ class ProcessText(page:PDPage) extends PDFTextStripper {
else None
}

/*
@Override @throws[IOException]
override protected def writeString(s: String, textPositions: util.List[TextPosition]): Unit = {
//This is needed because writeLine() sometimes does not call writeWords() at all, especially
//when the string contains space characters.
val tPs=textPositions.asScala.toList
tPs.foreach(tP=>{
println("<"+tP.getUnicode+"/>")
if (!" ".equals(tP)){
currentChars=currentChars :+ PDChar(
content=tP.getUnicode,
bb=TextPositionBB.approximate(tP),
bb=TextPositionBB.approximate(tP,page),
glyphBB=TextPositionBB.glyphBased(tP,page),
CreateTextStyle(tP,getGraphicsState)
)
Expand All @@ -121,8 +119,8 @@ class ProcessText(page:PDPage) extends PDFTextStripper {
}
})
}
*/

/*
@Override @throws[IOException]
override protected def writeString(s: String, textPositions: util.List[TextPosition]): Unit = {
//this has to be done because sometimes the writeLine() method is not calling the writeWords() method at all, especially
Expand All @@ -148,6 +146,7 @@ class ProcessText(page:PDPage) extends PDFTextStripper {
)
super.writeString(s)
}
*/

def stripPage(pdPageNum: Int, document: PDDocument): List[PDParagraph] = {
setStartPage(pdPageNum + 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@ import java.awt.Color
import java.io.File

import edu.psu.sagnik.research.pdsimplify.model.Rectangle
import edu.psu.sagnik.research.pdsimplify.path.impl.ProcessPaths
import edu.psu.sagnik.research.pdsimplify.raster.impl.ProcessRaster
import edu.psu.sagnik.research.pdsimplify.text.impl.ProcessText
import edu.psu.sagnik.research.pdwriters.writers.pdf.CreateMarkedPDF
import edu.psu.sagnik.research.pdwriters.writers.svg.CreateSVG
import org.apache.pdfbox.pdmodel.PDDocument
import java.util.logging.{Level, Logger}
import Level.{FINE, INFO}

import edu.psu.sagnik.research.pdsimplify.impl.ProcessDocument
import edu.psu.sagnik.research.pdsimplify.path.model.PDPath
import edu.psu.sagnik.research.pdsimplify.raster.model.PDRasterImage
import edu.psu.sagnik.research.pdsimplify.text.model.PDParagraph
import edu.psu.sagnik.research.pdwriters.writers.image.CreateMarkedPNG


import scala.util.{Failure, Success}

/**
* Created by schoudhury on 6/27/16.
*/
Expand All @@ -41,14 +42,14 @@ object ShowResults {
val document = PDDocument.load(new File(pdLoc))
val page = document.getPage(pageNum)

val paragraphs=new ProcessText(page).stripPage(pageNum,document)
val SimplifiedDocument=ProcessDocument(document)

val paragraphs=SimplifiedDocument.pages(pageNum).paragraphs
val rasters=SimplifiedDocument.pages(pageNum).rasters
val graphicsPaths=SimplifiedDocument.pages(pageNum).gPaths

val imFinder=new ProcessRaster(page)
imFinder.getImages()

val pathFinder=new ProcessPaths(page)
pathFinder.getPaths()
val segments=pathFinder.paths
val segments=graphicsPaths
.filter(x=> x.doPaint)
.flatMap(x=>x.subPaths)
.flatMap(x=>x.segments)
Expand Down Expand Up @@ -77,7 +78,7 @@ object ShowResults {

printExtractionResult(pdLoc,pageNum,paragraphs.map(_.bb),Color.CYAN,"paragraphs")

printExtractionResult(pdLoc,pageNum,imFinder.rasterImages.map(_.bb),Color.MAGENTA,"rasters")
printExtractionResult(pdLoc,pageNum,rasters.map(_.bb),Color.MAGENTA,"rasters")

printExtractionResult(pdLoc,pageNum,segments.map(_.bb),Color.ORANGE,"paths")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ object CreateJSON {

def apply(jsonLoc:String,pDS: PDDocumentSimple)= {
val jsonContent =
(
"pages" ->
pDS.pages.map {
p =>
Expand Down Expand Up @@ -94,6 +93,6 @@ object CreateJSON {
)
)
}
)

}
}
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added writers/src/test/resources/10.1.1.10.4597.pdf
Binary file not shown.

0 comments on commit 5956ad4

Please sign in to comment.