Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
sagnik committed Aug 26, 2016
1 parent ccb5e8e commit f0dab66
Show file tree
Hide file tree
Showing 46 changed files with 48 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,26 @@ import edu.psu.sagnik.research.data.RectangleOTL
*/

object BB {
def Line(p0: Point2D.Float, p1: Point2D.Float) = {
def Line(p0: Point2D.Float, p1: Point2D.Float, pageHeight: Float) = {
val xMin = scala.math.min(p0.x, p1.x)
val yMin = scala.math.min(p0.y, p1.y)
val xMax = scala.math.max(p0.x, p1.x)
val yMax = scala.math.max(p0.x, p1.x)
val yMax = scala.math.max(p0.y, p1.y)
RectangleOTL(
xTopLeft = xMin,
yTopLeft = yMin,
yTopLeft = pageHeight - yMax,
widthRight = xMax - xMin,
heightDown = yMax - yMin
)
}

def Curve(start: Point2D.Float, end: Point2D.Float, cp1: Point2D.Float, cp2: Point2D.Float) = {
def Curve(start: Point2D.Float, end: Point2D.Float, cp1: Point2D.Float, cp2: Point2D.Float, pageHeight: Float) = {
val xs = List(start.x, end.x, cp1.x, cp2.x)
val ys = List(start.y, end.y, cp1.y, cp2.y)
val xTopLeft = xs.min
val yTopLeft = ys.min
RectangleOTL(
xTopLeft = xTopLeft,
yTopLeft = yTopLeft,
yTopLeft = pageHeight - ys.max,
widthRight = xs.max - xs.min,
heightDown = ys.max - ys.min
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import org.apache.pdfbox.pdmodel.graphics.image.PDImage
import org.apache.pdfbox.util.Matrix

import java.util.logging.{ Level, Logger }
import Level.{ INFO, FINE }

/**
* Created by schoudhury on 6/22/16.
Expand Down Expand Up @@ -48,10 +47,10 @@ class ProcessPaths(page: PDPage) extends PDFGraphicsStreamEngine(page: PDPage) {
currentSubPath = Some(
PDShape(
segments = List(
PDLine(fp(p0), fp(p1), BB.Line(fp(p0), fp(p1))),
PDLine(fp(p1), fp(p2), BB.Line(fp(p1), fp(p2))),
PDLine(fp(p2), fp(p3), BB.Line(fp(p2), fp(p3))),
PDLine(fp(p3), fp(p0), BB.Line(fp(p3), fp(p0)))
PDLine(fp(p0), fp(p1), BB.Line(fp(p0), fp(p1), page.getBBox.getHeight)),
PDLine(fp(p1), fp(p2), BB.Line(fp(p1), fp(p2), page.getBBox.getHeight)),
PDLine(fp(p2), fp(p3), BB.Line(fp(p2), fp(p3), page.getBBox.getHeight)),
PDLine(fp(p3), fp(p0), BB.Line(fp(p3), fp(p0), page.getBBox.getHeight))
),
fromReCommand = true
)
Expand Down Expand Up @@ -111,13 +110,13 @@ class ProcessPaths(page: PDPage) extends PDFGraphicsStreamEngine(page: PDPage) {
case Some(csp) => currentSubPath = Some(
csp.copy(
segments = csp.segments :+
PDLine(currentPoint, fp(new Point2D.Float(x, y)), BB.Line(currentPoint, fp(new Point2D.Float(x, y))))
PDLine(currentPoint, fp(new Point2D.Float(x, y)), BB.Line(currentPoint, fp(new Point2D.Float(x, y)), page.getBBox.getHeight))
)
)
case _ => currentSubPath = Some( //current sub path is empty. We need to start a new PDShape i.e. subpath
PDShape(
segments = List(
PDLine(currentPoint, fp(new Point2D.Float(x, y)), BB.Line(currentPoint, fp(new Point2D.Float(x, y))))
PDLine(currentPoint, fp(new Point2D.Float(x, y)), BB.Line(currentPoint, fp(new Point2D.Float(x, y)), page.getBBox.getHeight))
),
fromReCommand = false
)
Expand All @@ -137,7 +136,7 @@ class ProcessPaths(page: PDPage) extends PDFGraphicsStreamEngine(page: PDPage) {
endPoint = fp(new Point2D.Float(x3, y3)),
controlPoint1 = fp(new Point2D.Float(x1, y1)),
controlPoint2 = fp(new Point2D.Float(x2, y2)),
BB.Curve(currentPoint, fp(new Point2D.Float(x3, y3)), fp(new Point2D.Float(x1, y1)), fp(new Point2D.Float(x2, y2)))
BB.Curve(currentPoint, fp(new Point2D.Float(x3, y3)), fp(new Point2D.Float(x1, y1)), fp(new Point2D.Float(x2, y2)), page.getBBox.getHeight)
)
)
)
Expand All @@ -149,7 +148,7 @@ class ProcessPaths(page: PDPage) extends PDFGraphicsStreamEngine(page: PDPage) {
endPoint = fp(new Point2D.Float(x3, y3)),
controlPoint1 = fp(new Point2D.Float(x1, y1)),
controlPoint2 = fp(new Point2D.Float(x2, y2)),
BB.Curve(currentPoint, fp(new Point2D.Float(x3, y3)), fp(new Point2D.Float(x1, y1)), fp(new Point2D.Float(x2, y2)))
BB.Curve(currentPoint, fp(new Point2D.Float(x3, y3)), fp(new Point2D.Float(x1, y1)), fp(new Point2D.Float(x2, y2)), page.getBBox.getHeight)
)
),
fromReCommand = false
Expand All @@ -172,16 +171,17 @@ class ProcessPaths(page: PDPage) extends PDFGraphicsStreamEngine(page: PDPage) {
val startPoint = csp.segments.head.startPoint
currentSubPath = Some(
csp.copy(
segments = csp.segments :+ PDLine(currentPoint, startPoint, BB.Line(currentPoint, startPoint))
segments = csp.segments :+ PDLine(currentPoint, startPoint, BB.Line(currentPoint, startPoint, page.getBBox.getHeight))
)
)
currentPoint = startPoint
subPathComplete()

case _ => {
case _ =>
logger.warning("A path encountered a close operator before it even started. " +
"It will henceforth be known as Rickon Stark Blvd."); subPathComplete()
} //should never reach here}
"It will henceforth be known as Rickon Stark Blvd.")
subPathComplete()
//should never reach here

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class ProcessRaster(page: PDPage) extends PDFGraphicsStreamEngine(page: PDPage)

bb = RectangleOTL(
xTopLeft = getCTM.getTranslateX - page.getCropBox.getLowerLeftX,
yTopLeft = getCTM.getTranslateY - page.getCropBox.getLowerLeftY,
yTopLeft = page.getBBox.getHeight - (getCTM.getTranslateY - page.getCropBox.getLowerLeftY) - getCTM.getScaleY,
widthRight = getCTM.getScaleX,
heightDown = getCTM.getScaleY
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ object CalculateBB {

def apply(texts: List[TextSegment]): Option[RectangleOTL] =
if (texts.nonEmpty) {
val xTopLeft = texts.map(x => x.bb.xTopLeft).min
val yTopLeft = texts.map(x => x.bb.yTopLeft).min
val width = texts.map(t => t.bb.xTopLeft + t.bb.widthRight).max - xTopLeft
val height = texts.map(t => t.bb.yTopLeft + t.bb.heightDown).max - xTopLeft
val xTopLeft = texts.map(t => t.bb.xTopLeft).min
val yTopLeft = texts.map(t => t.bb.yTopLeft).min
val width = texts.map(t => (t.bb.xTopLeft + t.bb.widthRight)).max - xTopLeft
val height = texts.map(t => (t.bb.yTopLeft + t.bb.heightDown)).max - yTopLeft
Some(
RectangleOTL(
xTopLeft = xTopLeft,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ object TextPositionBB {

def approximate(tP: TextPosition, page: PDPage) = RectangleOTL(
xTopLeft = tP.getXDirAdj, // text can be rotated, which will change the x,y coordinates and bounding boxes
yTopLeft = tP.getYDirAdj,
yTopLeft = tP.getYDirAdj - tP.getHeightDir,
widthRight = tP.getWidthDirAdj,
heightDown = tP.getHeightDir //(tP.getYDirAdj - tP.getHeightDir)+tP.getHeightDir
)
Expand Down Expand Up @@ -64,11 +64,12 @@ object TextPositionBB {
}

val s = rotateAT.createTransformedShape(flipAT.createTransformedShape(at.createTransformedShape(rect)))
val heightDown = s.getBounds2D.getMaxY.toFloat - s.getBounds2D.getMinY.toFloat
RectangleOTL(
xTopLeft = s.getBounds2D.getMinX.toFloat,
yTopLeft = s.getBounds2D.getMinY.toFloat,
yTopLeft = s.getBounds2D.getMinY.toFloat - heightDown,
widthRight = s.getBounds2D.getMaxX.toFloat - s.getBounds2D.getMinX.toFloat,
heightDown = s.getBounds2D.getMaxY.toFloat - s.getBounds2D.getMinY.toFloat
heightDown = heightDown
)
}

Expand Down Expand Up @@ -116,12 +117,13 @@ object TextPositionBB {
else {
val xTopLeft = afterPageTransformation.map(_.getBounds.x).min
val yTopLeft = afterPageTransformation.map(_.getBounds.y).min
val heightDown = afterPageTransformation.map(a => a.getBounds.y + a.getBounds.height).max - yTopLeft
Some(
RectangleOTL(
xTopLeft = xTopLeft,
yTopLeft = yTopLeft,
yTopLeft = yTopLeft - heightDown,
widthRight = afterPageTransformation.map(a => a.getBounds.x + a.getBounds.width).max - xTopLeft,
heightDown = afterPageTransformation.map(a => a.getBounds.y + a.getBounds.height).max - yTopLeft
heightDown = heightDown
)
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@ import java.awt.Color
import java.io.File

import org.apache.pdfbox.pdmodel.PDDocument
import java.util.logging.{Level, Logger}
import java.util.logging.{ Level, Logger }

import edu.psu.sagnik.research.data.RectangleOTL
import edu.psu.sagnik.research.pdsimplify.impl.ProcessDocument
import edu.psu.sagnik.research.pdsimplify.path.model.PDPath
import edu.psu.sagnik.research.pdsimplify.raster.model.PDRasterImage
import edu.psu.sagnik.research.pdsimplify.text.model.PDParagraph
import edu.psu.sagnik.research.pdwriters.writers.image.CreateMarkedPNG
import edu.psu.sagnik.research.pdwriters.writers.pdf.CreateMarkedPDF

import scala.util.{Failure, Success}
import scala.util.{ Failure, Success }

/**
* Created by schoudhury on 6/27/16.
Expand All @@ -27,14 +24,14 @@ object ShowResults {
def printExtractionResult(pdLoc: String, pageNum: Int, bbs: List[RectangleOTL], c: Color, qualifier: String) = {
val document = PDDocument.load(new File(pdLoc))
val page = document.getPage(pageNum)
CreateMarkedPDF(pdLoc, document, pageNum, page, bbs, c, qualifier)
CreateMarkedPNG(pdLoc, document, pageNum, page, bbs, c, qualifier)
logger.fine(s"created ${qualifier.substring(0, qualifier.length - 1)} marked PDF")
}

def main(args: Array[String]): Unit = {
val DEFAULT_LOC = "/Users/schoudhury/codes/res-doc-sci/res-app-cmd/src/test/resources/008baad5-3fdb-4aea-9311-3ef7499e3f1f.pdf"
val DEFAULT_LOC = "src/test/resources/test1.pdf"
//"/Users/schoudhury/hassan/C10-2042.pdf"
val DEFAULT_PAGE_NUM = 3
val DEFAULT_PAGE_NUM = 5

val pdLoc = if (args.length > 1) args(0) else DEFAULT_LOC
val pageNum = if (args.length == 2) args(1).toInt else DEFAULT_PAGE_NUM
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package edu.psu.sagnik.research.pdwriters.writers.image

import java.awt.geom.Rectangle2D
import java.awt.{BasicStroke, Color}
import java.io.{File}
import java.util.logging.{Level, Logger}
import java.awt.{ BasicStroke, Color }
import java.io.{ File }
import java.util.logging.{ Level, Logger }
import javax.imageio.ImageIO

import edu.psu.sagnik.research.data.RectangleOTL
import org.apache.pdfbox.pdmodel.{PDDocument, PDPage}
import org.apache.pdfbox.pdmodel.{ PDDocument, PDPage }
import org.apache.pdfbox.rendering.PDFRenderer

/**
Expand All @@ -31,7 +31,7 @@ object CreateMarkedPNG {
val pH = page.getBBox.getHeight

for (bb <- bbs) {
logger.info(s"[$tElemType] ${(bb.xTopLeft, bb.yTopLeft, bb.widthRight, bb.heightDown)}")
//logger.info(s"[$tElemType] ${(bb.xTopLeft, bb.yTopLeft, bb.widthRight, bb.heightDown)}")
g2d.draw(new Rectangle2D.Float(bb.xTopLeft, bb.yTopLeft, bb.widthRight, bb.heightDown))
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import java.awt.geom.Point2D

import edu.psu.sagnik.research.data.RectangleOTL
import edu.psu.sagnik.research.pdsimplify.model.PDDocumentSimple
import edu.psu.sagnik.research.pdsimplify.model.Rectangle
import edu.psu.sagnik.research.pdsimplify.path.model.PDLine
import org.json4s.JsonDSL._
import org.json4s.JsonDSL._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ package edu.psu.sagnik.research.pdwriters.writers.pdf

import java.awt.Color
import java.io.IOException
import java.util.logging.{Level, Logger}
import java.util.logging.{ Level, Logger }

import edu.psu.sagnik.research.data.RectangleOTL
import org.apache.pdfbox.pdmodel.{PDDocument, PDPage, PDPageContentStream}
import org.apache.pdfbox.pdmodel.{ PDDocument, PDPage, PDPageContentStream }
import org.apache.pdfbox.util.Matrix

/**
Expand All @@ -21,9 +21,9 @@ object CreateMarkedPDF {
private def drawRect(content: PDPageContentStream, color: Color, rect: RectangleOTL, page: PDPage, fill: Boolean) {
content.addRect(
rect.xTopLeft + page.getCropBox.getLowerLeftX,
page.getCropBox.getHeight - (rect.yTopLeft + page.getCropBox.getLowerLeftY),
rect.x2 - rect.x1,
rect.y1 - rect.y2
page.getCropBox.getHeight - (rect.yTopLeft + page.getCropBox.getLowerLeftY) - rect.heightDown,
rect.widthRight,
rect.heightDown
)
//Remember that addRect draws a rectangle whose (x, y) is its bottom-left corner. Also, the rect was adjusted for the crop box earlier. Since we are not changing the
//content stream, that adjustment has to be reversed here.
Expand All @@ -36,7 +36,7 @@ object CreateMarkedPDF {
}
}

def apply(docLoc: String, document: PDDocument, pageNum: Int, page: PDPage, bbs: List[Rectangle], color: Color, tElemType: String): Unit = {
def apply(docLoc: String, document: PDDocument, pageNum: Int, page: PDPage, bbs: List[RectangleOTL], color: Color, tElemType: String): Unit = {
bbs.foreach(bb => {
val content = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.APPEND, false)
drawRect(content, color, bb, page, fill = false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ object TextHelper {
"\""

def getLocationString(c: PDChar, h: Float): String = "y=\"" +
(c.bb.y2) +
(c.bb.yTopLeft) +
"\" x=\"" +
c.bb.x1 +
c.bb.xTopLeft +
"\">"

def getTransformString(c: PDChar): String = "transform=\"rotate(" + c.style.rotation.toString + ")\""
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-chars.pdf
Binary file not shown.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-chars.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-lines.pdf
Binary file not shown.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-lines.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-paragraphs.pdf
Binary file not shown.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-paragraphs.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-paths.pdf
Binary file not shown.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-rasters.pdf
Binary file not shown.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-rasters.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-words.pdf
Binary file not shown.
Binary file modified writers/src/test/resources/LoremIpsum-page-0-words.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed writers/src/test/resources/test.pdf
Binary file not shown.
Binary file not shown.
Binary file added writers/src/test/resources/test1-page-5-chars.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file added writers/src/test/resources/test1-page-5-lines.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added writers/src/test/resources/test1-page-5-paths.pdf
Binary file not shown.
Binary file added writers/src/test/resources/test1-page-5-paths.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file added writers/src/test/resources/test1-page-5-words.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit f0dab66

Please sign in to comment.