-
Notifications
You must be signed in to change notification settings - Fork 78
/
Copy pathObjectStream.java
139 lines (127 loc) · 4.33 KB
/
ObjectStream.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/**********************************************************************
* Jhove - JSTOR/Harvard Object Validation Environment
* Copyright 2005 by JSTOR and the President and Fellows of Harvard College
**********************************************************************/
package edu.harvard.hul.ois.jhove.module.pdf;
import java.io.*;
import java.util.*;
/**
 * This class implements the Object Stream, a way of storing
 * objects that was introduced in PDF 1.5.
 *
 * An object stream can contain one or more objects, as described in
 * Section 3.4.6 of the PDF Reference.
 *
 * JHOVE supports only FlateDecode as a filter for cross-reference
 * streams. This is consistent with the implementation limitation
 * described in Appendix H of the PDF manual for Acrobat 6 and earlier.
 *
 *
 * @author Gary McGath
 *
 */
public class ObjectStream {

    /**
     * Upper bound on the initial capacity requested for the index map.
     * The object count comes from the file being examined, so a bogus,
     * very large value must not be allowed to trigger an enormous
     * up-front allocation. The map still grows as entries are added.
     */
    private static final int MAX_INITIAL_INDEX_CAPACITY = 1024;

    /** The underlying Stream object. */
    private final PdfStream _ostrm;

    /** The dictionary of the underlying stream object. */
    private final PdfDictionary _dict;

    /** Value of the "N" dictionary entry; set by {@link #isValid()}. */
    private int _numObjects;

    /** Value of the "First" dictionary entry; set by {@link #isValid()}. */
    private int _firstOffset;

    /** Parser positioned on the content of the underlying stream. */
    private final Parser _parser;

    /** The file from which the stream content is read. */
    private final RandomAccessFile _raf;

    /* Index of the object stream. Maps an object number (Integer)
     * to the offset (Integer) read for that object. It is null
     * until readIndex has been called.
     */
    private Map _index;

    /**
     * Constructor.
     *
     * @param ostrm  the stream object which holds the object stream
     * @param raf    the file from which the stream is read
     */
    public ObjectStream(PdfStream ostrm, RandomAccessFile raf) {
        _ostrm = ostrm;
        _raf = raf;
        _dict = ostrm.getDict ();
        _parser = new Parser (new StreamTokenizer (raf, _ostrm.getStream ()));
    }

    /**
     * Checks the validity of the stream dictionary, and extracts
     * information necessary for subsequent reading.
     *
     * The dictionary is accepted only if "Type" is "ObjStm" and both
     * "N" and "First" are simple objects holding non-negative integers.
     *
     * @return <code>true</code> if the dictionary passes these checks,
     *         <code>false</code> otherwise, including when any exception
     *         is raised while the dictionary is being examined
     */
    public boolean isValid ()
    {
        try {
            /* Type must be ObjStm */
            PdfObject obj = _dict.get ("Type");
            String typeStr = null;
            if (obj instanceof PdfSimpleObject) {
                typeStr = ((PdfSimpleObject) obj).getStringValue ();
            }
            if (!"ObjStm".equals (typeStr)) {
                return false;
            }

            /* Number of objects */
            obj = _dict.get ("N");
            if (!(obj instanceof PdfSimpleObject)) {
                return false;
            }
            int numObjects = ((PdfSimpleObject) obj).getIntValue ();
            if (numObjects < 0) {
                /* A negative count is meaningless and would make
                 * readIndex fail with an unchecked exception. */
                return false;
            }
            _numObjects = numObjects;

            /* Offset of first object */
            obj = _dict.get ("First");
            if (!(obj instanceof PdfSimpleObject)) {
                return false;
            }
            int firstOffset = ((PdfSimpleObject) obj).getIntValue ();
            if (firstOffset < 0) {
                return false;
            }
            _firstOffset = firstOffset;

            /* The optional "Extends" entry (a reference to an object
             * stream which this one extends) is not examined. */
            return true;
        }
        catch (Exception e) {
            /* Any failure while examining the dictionary means the
             * stream cannot be used. */
            return false;
        }
    }

    /**
     * Reads the index of the object stream. The index consists of
     * as many pairs of integers (object number, then offset) as
     * were recorded from the "N" entry by {@link #isValid()}.
     * If an object number occurs more than once, the last offset
     * read for it is the one retained.
     *
     * @throws PdfException  propagated from the underlying stream
     * @throws IOException   propagated from the underlying stream
     */
    public void readIndex ()
        throws PdfException, IOException
    {
        Stream strm = _ostrm.getStream ();
        strm.setFilters (_ostrm.getFilters ());
        strm.initRead (_raf);
        /* Cap the presized capacity: the count is taken from the file
         * and must not be trusted to size an allocation. */
        _index = new HashMap
            (Math.min (_numObjects, MAX_INITIAL_INDEX_CAPACITY));
        for (int i = 0; i < _numObjects; i++) {
            /* If I'm reading it correctly, the numbers are
             * encoded as ASCII strings separated by white space.
             * I don't know what the restrictions, if any, are on
             * the white space.
             */
            Integer onum = Integer.valueOf (strm.readAsciiInt ());
            Integer offset = Integer.valueOf (strm.readAsciiInt ());
            _index.put (onum, offset);
        }
    }

    /**
     * Extracts an object from the stream. {@link #readIndex()} must
     * have been called first, since the index does not exist until then.
     *
     * @param objnum  the object number of the object to extract
     * @return the object parsed at the indexed offset (relative to the
     *         "First" offset), with its object number set to
     *         <code>objnum</code>; or <code>null</code> if the index
     *         has no entry for <code>objnum</code>
     * @throws PdfException  if an I/O error occurs while seeking to or
     *         reading the object (reported as a PdfMalformedException),
     *         or if the parser reports a problem
     */
    public PdfObject getObject (int objnum)
        throws PdfException
    {
        Integer off = (Integer) _index.get (Integer.valueOf (objnum));
        if (off == null) {
            return null;
        }
        try {
            _parser.seek (off.intValue () + _firstOffset);
            PdfObject object = _parser.readObject (false);
            /* Need to ensure the object number is set */
            object.setObjNumber (objnum);
            return object;
        }
        catch (IOException e) {
            throw new PdfMalformedException
                ("Offset out of bounds in object stream");
        }
    }
}