/
CDObject.jl
145 lines (117 loc) · 3.59 KB
/
CDObject.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
export CDTextString, CDDate, CDRect, getUTCTime
"""
```
    CDTextString
```
The PDF file format provides two primary string types: the hexadecimal string
`CosXString` and the literal string `CosLiteralString`. However, these are merely binary
representations of strings, with no encoding associated for semantic interpretation.
The encoding is mostly determined by the associated fonts and character maps in the
content stream. There are also strings used in descriptions and other attributes of a
PDF file where no font or mapping information is provided. `CDTextString` represents the
string type in such situations. Typically, strings in PDFs are of 3 types:

1. Text string
    a. PDFDocEncoded string - similar to ISO_8859-1
    b. UTF-16BE string
2. ASCII string
3. Byte string - pure binary data with no interpretation

Types 1 and 2 can be represented by `CDTextString`. `convert` methods are provided to
translate a `CosString` to a `CDTextString`.

*Ref*: PDF Specification Section 7.9.2

*Note*: Internally `CDTextString` is a `String` object of julia.
"""
const CDTextString = String
using Dates
using Dates: CompoundPeriod
using Rectangle
using Printf
import Base: ==, isless, show
"""
```
    CDDate
```
Timezone-aware date and time value as found in PDF date strings, which use the format
(D:YYYYMMDDHHmmSSOHH'mm).

# Fields
- `d::DateTime`: the date and time as written, without any timezone adjustment.
- `tz::CompoundPeriod`: the magnitude of the timezone offset.
- `ahead::Bool`: `true` when the offset carries a `+` sign, `false` when it carries a
  `-` sign. Defaults to `true` when omitted from the constructor.
"""
struct CDDate
    d::DateTime
    tz::CompoundPeriod
    ahead::Bool

    function CDDate(d::DateTime, tz::CompoundPeriod, ahead::Bool = true)
        return new(d, tz, ahead)
    end
end
# Pattern for PDF date strings of the form D:YYYYMMDDHHmmSSOHH'mm.
# Named capture groups:
#   dt    - the run of two to seven digit pairs that follows the "D:" prefix
#   tz    - the optional zone part: either a signed offset or the single letter Z
#   ahead - the sign character (+ or -) of a signed offset
#   tzh   - the two-digit hour part of the offset
#   tzm   - the optional two-digit minute part, introduced by an apostrophe
# The pattern carries no ^ or $ anchors, so `match` finds the first occurrence anywhere
# in the input and ignores any text after it.
const CDDATE_REGEX =
r"D:(?<dt>(\d\d){2,7})(?<tz>((?<ahead>[+-])(?<tzh>\d\d)('(?<tzm>\d\d))?|Z))?"
"""
```
    CDDate(s::CDTextString)
```
Build a `CDDate` from a PDF date string of the form (D:YYYYMMDDHHmmSSOHH'mm).

The timezone offset is optional. When the hour or minute part of the offset is absent it
is taken as zero, and the offset is treated as ahead of UTC unless the sign is `-`.

Raises an error with `E_INVALID_DATE` when the input does not match the expected
pattern.

# Example
```
julia> date = CDDate("D:20190425173659+05'30")
D:20190425173659+05'30

julia> date.d
2019-04-25T17:36:59

julia> date.tz
5 hours, 30 minutes

julia> date.ahead
true
```
"""
function CDDate(str::CDTextString)
    parsed = match(CDDATE_REGEX, str)
    if parsed === nothing
        error(E_INVALID_DATE)
    end

    direction = parsed[:ahead]
    rawhours = parsed[:tzh]
    rawminutes = parsed[:tzm]

    # Offset parts that were not captured contribute a zero period.
    offsethours = Hour(rawhours === nothing ? 0 : parse(Int, rawhours))
    offsetminutes = Minute(rawminutes === nothing ? 0 : parse(Int, rawminutes))
    offset = CompoundPeriod(offsethours, offsetminutes)

    # A missing sign (no offset at all, or "Z") counts as ahead of UTC.
    isahead = direction != "-"

    timestamp = DateTime(parsed[:dt], dateformat"yyyymmddHHMMSS")
    return CDDate(timestamp, offset, isahead)
end
# Print `dt` in the PDF date string notation: "D:" followed by the fourteen-digit
# timestamp, then either "Z" when the offset holds no periods, or a sign and the
# offset written as HH'mm.
function Base.show(io::IO, dt::CDDate)
    print(io, "D:")
    Dates.format(io, dt.d, dateformat"YYYYmmddHHMMSS")

    offsetparts = dt.tz.periods
    np = length(offsetparts)
    @assert np <= 2

    if np == 0
        print(io, "Z")
        return nothing
    end

    print(io, dt.ahead ? "+" : "-")

    hours, minutes = 0, 0
    if np == 2
        # With two periods the first is read as hours and the second as minutes.
        hours = offsetparts[1].value
        minutes = offsetparts[2].value
    else
        single = offsetparts[1]
        if single isa Hour
            hours = single.value
        elseif single isa Minute
            minutes = single.value
        end
    end

    print(io, @sprintf("%02d'%02d", hours, minutes))
end
"""
```
    getUTCTime(d::CDDate) -> CDDate
```
Return a new `CDDate` holding the same instant as `d` expressed at UTC, i.e. with an
empty timezone offset. An offset ahead of UTC is subtracted from the stored time; an
offset behind UTC is added to it.

# Example
```
julia> getUTCTime(CDDate("D:20190425173659+05'30"))
D:20190425120659Z
```
"""
function getUTCTime(d::CDDate)
    utc = if d.ahead
        d.d - d.tz
    else
        d.d + d.tz
    end
    return CDDate(utc, CompoundPeriod())
end
# Order two dates by their UTC-normalized timestamps, so values written with different
# timezone offsets compare by the instant they denote.
function Base.isless(d1::CDDate, d2::CDDate)
    first_utc = getUTCTime(d1).d
    second_utc = getUTCTime(d2).d
    return isless(first_utc, second_utc)
end
# Two dates are equal when they denote the same instant once normalized to UTC, even if
# they were written with different timezone offsets. This gives the same result as
# "neither is less than the other", because `isless` compares the same UTC timestamps.
Base.:(==)(d1::CDDate, d2::CDDate) = getUTCTime(d1).d == getUTCTime(d2).d

# `hash` must agree with `==`: values that compare equal have to produce the same hash,
# otherwise equal dates behave as distinct keys in a `Dict` or distinct members of a
# `Set`. Hash the UTC timestamp that `==` compares, mixed with a type-specific seed so a
# `CDDate` does not hash the same as the bare `DateTime` it wraps.
Base.hash(d::CDDate, h::UInt) = hash(getUTCTime(d).d, hash(:CDDate, h))
"""
```
    CDRect
```
`CosArray` representation of a rectangle in the lower-left and upper-right point format.

*Note*: `CDRect` is an alias for the `Rect` type of the `Rectangle` package.

# Example
```
julia> CDRect(CosArray(CosObject[CosInt(0), CosInt(0), CosInt(840), CosFloat(640)]))
Rect:[0.0 0.0 840.0 640.0]
```
"""
const CDRect = Rect