-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdf_openaction.py
102 lines (90 loc) · 3.95 KB
/
pdf_openaction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python
# encoding:UTF-8
"""
This script reports the OpenAction and AA (Additional Actions) entries found in a supplied PDF file.
It walks the PDF object tree starting at the trailer and inspects every dictionary for
/OpenAction and /AA keys, as specified in Sections 3.6.1 and 3.6.2 of the PDF 1.7 reference.
"""
import sys
import StringIO
import warnings
import PyPDF2
from PyPDF2.generic import DictionaryObject, ArrayObject, IndirectObject
from PyPDF2.utils import PdfReadError
def walk_pdf_tree(node, already_visited=None):
    """Lazily yield ``node`` and every object reachable from it, pre-order.

    Dictionaries contribute their values, arrays contribute their items and
    indirect references contribute the object they point to. A node is always
    yielded before any of its children.

    Args:
        node: The PDF object to start walking from.
        already_visited: Optional set of indirect-object ids (``idnum``) whose
            targets must not be dereferenced again. The set is updated in
            place; a fresh one is created when the argument is omitted.

    Yields:
        Every object encountered. Indirect references themselves are always
        yielded, even when their target has already been walked.
    """
    # Indirect objects can refer to each other in a loop.
    # Maintain a set of visited ids so each target is expanded only once.
    if already_visited is None:
        already_visited = set()

    # An explicit stack of child iterators replaces recursion: long chains of
    # references would otherwise nest one generator per level and run into
    # the interpreter's recursion limit.
    pending = [iter((node,))]
    while pending:
        try:
            current = next(pending[-1])
        except StopIteration:
            # This level is exhausted; resume the parent's remaining children.
            pending.pop()
            continue

        yield current

        if isinstance(current, DictionaryObject):
            # Only the values are walked; keys are never yielded.
            pending.append(iter(current.values()))
        elif isinstance(current, ArrayObject):
            pending.append(iter(current))
        elif isinstance(current, IndirectObject):
            if current.idnum in already_visited:
                continue
            already_visited.add(current.idnum)
            # Dereferencing an object can sometimes fail; skip such targets.
            try:
                target = current.getObject()
            except PdfReadError:
                continue
            pending.append(iter((target,)))
def find_openactions(pdf_object):
    """Collect a description of every /OpenAction and /AA entry in a PDF.

    Args:
        pdf_object: Parsed PDF whose ``trailer`` attribute is the root of the
            object tree to walk.

    Returns:
        A list with one entry per finding, in traversal order:
        the ``/S`` value of an action-dictionary OpenAction, the string
        "Destination" for an array OpenAction, a placeholder string when an
        action dictionary has no ``/S`` entry, and a fixed "unsupported"
        marker for each dictionary carrying an ``/AA`` key.
    """
    openactions = []
    # Ignore warnings from failed Object dereferences
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for node in walk_pdf_tree(pdf_object.trailer):
            if not isinstance(node, DictionaryObject):
                continue

            if '/OpenAction' in node:
                # As per PDF 1.7, section 3.6.1, OpenAction can be an array or a dictionary.
                # The value is either an array defining a destination (see Section 8.2.1, "Destinations")
                # or an action dictionary representing an action (Section 8.5, "Actions")
                action = node['/OpenAction']
                if isinstance(action, DictionaryObject):
                    # Possibly dangerous types include Launch, URI, and JavaScript
                    try:
                        openactions.append(action['/S'])
                    except KeyError:
                        # A malformed action dictionary must not abort the
                        # scan; report it instead of silently dropping it.
                        openactions.append("Unknown (action dictionary has no /S entry)")
                elif isinstance(action, ArrayObject):
                    # Destinations may be associated with:
                    # outline items (see Section 8.2.2, "Document Outline"),
                    # annotations ("Link Annotations" on page 622), or
                    # actions ("Go-To Actions" on page 654 and "Remote Go-To Actions" on page 655).
                    # These are not fully supported
                    openactions.append("Destination")

            if '/AA' in node:
                # As per PDF 1.7, section 3.6.2 (page 147), AA (Additional Actions) can be specified
                # when a _trigger event_ occurs.
                # Didier Stevens writes (https://blog.didierstevens.com/programs/pdf-tools/) that these
                # can also be used maliciously.
                # No samples yet to test this
                openactions.append("Unsupported Additional Action (AA)!")
    return openactions
def extract_openactions(fpath):
    """Print the OpenAction / Additional Action findings for one PDF file.

    Args:
        fpath: Path of the PDF file to inspect.

    The findings are written to stdout, one per line. Nothing is printed
    when the file contains none.
    """
    # Imported locally so this function does not depend on the Python-2-only
    # StringIO module; io.BytesIO is the binary in-memory stream on 2.6+ and 3.x.
    import io

    with open(fpath, 'rb') as fh:
        src_pdf_blob = fh.read()

    # The reader works on an in-memory copy, so the file handle is already closed.
    pdf_data = PyPDF2.PdfFileReader(io.BytesIO(src_pdf_blob))
    openactions = find_openactions(pdf_data)
    if openactions:
        # Call form with a single argument behaves the same on Python 2 and 3.
        print("\n".join(openactions))
def main():
    """Command-line entry point: inspect the PDF named by the first argument.

    When no file name is supplied, a usage line is written to stderr and the
    process exits with status 1.
    """
    if len(sys.argv) < 2:
        # Usage errors go to stderr so stdout only ever carries findings.
        sys.stderr.write(
            "USAGE: %s %s <filename>\n" % (sys.executable, sys.argv[0])
        )
        sys.exit(1)
    extract_openactions(sys.argv[1])
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()