-
Notifications
You must be signed in to change notification settings - Fork 6
/
pdf2fxl
executable file
·175 lines (166 loc) · 4.39 KB
/
pdf2fxl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/bin/bash
# Print the command synopsis and the list of supported options to stdout,
# then terminate the script with exit status 1.
usage() {
  printf '%s\n' \
    "pdf2fxl <options> {PDF}" \
    "" \
    "options:" \
    "-z <zoom factor>" \
    "-o custom output directory" \
    "-r (raster text as image)" \
    "-e (create epub)" \
    "-p (omit poppler)" \
    "-d (debug mode)"
  exit 1
}
#######################################
# Print the physical (symlink-free) absolute path of the directory that
# contains the given file. If the file itself is a symbolic link, the link
# chain is followed first and the directory of the final target is printed.
# Arguments: $1 - path to a file or symlink
# Outputs:   the resolved directory on stdout
# Returns:   non-zero if a directory along the way cannot be entered
#######################################
real_dir() {
  # Working variables are local so that a direct call cannot overwrite
  # same-named globals of the calling script.
  local source=$1
  local dir
  # Resolve $source until it is no longer a symlink.
  while [ -h "$source" ]; do
    dir=$(cd -P -- "$(dirname -- "$source")" && pwd) || return
    source=$(readlink -- "$source")
    # A relative link target is relative to the directory holding the link.
    [[ $source != /* ]] && source="$dir/$source"
  done
  # Subshell: the caller's working directory must stay untouched.
  (cd -P -- "$(dirname -- "$source")" && pwd)
}
# cygwin check: $cygwin is "true" when uname reports a CYGWIN* system,
# "false" on everything else.
if [[ "$(uname)" == CYGWIN* ]]; then
  cygwin=true
else
  cygwin=false
fi
# Parse the command-line switches. The leading ':' in the option string
# selects getopts' silent mode: an unknown switch arrives as '?' and a
# missing option argument as ':', each with the offending letter in OPTARG.
while getopts ":z:o:redp" opt; do
  case "$opt" in
    z) ZOOM=$OPTARG ;;
    o) OUTDIR=$OPTARG ;;
    r) RASTERTEXT=yes ;;
    e) EPUB=yes ;;
    d) DEBUG=yes ;;
    p) EXECPOPPLER=no ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
  esac
done
# Discard the switches that were consumed so $1 becomes the first operand.
shift $((OPTIND - 1))
# check if argument for file is set
# All expansions are quoted so that a path containing whitespace stays a
# single word in the test and in the basename call.
if [[ -z "${1:-}" ]]; then
  echo "please specify a PDF file"
  usage
fi
FILE=$1
# File name without leading directories and without a trailing ".pdf"
# (basename strips the suffix only when it matches exactly).
BASENAME=$(basename -- "$FILE" .pdf)
# Use a zoom factor of 2 whenever ZOOM is unset or empty.
: "${ZOOM:=2}"
# set paths
# DIR is the directory real_dir reports for this script file.
DIR=$(real_dir "${BASH_SOURCE[0]}")
# With no output directory given, use the directory of the input file.
# The test is quoted so a directory name with whitespace does not make
# the test command fail with a syntax error.
if [[ -z "$OUTDIR" ]]; then
  OUTDIR=$(real_dir "$FILE")
fi
WORKDIR="$OUTDIR/$BASENAME.tmp"
DEBUGDIR="$WORKDIR/debug"
POPPLER=/usr/bin/pdftohtml
CALABASH="$DIR/calabash/calabash.sh"
# some additional information
# When debug mode is on, list the effective settings on stdout.
if [[ "$DEBUG" == "yes" ]]; then
  printf '%s\n' \
    "DEBUG MODE on" \
    "Input file: $FILE" \
    "Output dir: $OUTDIR" \
    "Workding Directory: $WORKDIR" \
    "Zoom factor: $ZOOM" \
    "Use Poppler: $EXECPOPPLER" \
    "Rasterize text: $RASTERTEXT" \
    "Create EPUB: $EPUB"
fi
# Start from an empty working directory and put a copy of the input PDF
# into it.
if [[ -d "$WORKDIR" ]]; then
  # ${WORKDIR:?} aborts the script instead of expanding to nothing, and the
  # quotes keep a path with whitespace from being split into several
  # rm targets.
  rm -rf -- "${WORKDIR:?}"
fi
if ! mkdir -p -- "$WORKDIR"; then
  echo "cannot create working directory $WORKDIR" >&2
  exit 1
fi
# -u: copy only when the destination is missing or older than the source.
if ! cp -u -- "$FILE" "$WORKDIR"; then
  echo "cannot copy $FILE to $WORKDIR" >&2
  exit 1
fi
# Windows or Unix-style paths
# Every path is quoted when handed to cygpath/real_dir so that names
# containing whitespace are passed as one argument.
if $cygwin; then
  # cygpath -m prints a Windows path with forward slashes; -a makes it
  # absolute.
  DIR_URI=file:/$(cygpath -m "$DIR")
  WORKDIR=$(cygpath -ma "$WORKDIR")
  DEBUGDIR_URI=file:/$(cygpath -ma "$DEBUGDIR")
  FILE=$(cygpath -ma "$FILE")
  OUTDIR=$(cygpath -ma "$OUTDIR")
else
  DIR_URI=$DIR
  # NOTE(review): real_dir prints the directory that CONTAINS its argument,
  # so this yields the parent of $DEBUGDIR rather than $DEBUGDIR itself --
  # confirm that this is intended.
  DEBUGDIR_URI=file:$(real_dir "$DEBUGDIR")
  FILE=file:$FILE
fi
LOG="$WORKDIR/$BASENAME.log"
# Run Poppler's pdftohtml on the copied PDF unless it was switched off
# (EXECPOPPLER=no). The availability check is only made when the tool is
# actually going to be used.
if [[ "$EXECPOPPLER" != "no" ]]; then
  # command -v fails when $POPPLER cannot be executed. A test built from
  # unquoted `which` output degenerates to a one-operand test when `which`
  # prints nothing, and then never reports the missing tool.
  if ! command -v "$POPPLER" >/dev/null 2>&1; then
    echo "Poppler pdftohtml not installed under $POPPLER."
    exit 1
  fi
  echo "Converting $WORKDIR/$BASENAME.pdf to HTML"
  # The converter's diagnostics (stderr) are appended to the log file; the
  # success message is only printed when the conversion succeeded.
  "$POPPLER" -zoom "$ZOOM" \
    -fmt jpg -fontfullname -c -p -nomerge \
    "$WORKDIR/$BASENAME.pdf" 2>>"$LOG" \
    && echo "[info] PDF extraction finished"
  # Rewrite the first "<!--" and the first "-->" on each line of every
  # generated HTML file to "<![CDATA[" and "]]>".
  for html in "$WORKDIR"/*.html; do
    # Without any match the glob stays a literal pattern; skip it.
    [[ -e "$html" ]] || continue
    # Replace the file only if sed completed successfully.
    sed -r -e 's/<!--/<!\[CDATA\[/' -e 's/-->/\]\]>/' "$html" > "$html~" \
      && mv -- "$html~" "$html"
  done
fi
# use ImageMagick to generate images from pages

# Print the raster density for a zoom factor: 72 times the factor, rounded
# to a whole number. awk does the arithmetic because shell $(( )) is
# integer-only and fails on fractional factors such as 1.5.
# Arguments: $1 - zoom factor (integer or decimal)
# Outputs:   the density on stdout
raster_density() {
  LC_ALL=C awk -v zoom="$1" 'BEGIN { printf "%.0f\n", 72 * zoom }'
}

if [[ "$RASTERTEXT" == "yes" ]]; then
  DENSITY=$(raster_density "$ZOOM")
  echo "[info] Propagated image density: $DENSITY" >> "$LOG"
  # stderr of convert is appended to the log; stdout is left untouched.
  convert -verbose -format png -density "$DENSITY" \
    "$WORKDIR/$BASENAME.pdf" "$WORKDIR/$BASENAME.png" 2>>"$LOG"
  echo "[info] Rasterizing pages finished" >> "$LOG"
fi
# check if Calabash exists and run
if [[ ! -f "$CALABASH" ]]; then
  echo "Calabash is not installed under $CALABASH."
  exit 1
fi
echo "[info] Convert single page HTML to wrapped HTML" >> "$LOG"
# Run the pdf2fxl.xpl pipeline. Each name=value pair is quoted as a whole so
# that a path containing whitespace reaches Calabash as one argument; an
# empty value still yields the bare "name=" word. stderr is appended to the
# log file.
"$CALABASH" \
  -o "css=$WORKDIR/$BASENAME.wrap.css" \
  -o "result=$WORKDIR/$BASENAME.wrap.xhtml" \
  "$DIR_URI/xpl/pdf2fxl.xpl" \
  "path=$WORKDIR" \
  "rastertext=$RASTERTEXT" \
  "debug=$DEBUG" \
  "debug-dir-uri=$DEBUGDIR_URI" \
  "status-dir-uri=$DEBUGDIR_URI/status" 2>>"$LOG"
# Optional EPUB step: run the epub-convert.xpl pipeline on the wrapped
# XHTML, then add the page JPEGs to the EPUB archive.
if [[ "$EPUB" == "yes" ]]; then
  echo "[info] create EPUB" >> "$LOG"
  # NOTE(review): the two epubtools paths are relative to the current
  # working directory, not to the script directory -- confirm this is
  # intended.
  "$CALABASH" \
    -i "source=$WORKDIR/$BASENAME.wrap.xhtml" \
    -i meta=epubtools/sample/epub-config.xml \
    epubtools/xpl/epub-convert.xpl \
    "debug=$DEBUG" \
    "debug-dir-uri=$DEBUGDIR_URI" \
    "status-dir-uri=$DEBUGDIR_URI/status" 2>>"$LOG"
  cp -fv -- "$WORKDIR"/*.jpg "$OUTDIR/epub/OEBPS"
  # zip must run inside the epub directory so the archive entries are named
  # OEBPS/...; the subshell keeps that directory change from leaking into
  # the rest of the script.
  (cd "$OUTDIR/epub" && zip -uv "$OUTDIR/$BASENAME.epub" OEBPS/*.jpg)
fi
# When both debug mode and EPUB creation are on, copy an EPUB found in the
# working directory to the output directory. Both sides of the condition
# expand variables; a bare word in place of "$EPUB" would compare two
# literals and never match.
if [[ "$DEBUG" == "yes" && "$EPUB" == "yes" ]]; then
  # copy files (skipped quietly when the working directory holds no EPUB)
  if [[ -f "$WORKDIR/$BASENAME.epub" ]]; then
    cp -u -- "$WORKDIR/$BASENAME.epub" "$OUTDIR"
  fi
fi