Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

fresh k2pdfopt v2.12

  • Loading branch information...
commit d988c98ae7ec889fecc0bc8b5cae051f7b573988 1 parent 7518581
@chrox chrox authored
Showing with 1,924 additions and 341 deletions.
  1. +0 −2  include_mod/tesseract.h
  2. +5 −0 k2pdfoptlib/bmpregion.c
  3. +77 −10 k2pdfoptlib/k2bmp.c
  4. +124 −20 k2pdfoptlib/k2file.c
  5. +30 −3 k2pdfoptlib/k2gui.c
  6. +8 −6 k2pdfoptlib/k2gui_cbox.c
  7. +184 −22 k2pdfoptlib/k2master.c
  8. +21 −6 k2pdfoptlib/k2menu.c
  9. +103 −11 k2pdfoptlib/k2parsecmd.c
  10. +28 −16 k2pdfoptlib/k2pdfopt.h
  11. +79 −16 k2pdfoptlib/k2proc.c
  12. +15 −0 k2pdfoptlib/k2publish.c
  13. +52 −36 k2pdfoptlib/k2settings.c
  14. +49 −2 k2pdfoptlib/k2settings2cmd.c
  15. +5 −4 k2pdfoptlib/k2sys.c
  16. +95 −14 k2pdfoptlib/k2usage.c
  17. +75 −1 k2pdfoptlib/k2version.c
  18. +16 −0 k2pdfoptlib/pagelist.c
  19. +20 −9 k2pdfoptlib/textrows.c
  20. +5 −6 readme_k2src.txt
  21. +15 −66 tesseract_mod/tesscapi.cpp
  22. +2 −2 willuslib/filelist.c
  23. +549 −0 willuslib/pdfwrite.c
  24. +11 −0 willuslib/strbuf.c
  25. +34 −5 willuslib/wfile.c
  26. +41 −9 willuslib/willus.h
  27. +1 −1  willuslib/willusversion.c
  28. +2 −34 willuslib/win.c
  29. +265 −23 willuslib/wmupdf.c
  30. +11 −0 willuslib/wsys.c
  31. +2 −17 willuslib/wzfile.c
View
2  include_mod/tesseract.h
@@ -14,9 +14,7 @@ extern "C" {
#endif
int tess_capi_init(char *datapath,char *language,int ocr_type,FILE *out);
-const char* tess_capi_get_init_language();
int tess_capi_get_ocr(PIX *pix,char *outstr,int maxlen,FILE *out);
-int tess_capi_get_word_boxes(PIX *pix, BOXA **out_boxa, int is_cjk, FILE *out);
void tess_capi_end(void);
#ifdef __cplusplus
View
5 k2pdfoptlib/bmpregion.c
@@ -1331,7 +1331,12 @@ printf("1. textrow[%d] = figure.\n",textrows->n-1);
/* Remove rows with text height that seems to be too small */
if (remove_small_rows)
+ {
+ /* textrows_remove_small_rows needs types determined */
+ for (i=0;i<textrows->n;i++)
+ textrow_determine_type(region,k2settings,i);
textrows_remove_small_rows(textrows,k2settings,0.25,0.5,region);
+ }
/* Compute gaps between rows and row heights again */
textrows_compute_row_gaps(textrows,region->r2);
View
87 k2pdfoptlib/k2bmp.c
@@ -519,7 +519,8 @@ fclose(f);
}
/*
-** bmp must be grayscale! (cbmp = color, can be null)
+** bmp must be grayscale! (cbmp might be color, might be grayscale, can be null)
+** Handles cbmp either 8-bit or 24-bit in v2.10.
*/
void bmp_detect_vertical_lines(WILLUSBITMAP *bmp,WILLUSBITMAP *cbmp,
double dpi,/* double minwidth_in, */
@@ -673,6 +674,8 @@ exit(10);
/*
** Calculate max vert line length. Line is terminated by nw consecutive white pixels
** on either side.
+**
+** v2.10--handle cbmp 8-bit correctly.
*/
static int vert_line_erase(WILLUSBITMAP *bmp,WILLUSBITMAP *cbmp,WILLUSBITMAP *tmp,
int row0,int col0,double tanth,double minheight_in,
@@ -680,10 +683,22 @@ static int vert_line_erase(WILLUSBITMAP *bmp,WILLUSBITMAP *cbmp,WILLUSBITMAP *tm
double dpi,int erase_vertical_lines)
{
- int lw,cc,maxdev,nw,dir,i,n;
+ int lw,cc,maxdev,nw,dir,i,n,cbpp;
int *c1,*c2,*w;
static char *funcname="vert_line_erase";
+#if (WILLUSDEBUGX & 0x8000)
+printf("@vert_line_erase(row0=%d,col0=%d,tanth=%g,minheight_in=%g\n"
+ " maxwidth_in=%g,white_thresh=%d,dpi=%g,evl=%d\n",
+row0,col0,tanth,minheight_in,
+maxwidth_in,white_thresh,dpi,erase_vertical_lines);
+printf(" bmp = %d x %d x %d\n",bmp->width,bmp->height,bmp->bpp);
+if (cbmp!=NULL)
+printf(" cbmp = %d x %d x %d\n",cbmp->width,cbmp->height,cbmp->bpp);
+if (tmp!=NULL)
+printf(" tmp = %d x %d x %d\n",tmp->width,tmp->height,tmp->bpp);
+#endif
+ cbpp = (cbmp!=NULL && cbmp->bpp==24) ? 3 : 1;
willus_dmem_alloc_warn(26,(void **)&c1,sizeof(int)*3*bmp->height,funcname,10);
c2=&c1[bmp->height];
w=&c2[bmp->height];
@@ -703,12 +718,18 @@ static int vert_line_erase(WILLUSBITMAP *bmp,WILLUSBITMAP *cbmp,WILLUSBITMAP *tm
{
int del,brc;
+#if (WILLUSDEBUGX & 0x8000)
+printf("dir=%d\n",dir);
+#endif
brc = 0;
for (del=(dir==-1)?0:1;1;del++)
{
int r,c;
unsigned char *p;
+#if (WILLUSDEBUGX & 0x8000)
+printf("del=%d\n",del);
+#endif
r=row0+dir*del;
if (r<0 || r>bmp->height-1)
break;
@@ -766,6 +787,9 @@ static int vert_line_erase(WILLUSBITMAP *bmp,WILLUSBITMAP *cbmp,WILLUSBITMAP *tm
c2[r]=bmp->width-1;
}
}
+#if (WILLUSDEBUGX & 0x8000)
+printf("n=%d\n",n);
+#endif
if (n>1)
sorti(w,n);
/*
@@ -776,6 +800,9 @@ printf("n=%d, w[%d]=%d, w[%d]=%d (mw=%g)\n",n,n/4,w[n/4],3*n/4,w[3*n/4],maxwidth
|| w[3*n/4] > (int)(maxwidth_in*dpi)
|| (erase_vertical_lines==1 && w[n-1] > maxwidth_in*dpi))
{
+#if (WILLUSDEBUGX & 0x8000)
+printf("Erasing area in temp bitmap.\n");
+#endif
/* Erase area in temp bitmap */
for (i=0;i<bmp->height;i++)
{
@@ -792,15 +819,27 @@ printf("n=%d, w[%d]=%d, w[%d]=%d (mw=%g)\n",n,n/4,w[n/4],3*n/4,w[3*n/4],maxwidth
}
else
{
+#if (WILLUSDEBUGX & 0x8000)
+printf("Erasing line width in source\n");
+#endif
/* Erase line width in source bitmap */
lw=w[3*n/4]+nw*2;
+#if (WILLUSDEBUGX & 0x8000)
+printf("1. lw=%d\n",lw);
+#endif
if (lw > maxwidth_in*dpi/2)
lw=maxwidth_in*dpi/2;
+#if (WILLUSDEBUGX & 0x8000)
+printf("2. lw=%d\n",lw);
+#endif
for (i=0;i<bmp->height;i++)
{
unsigned char *p;
int c0,cmin,cmax,count,white;
+#if (WILLUSDEBUGX & 0x8000)
+printf("i=%d\n",i);
+#endif
if (c1[i]<0 || c2[i]<0)
continue;
c0=col0+(i-row0)*tanth;
@@ -810,32 +849,60 @@ printf("n=%d, w[%d]=%d, w[%d]=%d (mw=%g)\n",n,n/4,w[n/4],3*n/4,w[3*n/4],maxwidth
cmax=c0+lw+1;
if (cmax>c2[i])
cmax=c2[i];
+#if (WILLUSDEBUGX & 0x8000)
+printf("A\n");
+#endif
p=bmp_rowptr_from_top(bmp,i);
c0 = (p[cmin] > p[cmax]) ? cmin : cmax;
white=p[c0];
+#if (WILLUSDEBUGX & 0x8000)
+printf("B\n");
+#endif
if (white <= white_thresh)
white = white_thresh+1;
if (white>255)
white=255;
+#if (WILLUSDEBUGX & 0x8000)
+printf("C\n");
+#endif
count=(cmax-cmin)+1;
p=&p[cmin];
+#if (WILLUSDEBUGX & 0x8000)
+printf("D\n");
+#endif
for (;count>0;count--,p++)
(*p)=white;
+#if (WILLUSDEBUGX & 0x8000)
+printf("E\n");
+#endif
if (cbmp!=NULL)
{
unsigned char *p0;
+
p=bmp_rowptr_from_top(cbmp,i);
- p0=p+c0*3;
- p=p+cmin*3;
+ p0=p+c0*cbpp;
+ p=p+cmin*cbpp;
count=(cmax-cmin)+1;
- for (;count>0;count--,p+=3)
- {
- p[0]=p0[0];
- p[1]=p0[1];
- p[2]=p0[2];
- }
+#if (WILLUSDEBUGX & 0x8000)
+printf("F width=%d, ht=%d, bpp=%d, c0=%d, cmin=%d, i=%d, count=%d\n",cbmp->width,cbmp->height,cbmp->bpp,c0,cmin,i,count);
+#endif
+ if (cbpp==3)
+ for (;count>0;count--,p+=3)
+ {
+ p[0]=p0[0];
+ p[1]=p0[1];
+ p[2]=p0[2];
+ }
+ else
+ memset(p,p0[0],count);
+#if (WILLUSDEBUGX & 0x8000)
+printf("G\n");
+#endif
}
}
+#if (WILLUSDEBUGX & 0x8000)
+printf(" done.\n");
+#endif
}
willus_dmem_free(26,(double **)&c1,funcname);
return(1);
View
144 k2pdfoptlib/k2file.c
@@ -31,6 +31,9 @@ static int k2_handle_preview(K2PDFOPT_SETTINGS *k2settings,MASTERINFO *masterinf
static int filename_comp(char *name1,char *name2);
static void filename_substitute(char *dst,char *fmt,char *src,int count,char *defext0);
static int overwrite_fail(char *outname,double overwrite_minsize_mb);
+static int toclist_valid(char *s,FILE *out);
+static WPDFOUTLINE *wpdfoutline_from_pagelist(char *pagelist,int maxpages);
+static int tocwrites=0;
/*
@@ -228,7 +231,7 @@ static double k2pdfopt_proc_one(K2PDFOPT_SETTINGS *k2settings0,char *filename,do
WILLUSBITMAP _marked,*marked;
WILLUSBITMAP preview_internal;
int i,status,pw,np,src_type,second_time_through,or_detect,orep_detect,preview;
- int pagecount,pagestep,pages_done;
+ int pagecount,pagestep,pages_done,local_tocwrites;
int errcnt,pixwarn;
FILELIST *fl,_fl;
int folder,dpi;
@@ -245,6 +248,7 @@ static double k2pdfopt_proc_one(K2PDFOPT_SETTINGS *k2settings0,char *filename,do
/*
printf("@k2pdfopt_proc_one(filename='%s', rot_deg=%g, preview_bitmap=%p)\n",filename,rot_deg,k2out->bmp);
*/
+ local_tocwrites=0;
k2out->status = 1;
k2settings=&_k2settings;
k2pdfopt_settings_copy(k2settings,k2settings0);
@@ -449,6 +453,32 @@ printf("@k2pdfopt_proc_one(filename='%s', rot_deg=%g, preview_bitmap=%p)\n",file
strcpy(mupdffilename,filename);
}
#endif
+ /* Get bookmarks / outline from PDF file */
+ if (!or_detect && k2settings->use_toc!=0 && !toclist_valid(k2settings->toclist,NULL))
+ {
+ masterinfo->outline=wpdfoutline_read_from_pdf_file(mupdffilename);
+ /* Save TOC if requested */
+ if (k2settings->tocsavefile[0]!='\0')
+ {
+ FILE *f;
+ f=fopen(k2settings->tocsavefile,tocwrites==0?"w":"a");
+ if (f!=NULL)
+ {
+ int i;
+ fprintf(f,"%sFILE: %s\n",tocwrites==0?"":"\n\n",mupdffilename);
+ for (i=strlen(mupdffilename)+6;i>0;i--)
+ fputc('-',f);
+ fprintf(f,"\n");
+ if (masterinfo->outline!=NULL)
+ wpdfoutline_echo2(masterinfo->outline,0,f);
+ else
+ fprintf(f,"(No outline info in file.)\n");
+ fclose(f);
+ tocwrites++;
+ local_tocwrites++;
+ }
+ }
+ }
}
else
#endif
@@ -484,6 +514,14 @@ printf("@k2pdfopt_proc_one(filename='%s', rot_deg=%g, preview_bitmap=%p)\n",file
filelist_free(fl);
return(0.);
}
+ masterinfo->srcpages = np;
+ if (!or_detect && toclist_valid(k2settings->toclist,stdout))
+ {
+ if (pagelist_valid_page_range(k2settings->toclist))
+ masterinfo->outline=wpdfoutline_from_pagelist(k2settings->toclist,masterinfo->srcpages);
+ else
+ masterinfo->outline=wpdfoutline_read_from_text_file(k2settings->toclist);
+ }
pagecount = np<0 ? -1 : pagelist_count(k2settings->pagelist,np);
#ifdef HAVE_K2GUI
if (k2gui_active())
@@ -659,23 +697,23 @@ printf("@k2pdfopt_proc_one(filename='%s', rot_deg=%g, preview_bitmap=%p)\n",file
/* Reset the display order for this source page */
if (k2settings->show_marked_source)
mark_source_page(k2settings,NULL,0,0xf);
- /* If we haven't just kicked out a page... */
- if (!k2settings_gap_override(k2settings))
+ /*
+ ** v2.10 Call masterinfo_publish() no matter what. If we've just kicked out a
+ ** page, it doesn't matter. It will do nothing.
+ */
+ masterinfo_publish(masterinfo,k2settings,
+ masterinfo_should_flush(masterinfo,k2settings));
+ if (preview && k2_handle_preview(k2settings,masterinfo,k2mark_page_count,
+ k2settings->dst_color?marked:src,k2out))
{
- masterinfo_publish(masterinfo,k2settings,
- k2settings->dst_break_pages>0 ? k2settings->dst_break_pages : 0);
- if (preview && k2_handle_preview(k2settings,masterinfo,k2mark_page_count,
- k2settings->dst_color?marked:src,k2out))
- {
- bmp_free(marked);
- bmp_free(srcgrey);
- bmp_free(src);
- masterinfo_free(masterinfo,k2settings);
- if (folder)
- filelist_free(fl);
- k2out->status=0;
- return(0.);
- }
+ bmp_free(marked);
+ bmp_free(srcgrey);
+ bmp_free(src);
+ masterinfo_free(masterinfo,k2settings);
+ if (folder)
+ filelist_free(fl);
+ k2out->status=0;
+ return(0.);
}
if (k2settings->show_marked_source && !preview)
publish_marked_page(mpdf,k2settings->dst_color ? marked : src,k2settings->src_dpi);
@@ -739,7 +777,14 @@ printf("@k2pdfopt_proc_one(filename='%s', rot_deg=%g, preview_bitmap=%p)\n",file
k2out->status=0;
return(0.);
}
+ /*
+ ** v2.10 -- Calling masterinfo_flush() without checking if a page has just been
+ ** flushed is fine at the end. If there is nothing left
+ ** in the master output bitmap, it won't do anything.
+ */
+ /*
if (k2settings->dst_break_pages<=0 && !k2settings_gap_override(k2settings))
+ */
masterinfo_flush(masterinfo,k2settings);
{
char cdate[128],author[256],title[256];
@@ -759,6 +804,8 @@ printf("@k2pdfopt_proc_one(filename='%s', rot_deg=%g, preview_bitmap=%p)\n",file
author[0]=title[0]=cdate[0]='\0';
if (!k2settings->use_crop_boxes)
{
+ if (masterinfo->outline!=NULL)
+ pdffile_add_outline(&masterinfo->outfile,masterinfo->outline);
pdffile_finish(&masterinfo->outfile,title,author,masterinfo->pageinfo.producer,cdate);
pdffile_close(&masterinfo->outfile);
}
@@ -769,7 +816,7 @@ printf("@k2pdfopt_proc_one(filename='%s', rot_deg=%g, preview_bitmap=%p)\n",file
wpdfboxes_echo(&masterinfo->pageinfo.boxes,stdout);
#endif
#ifdef HAVE_MUPDF_LIB
- wmupdf_remake_pdf(mupdffilename,dstfile,&masterinfo->pageinfo,1,stdout);
+ wmupdf_remake_pdf(mupdffilename,dstfile,&masterinfo->pageinfo,1,masterinfo->outline,stdout);
#endif
}
if (k2settings->show_marked_source)
@@ -804,6 +851,8 @@ wpdfboxes_echo(&masterinfo->pageinfo.boxes,stdout);
k2printf(TTEXT_BOLD "%d words" TTEXT_NORMAL " written to " TTEXT_MAGENTA "%s" TTEXT_NORMAL " (%.1f MB).\n\n",masterinfo->wordcount,masterinfo->ocrfilename,size/1024./1024.);
}
#endif
+ if (local_tocwrites>0)
+ k2printf(TTEXT_BOLD "%d bytes" TTEXT_NORMAL " written to " TTEXT_MAGENTA "%s" TTEXT_NORMAL ".\n\n",(int)(wfile_size(k2settings->tocsavefile)+.5),k2settings->tocsavefile);
masterinfo_free(masterinfo,k2settings);
if (folder)
filelist_free(fl);
@@ -855,10 +904,11 @@ static int k2_handle_preview(K2PDFOPT_SETTINGS *k2settings,MASTERINFO *masterinf
printf("Got preview bitmap: %d x %d x %d.\n",
masterinfo->preview_bitmap->width,masterinfo->preview_bitmap->height,masterinfo->preview_bitmap->bpp);
*/
- if (k2settings->preview_page>0)
- bmp_write(masterinfo->preview_bitmap,"k2pdfopt_out.png",NULL,100);
if (k2out->bmp==NULL)
+ {
+ bmp_write(masterinfo->preview_bitmap,"k2pdfopt_out.png",NULL,100);
bmp_free(masterinfo->preview_bitmap);
+ }
}
return(status);
}
@@ -1014,3 +1064,57 @@ static int overwrite_fail(char *outname,double overwrite_minsize_mb)
strcpy(outname,newname);
return(0);
}
+
+
+/*
+** Decide whether s can be used as a table-of-contents specifier.
+**
+** Returns 0 if s is an empty string.
+** Returns 1 if pagelist_valid_page_range(s) is non-zero or if
+** wfile_status(s) is 1 (tested in that order).
+** Otherwise returns 0 and, when out is not NULL, reports the problem
+** through k2printf().  Note that out only enables the message--nothing
+** is written to the out stream itself.
+**
+*/
+static int toclist_valid(char *s,FILE *out)
+
+    {
+    int usable;
+
+    if (s[0]=='\0')
+        return(0);
+    usable = (pagelist_valid_page_range(s) || wfile_status(s)==1) ? 1 : 0;
+    if (!usable && out!=NULL)
+        k2printf(ANSI_RED "\nTOC page list '%s' is not valid page range or file name."
+                 ANSI_NORMAL "\n\n",s);
+    return(usable);
+    }
+
+
+/*
+** Create outline from page list
+**
+** Builds a chain of WPDFOUTLINE entries linked through ->next, one for each
+** page returned by pagelist_page_by_index(pagelist,i,maxpages) for
+** i=0,1,2,..., stopping at the first negative return value.  Entry i is
+** titled "Chapter <i+1>", gets srcpage = (returned page)-1, and dstpage = -1.
+**
+** Returns the first entry of the chain, or NULL if no entry was created.
+** If a memory allocation fails, the chain built up to that point is returned
+** (possibly NULL) instead of dereferencing a NULL pointer.
+**
+** The entries and their titles are allocated with malloc().
+**
+*/
+static WPDFOUTLINE *wpdfoutline_from_pagelist(char *pagelist,int maxpages)
+
+    {
+    int i;
+    WPDFOUTLINE *outline,*outline0;
+
+    /* outline0 = head of the chain, outline = most recently added entry */
+    outline0=outline=NULL;
+    for (i=0;1;i++)
+        {
+        int page;
+        char buf[64];
+        WPDFOUTLINE *oline;
+
+        page=pagelist_page_by_index(pagelist,i,maxpages);
+        if (page<0)
+            break;
+        sprintf(buf,"Chapter %d",i+1);
+        oline=malloc(sizeof(WPDFOUTLINE));
+        /* Out of memory:  keep what has been built so far */
+        if (oline==NULL)
+            break;
+        wpdfoutline_init(oline);
+        oline->title=malloc(strlen(buf)+1);
+        if (oline->title==NULL)
+            {
+            /* Do not link an entry that has no title */
+            free(oline);
+            break;
+            }
+        strcpy(oline->title,buf);
+        oline->srcpage=page-1;
+        oline->dstpage=-1;
+        /* Append to the end of the chain */
+        if (outline0==NULL)
+            outline0=oline;
+        else
+            outline->next=oline;
+        outline=oline;
+        }
+    return(outline0);
+    }
View
33 k2pdfoptlib/k2gui.c
@@ -834,22 +834,39 @@ printf("settings->src_trim=%d\n",k2settings->src_trim);
if (!strcmp(control->name,"straighten"))
k2settings->src_autostraighten = checked ? 4. : -1.;
else if (!strcmp(control->name,"break"))
- k2settings->dst_break_pages= checked ? 1 : 0;
+ k2settings->dst_break_pages= checked ? 2 : 1;
else if (!strcmp(control->name,"color"))
k2settings->dst_color= checked ? 1 : 0;
else if (!strcmp(control->name,"landscape"))
k2settings->dst_landscape = checked ? 1 : 0;
else if (!strcmp(control->name,"native"))
+ {
k2settings->use_crop_boxes = checked ? 1 : 0;
+ if (k2settings->use_crop_boxes)
+ {
+#ifdef HAVE_OCR_LIB
+ k2settings->dst_ocr=0;
+#endif
+ k2settings->text_wrap=0;
+ }
+ }
else if (!strcmp(control->name,"r2l"))
k2settings->src_left_to_right = checked ? 0 : 1;
else if (!strcmp(control->name,"markup"))
k2settings->show_marked_source = checked ? 1 : 0;
else if (!strcmp(control->name,"wrap"))
+ {
k2settings->text_wrap = checked ? 2 : 0;
+ if (k2settings->text_wrap)
+ k2settings->use_crop_boxes=0;
+ }
#ifdef HAVE_OCR_LIB
else if (!strcmp(control->name,"ocr"))
+ {
k2settings->dst_ocr = checked ? 't' : 'm';
+ if (k2settings->dst_ocr)
+ k2settings->use_crop_boxes=0;
+ }
#endif
else if (!strcmp(control->name,"evl"))
k2settings->erase_vertical_lines = checked ? 1 : 0;
@@ -1359,6 +1376,9 @@ static WILLUSGUICONTROL *k2gui_control_with_focus(int *index)
static void k2gui_update_controls(void)
{
+ /* Make checkboxes consistent */
+ if (k2gui!=NULL && k2gui->k2conv!=NULL)
+ k2pdfopt_settings_quick_sanity_check(&k2gui->k2conv->k2settings);
if (needs_redraw!=2)
needs_redraw=1;
willusgui_control_redraw(&k2gui->mainwin,1);
@@ -1876,7 +1896,7 @@ printf("dst_userwidth_units = %d\n",k2settings->dst_userwidth_units);
** Mode select menu
*/
{
- static char *modes[]={"default","copy","fitwidth","2-column",""};
+ static char *modes[]={"default","copy","trim","fitwidth","fitpage","2-column",""};
int nmodes;
for (nmodes=0;modes[nmodes][0]!='\0';nmodes++);
@@ -2407,7 +2427,7 @@ printf("cmdxtra.s='%s'\n",k2gui->cmdxtra.s);
checked=k2settings->src_autostraighten>=0.;
break;
case 1:
- checked=k2settings->dst_break_pages;
+ checked=(k2settings->dst_break_pages==2);
break;
case 2:
checked=k2settings->dst_color;
@@ -2648,6 +2668,13 @@ static WILLUSGUICONTROL *k2gui_control_by_name(char *name)
}
+/*
+** Return the preview_processing flag of the GUI structure.
+**
+** Returns 0 if the GUI structure does not exist (k2gui is NULL), so the
+** function can be called without first checking whether the GUI is set up.
+**
+*/
+int k2gui_previewing(void)
+
+    {
+    if (k2gui==NULL)
+        return(0);
+    return(k2gui->preview_processing);
+    }
+
+
static void k2gui_preview_start(void)
{
View
14 k2pdfoptlib/k2gui_cbox.c
@@ -407,6 +407,8 @@ void k2gui_cbox_error(char *filename,int statuscode)
"File overwrite not allowed","","Uknown error"};
char buf[512];
+ if (k2gui_cbox==NULL || !k2gui_cbox->converting)
+ return;
if (statuscode<1 || statuscode>5)
statuscode=1;
sprintf(buf,"Conversion of file %s aborted (%s).",filename,err[statuscode-1]);
@@ -580,7 +582,7 @@ void k2gui_cbox_set_files_completed(int nfiles,char *message)
char buf[256];
int color;
- if (k2gui_cbox==NULL)
+ if (k2gui_cbox==NULL || !k2gui_cbox->converting)
return;
if (message==NULL)
sprintf(buf,"%d of %d file%s completed.",nfiles,k2gui_cbox->num_files,k2gui_cbox->num_files==1?"":"s");
@@ -617,7 +619,7 @@ void k2gui_cbox_set_pages_completed(int n,char *message)
int color;
double progress;
- if (k2gui_cbox==NULL)
+ if (k2gui_cbox==NULL || !k2gui_cbox->converting)
return;
color=0xd0ffd0;
if (k2gui_cbox->num_pages>0)
@@ -639,7 +641,7 @@ void k2gui_cbox_set_pages_completed(int n,char *message)
void k2gui_cbox_set_num_files(int nfiles)
{
- if (k2gui_cbox!=NULL)
+ if (k2gui_cbox!=NULL && k2gui_cbox->converting)
k2gui_cbox->num_files=nfiles;
}
@@ -647,7 +649,7 @@ void k2gui_cbox_set_num_files(int nfiles)
void k2gui_cbox_set_num_pages(int npages)
{
- if (k2gui_cbox!=NULL)
+ if (k2gui_cbox!=NULL && k2gui_cbox->converting)
k2gui_cbox->num_pages=npages;
}
@@ -655,7 +657,7 @@ void k2gui_cbox_set_num_pages(int npages)
void k2gui_cbox_set_filename(char *name)
{
- if (k2gui_cbox!=NULL)
+ if (k2gui_cbox!=NULL && k2gui_cbox->converting)
{
strncpy(k2gui_cbox->filename,k2gui_short_name(name),255);
k2gui_cbox->filename[255]='\0';
@@ -679,7 +681,7 @@ printf("\n error count set to %d\n\n",k2gui_cbox->error_count);
void k2gui_cbox_increment_error_count(void)
{
- if (k2gui_cbox!=NULL)
+ if (k2gui_cbox!=NULL && k2gui_cbox->converting)
{
k2gui_cbox->error_count++;
#if (WILLUSDEBUG & 0x2000)
View
206 k2pdfoptlib/k2master.c
@@ -42,7 +42,7 @@ static void find_word_gaps_using_wrectmaps(WRECTMAPS *wrectmaps,int **pgappos,
static void find_word_gaps_using_textrow(WILLUSBITMAP *src,K2PDFOPT_SETTINGS *k2settings,
int **pgappos,int **pgapsize,int *png,int whitethresh,
int dpi);
-static int masterinfo_break_point(MASTERINFO *masterinfo,int maxsize);
+static int masterinfo_break_point(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings,int maxsize);
void masterinfo_init(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings)
@@ -51,9 +51,13 @@ void masterinfo_init(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings)
extern char *k2pdfopt_version;
int i;
+ /* Init outline / bookmarks */
+ masterinfo->outline=NULL;
+ masterinfo->outline_srcpage_completed=-1;
masterinfo->preview_bitmap=NULL;
masterinfo->preview_captured=0;
masterinfo->published_pages=0;
+ masterinfo->srcpages = -1;
masterinfo->wordcount=0;
masterinfo->debugfolder[0]='\0';
bmp_init(&masterinfo->bmp);
@@ -100,6 +104,7 @@ void masterinfo_free(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings)
#endif
wrapbmp_free(&masterinfo->wrapbmp);
bmp_free(&masterinfo->bmp);
+ wpdfoutline_free(masterinfo->outline);
}
@@ -131,9 +136,9 @@ int masterinfo_new_source_page_init(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2
int white;
white=k2settings->src_whitethresh;
+ masterinfo->pageinfo.srcpage = pageno;
if (k2settings->use_crop_boxes)
{
- masterinfo->pageinfo.srcpage = pageno;
masterinfo->pageinfo.srcpage_rot_deg=0.;
masterinfo->pageinfo.srcpage_fine_rot_deg = 0.;
}
@@ -653,7 +658,9 @@ printf("maxgap_pixels=%d\n",maxgap_pixels);
** mandatory_region_gap==4 means this is the first bitmap being added for the whole
** document, so we don't need to add a gap in that case, either.
*/
- if (masterinfo->mandatory_region_gap==1)
+ if (k2settings->dst_fit_to_page==-2)
+ gap_pixels = 0;
+ else if (masterinfo->mandatory_region_gap==1)
gap_pixels= k2settings_gap_override(k2settings) ? 0
: masterinfo->page_region_gap_in*region->dpi;
else if (textrow[0].type==REGION_TYPE_FIGURE || lastrow->type==REGION_TYPE_FIGURE)
@@ -875,7 +882,7 @@ k2printf("start: mi->rows=%d, rr=%d\n",masterinfo->rows,rr);
return(0);
/* Get a suitable breaking point for the next page */
- bp=masterinfo_break_point(masterinfo,maxsize);
+ bp=masterinfo_break_point(masterinfo,k2settings,maxsize);
if (k2settings->verbose)
k2printf("bp: maxsize=%d, bp=%d, r0=%d\n",maxsize,bp,r0);
#if (WILLUSDEBUGX & 64)
@@ -1074,6 +1081,30 @@ k2printf("mi->published_pages=%d\n",masterinfo->published_pages);
/*
+** Tell the caller whether the master bitmap should be flushed now,
+** according to the page-break settings.  (v2.10)
+**
+** Returns 0 (do not flush) if k2settings_gap_override() is non-zero or
+** if dst_break_pages is 0.
+** Returns 1 (flush) if dst_break_pages is greater than 1.
+** Otherwise returns 1 only if page number pageinfo.srcpage+1 is in the
+** user's break-page list (bpl, when not empty) or is found in the
+** outline / bookmarks by wpdfoutline_includes_srcpage(); 0 if neither.
+**
+*/
+int masterinfo_should_flush(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings)
+
+    {
+    int page;
+
+    if (k2settings_gap_override(k2settings) || k2settings->dst_break_pages==0)
+        return(0);
+    if (k2settings->dst_break_pages>1)
+        return(1);
+    page=masterinfo->pageinfo.srcpage+1;
+    /* User-supplied list of pages where a page break is requested */
+    if (k2settings->bpl[0]!='\0'
+          && pagelist_includes_page(k2settings->bpl,page,masterinfo->srcpages))
+        return(1);
+    /* Outline / bookmarks, if available */
+    if (wpdfoutline_includes_srcpage(masterinfo->outline,page,1)>0)
+        return(1);
+    return(0);
+    }
+
+
+/*
** Correctly complete crop boxes that are associated with this output page
**
** box->x1,y1,userx,usery start out as:
@@ -1222,7 +1253,23 @@ static void bmp_pad_and_mark(WILLUSBITMAP *dst,WILLUSBITMAP *src,K2PDFOPT_SETTIN
{
int i,r,r0,bw,bytespp,pl,pr,pt,pb;
-
+/*
+printf("Pad: %d,%d,%d,%d\n",k2settings->pad_left,
+k2settings->pad_top,
+k2settings->pad_right,
+k2settings->pad_bottom);
+printf("OM: %g,%g,%g,%g\n",
+k2settings->dst_marleft,
+k2settings->dst_martop,
+k2settings->dst_marright,
+k2settings->dst_marbot);
+printf("Mar: %g,%g,%g,%g\n",
+k2settings->mar_left,
+k2settings->mar_top,
+k2settings->mar_right,
+k2settings->mar_bot);
+printf("usecropboxes=%d\n",k2settings->use_crop_boxes);
+*/
r0=(int)(bmpdpi*k2settings->dst_martop+.5);
if (k2settings->dst_landscape)
{
@@ -1565,6 +1612,7 @@ static void find_word_gaps_using_textrow(WILLUSBITMAP *src,K2PDFOPT_SETTINGS *k2
}
+#if 0
/*
** Find gaps in the master bitmap so that it can be broken into regions
** which go onto separate pages.
@@ -1578,10 +1626,14 @@ static int masterinfo_break_point(MASTERINFO *masterinfo,int maxsize)
int bp1f,bp2f;
int bp1e,bp2e;
-/*
k2printf("@breakpoint, mi->rows=%d, maxsize=%d\n",masterinfo->rows,maxsize);
k2printf(" fit_to_page=%d\n",(int)masterinfo->fit_to_page);
-*/
+{
+static int count=1;
+char filename[256];
+sprintf(filename,"page%04d.png",count++);
+bmp_write(&masterinfo->bmp,filename,stdout,100);
+}
/* masterinfo->fit_to_page==-2 means user specified -f2p -2 which means */
/* flush entire contents of master to single page every time. */
if (masterinfo->rows<maxsize || masterinfo->fit_to_page==-2)
@@ -1595,10 +1647,8 @@ k2printf(" fit_to_page=%d\n",(int)masterinfo->fit_to_page);
else
scanheight=maxsize;
/* If available rows almost exactly fit page, just send the whole thing */
-/*
k2printf(" scanheight=%d, mi->rows=%d, fabs=%g\n",scanheight,masterinfo->rows,
fabs((double)scanheight/masterinfo->rows-1.));
-*/
if (masterinfo->fit_to_page==0 && (abs(scanheight-masterinfo->rows)<=1
|| fabs((double)scanheight/masterinfo->rows-1.)<.002))
return(masterinfo->rows);
@@ -1633,8 +1683,8 @@ fabs((double)scanheight/masterinfo->rows-1.));
{
if (rowcount[i]==0)
{
-// if (cw==0)
-// k2printf("%d black\n",fc);
+if (cw==0)
+k2printf("%d black\n",fc);
cw++;
if (fc>figure)
{
@@ -1664,29 +1714,27 @@ fabs((double)scanheight/masterinfo->rows-1.));
}
else
{
-// if (fc==0)
-// k2printf("%d white\n",cw);
+if (fc==0)
+k2printf("%d white\n",cw);
cw=0;
nwc++;
fc++;
}
}
-/*
{
static int count=0;
FILE *out;
count++;
k2printf("rows=%d, gs=%d, scanheight=%d, bp1=%d, bp2=%d\n",masterinfo->rows,goodsize,scanheight,bp1,bp2);
k2printf(" bp1f=%d, bp2f=%d, bp1e=%d, bp2e=%d\n",bp1f,bp2f,bp1e,bp2e);
-bmp_write(&masterinfo->bmp,"master.png",stdout,100);
-out=fopen("rc.dat","w");
-for (i=0;i<scanheight;i++)
-fprintf(out,"%d\n",rowcount[i]);
-fclose(out);
-if (count==2)
-exit(10);
+//bmp_write(&masterinfo->bmp,"master.png",stdout,100);
+//out=fopen("rc.dat","w");
+//for (i=0;i<scanheight;i++)
+//fprintf(out,"%d\n",rowcount[i]);
+//fclose(out);
+// if (count==2)
+// exit(10);
}
-*/
willus_dmem_free(29,(double **)&rowcount,funcname);
if (masterinfo->fit_to_page==0)
{
@@ -1708,3 +1756,117 @@ exit(10);
return(bp2e);
return(bp1e);
}
+#endif /* 0 */
+
+
+/*
+** Find gaps in the master bitmap so that it can be broken into regions
+** which go onto separate pages.
+**
+** maxsize is the ideal desired bitmap size to fit the page.
+** Depending on the fit_to_page setting, the bitmap can actually go
+** beyond this.
+**
+** Returns the breaking point (a row position in the master bitmap) for
+** the next page:
+**     - masterinfo->rows if the master has fewer than maxsize rows, if
+**       fit_to_page is -2, or (fit_to_page==0 only) if the rows fit the
+**       page almost exactly.
+**     - Otherwise a position chosen from the text rows reported by
+**       bmpregion_find_textrows():  r1 follows the last text row whose r2
+**       is not beyond maxsize, r2 follows the first text row whose r2 is
+**       beyond maxsize.  r1 is returned unless it is less than 25% of
+**       maxsize, in which case r2 (limited to scanheight) is returned.
+**
+** k2settings supplies dst_dpi for the scan region and is passed on to
+** bmpregion_find_textrows().
+**
+** Re-written to use bmpregion_find_textrows() in v2.10.
+**
+*/
+static int masterinfo_break_point(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings,int maxsize)
+
+ {
+ int scanheight,j,r1,r2,r1a,r2a; /* r1,r2 = break candidates; r1a,r2a = mid-gap versions */
+ BMPREGION region;
+ WILLUSBITMAP *bmp,_bmp;
+
+/*
+k2printf("@breakpoint, mi->rows=%d, maxsize=%d\n",masterinfo->rows,maxsize);
+k2printf(" fit_to_page=%d\n",(int)masterinfo->fit_to_page);
+{
+static int count=1;
+char filename[256];
+sprintf(filename,"page%04d.png",count++);
+bmp_write(&masterinfo->bmp,filename,stdout,100);
+}
+*/
+ /* masterinfo->fit_to_page==-2 means user specified -f2p -2 which means */
+ /* flush entire contents of master to single page every time. */
+ if (masterinfo->rows<maxsize || masterinfo->fit_to_page==-2)
+ return(masterinfo->rows);
+
+ /* scanheight tells how far down the master bitmap to scan */
+ if (masterinfo->fit_to_page==-1)
+ scanheight=masterinfo->rows;
+ else if (masterinfo->fit_to_page>0)
+ scanheight=(int)(((1.+masterinfo->fit_to_page/100.)*maxsize)+.5);
+ else
+ scanheight=maxsize;
+ /* If available rows almost exactly fit page, just send the whole thing */
+/*
+k2printf(" scanheight=%d, mi->rows=%d, fabs=%g\n",scanheight,masterinfo->rows,
+fabs((double)scanheight/masterinfo->rows-1.));
+*/
+ if (masterinfo->fit_to_page==0 && (abs(scanheight-masterinfo->rows)<=1
+ || fabs((double)scanheight/masterinfo->rows-1.)<.002))
+ return(masterinfo->rows);
+ if (scanheight > masterinfo->rows) /* never scan past the rows that exist */
+ scanheight=masterinfo->rows;
+
+ /*
+ ** Find text rows (and gaps between)
+ **
+ ** The search runs on a temporary grayscale copy of the master bitmap
+ ** whose height is cut to 1.4 x scanheight (but no more than
+ ** masterinfo->rows), covering the full width of that copy.
+ ** NOTE(review): cutting bmp->height presumably keeps the top rows of
+ ** the copy--confirm against the WILLUSBITMAP row storage order.
+ */
+ bmp=&_bmp;
+ bmp_init(bmp);
+ if (bmp_is_grayscale(&masterinfo->bmp))
+ bmp_copy(bmp,&masterinfo->bmp);
+ else
+ bmp_convert_to_grayscale_ex(bmp,&masterinfo->bmp);
+ bmp->height=scanheight*1.4;
+ if (bmp->height > masterinfo->rows)
+ bmp->height = masterinfo->rows;
+ bmpregion_init(&region);
+ region.bgcolor=masterinfo->bgcolor;
+ region.c1=0;
+ region.c2=bmp->width-1;
+ region.r1=0;
+ region.r2=bmp->height-1;
+ region.bmp8=bmp;
+ region.bmp=bmp;
+ region.dpi=k2settings->dst_dpi;
+ bmpregion_find_textrows(&region,k2settings,0,1);
+/*
+{
+static int count=1;
+char filename[256];
+sprintf(filename,"page%04d.png",count);
+bmp_write(bmp,filename,stdout,100);
+printf("\nmaxsize=%d, scanheight=%d, dst_dpi=%d\n",maxsize,scanheight,k2settings->dst_dpi);
+printf("OUTPUT PAGE %d\n",count++);
+for (j=0;j<region.textrows.n;j++)
+{
+printf("%d. ",j+1);
+textrow_echo(&region.textrows.textrow[j],stdout);
+}
+}
+*/
+ bmp_free(bmp); /* only region.textrows is read from here on */
+ for (r1a=r2a=r1=r2=j=0;j<region.textrows.n;j++)
+ {
+ TEXTROW *row;
+
+ row=&region.textrows.textrow[j];
+ r2=row->r2+1; /* candidate break just past this text row */
+ if (j<region.textrows.n-1)
+ r2a=(row->r2+region.textrows.textrow[j+1].r1)/2; /* Midpoint of gap to next text row */
+ else
+ r2a=r2; /* last text row: no following gap */
+ if (row->r2 > maxsize) /* first text row that ends beyond maxsize */
+ break;
+ r1=r2; /* this text row fits: remember the break after it */
+ r1a=r2a;
+ }
+ bmpregion_free(&region);
+ if (r1a<=maxsize) /* use the mid-gap position when it is still within maxsize */
+ r1=r1a;
+ if (r2a<=scanheight) /* likewise for r2, limited by scanheight */
+ r2=r2a;
+ return(r1<maxsize*.25 ? (r2<scanheight ? r2:scanheight) : r1); /* r1 under 25% of maxsize: use r2, capped at scanheight */
+ }
View
27 k2pdfoptlib/k2menu.c
@@ -278,11 +278,18 @@ int k2pdfopt_menu(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,STRBUF
}
else if (!stricmp(buf,"bp"))
{
- status=userinput_string("Break output pages at end of each input page",ansyesno,k2settings->dst_break_pages?"y":"n");
+ int bpo;
+ printf("Page breaking options:\n"
+ "1. Absolutely no special page breaks.\n"
+ "2. Special page breaks at bookmark positions only.\n"
+ "3. Break pages after each source page.\n"
+ "4. Break pages at each \"green\" boundary.\n"
+ "5. Put a gap between each source page.\n");
+ bpo = k2settings->dst_break_pages < 0 ? 5 : k2settings->dst_break_pages+1;
+ status=userinput_integer("Enter option",bpo,&bpo,1,5);
if (status<0)
return(status);
- k2settings->dst_break_pages=(status==0) ? 1 : 0;
- if (!k2settings->dst_break_pages)
+ if (bpo==5)
{
double x;
x=0.;
@@ -295,10 +302,16 @@ int k2pdfopt_menu(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,STRBUF
strbuf_sprintf(usermenu,"-bp %g",x);
}
else
+ {
+ k2settings->dst_break_pages=1;
strbuf_sprintf(usermenu,"-bp-");
+ }
}
else
- strbuf_sprintf(usermenu,"-bp");
+ {
+ k2settings->dst_break_pages=bpo-1;
+ strbuf_sprintf(usermenu,"-bp%s",bpo==4?"+":(bpo==3?"":(bpo==2?"-":"--")));
+ }
status=userinput_integer("Fit-to-page value",k2settings->dst_fit_to_page,&k2settings->dst_fit_to_page,
-2,999);
if (status<0)
@@ -559,8 +572,8 @@ int k2pdfopt_menu(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,STRBUF
}
else if (!stricmp(buf,"mo"))
{
- static char *modename[]={"default","copy","fitwidth","2-column","grid",""};
- static char *shortname[]={"def","copy","fw","2col","grid"};
+ static char *modename[]={"default","copy","trim","crop","fitwidth","fitpage","2-column","grid",""};
+ static char *shortname[]={"def","copy","tm","crop","fw","fp","2col","grid"};
double v[3];
status=userinput_string("Operating mode",modename,"default");
@@ -862,11 +875,13 @@ int k2pdfopt_menu(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,STRBUF
if (!status)
k2settings->text_wrap=2;
strbuf_sprintf(usermenu,"-wrap%s",k2settings->text_wrap==2?"+":"");
+ /*
status=userinput_string("Preserve indentation",ansyesno,k2settings->preserve_indentation?"y":"n");
if (status<0)
return(status);
k2settings->preserve_indentation=!status;
strbuf_sprintf(usermenu,"-pi%s",k2settings->preserve_indentation?"":"-");
+ */
status=userinput_string("Detect/eliminate hyphens",ansyesno,k2settings->hyphen_detect?"y":"n");
if (status<0)
return(status);
View
114 k2pdfoptlib/k2parsecmd.c
@@ -105,6 +105,7 @@ int parse_cmd_args(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,
MINUS_OPTION("-guimin",guimin,2)
#endif
MINUS_OPTION("-?",show_usage,2)
+ MINUS_OPTION("-toc",use_toc,1)
MINUS_OPTION("-sp",echo_source_page_count,1)
MINUS_OPTION("-neg",dst_negative,1)
MINUS_OPTION("-hy",hyphen_detect,1)
@@ -250,25 +251,32 @@ int parse_cmd_args(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,
if (setvals==1)
{
if (!stricmp(cl->cmdarg,"pdfr")
- || !stricmp(cl->cmdarg,"copy"))
+ || !stricmp(cl->cmdarg,"copy")
+ || !stricmp(cl->cmdarg,"trim")
+ || !stricmp(cl->cmdarg,"crop")
+ || !stricmp(cl->cmdarg,"tm"))
{
+ int tm,crop;
+
+ crop=(!stricmp(cl->cmdarg,"crop"));
+ tm=(!stricmp(cl->cmdarg,"trim") || !stricmp(cl->cmdarg,"tm"));
/* -n- -wrap- -col 1 -vb -2 -w -1 -h -1 -dpi 150 -rt 0 -c -t- -f2p -2 */
/* -m 0 -om 0 -pl 0 -pr 0 -pt 0 -pb 0 -mc- */
- k2settings->use_crop_boxes=0;
+ k2settings->use_crop_boxes= (tm||crop) ? 1 : 0;
#ifdef HAVE_OCR_LIB
- k2settings->dst_ocr='m';
+ k2settings->dst_ocr=(tm||crop) ? 0 : 'm';
#endif
k2settings->text_wrap=0;
k2settings->max_columns=1;
k2settings->vertical_break_threshold=-2;
k2settings->dst_userwidth=1.0;
- k2settings->dst_userwidth_units=UNITS_SOURCE;
+ k2settings->dst_userwidth_units=(tm||crop) ? UNITS_TRIMMED : UNITS_SOURCE;
k2settings->dst_userheight=1.0;
- k2settings->dst_userheight_units=UNITS_SOURCE;
+ k2settings->dst_userheight_units=(tm||crop) ? UNITS_TRIMMED : UNITS_SOURCE;
k2settings->dst_dpi=150;
k2settings->src_rot=0.;
k2settings->dst_color=1;
- k2settings->src_trim=0;
+ k2settings->src_trim=tm ? 1 : 0;
k2settings->dst_fit_to_page=-2;
k2settings->mar_left=k2settings->mar_top=k2settings->mar_right=k2settings->mar_bot=0.;
k2settings->dst_mar=k2settings->dst_marleft=k2settings->dst_martop=k2settings->dst_marright=k2settings->dst_marbot=0.;
@@ -277,8 +285,13 @@ int parse_cmd_args(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,
}
else if (!stricmp(cl->cmdarg,"fw")
|| !stricmp(cl->cmdarg,"sopdf")
- || !stricmp(cl->cmdarg,"fitwidth"))
+ || !stricmp(cl->cmdarg,"fitwidth")
+ || !stricmp(cl->cmdarg,"fp")
+ || !stricmp(cl->cmdarg,"fitpage"))
{
+ int fitpage;
+
+ fitpage=(!stricmp(cl->cmdarg,"fp") || !stricmp(cl->cmdarg,"fitpage"));
/* -wrap- -col 1 -vb -2 -t -ls */
k2settings->use_crop_boxes=1;
#ifdef HAVE_OCR_LIB
@@ -288,7 +301,9 @@ int parse_cmd_args(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,
k2settings->max_columns=1;
k2settings->vertical_break_threshold=-2;
k2settings->src_trim=1;
- k2settings->dst_landscape=1;
+ if (fitpage)
+ k2settings->dst_fit_to_page=-2;
+ k2settings->dst_landscape=fitpage ? 0 : 1;
}
else if (!stricmp(cl->cmdarg,"2col")
|| !stricmp(cl->cmdarg,"2-column")
@@ -406,18 +421,19 @@ int parse_cmd_args(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,
continue;
}
if (!stricmp(cl->cmdarg,"-bp") || !stricmp(cl->cmdarg,"-bp-")
+ || !stricmp(cl->cmdarg,"-bp--")
|| !stricmp(cl->cmdarg,"-bp+"))
{
if (cl->cmdarg[3]=='-')
{
if (setvals==1)
- k2settings->dst_break_pages=0;
+ k2settings->dst_break_pages = (cl->cmdarg[4]=='-' ? 0 : 1);
continue;
}
if (cl->cmdarg[3]=='+')
{
if (setvals==1)
- k2settings->dst_break_pages=2;
+ k2settings->dst_break_pages=3;
continue;
}
if (cmdlineinput_next(cl)==NULL)
@@ -430,7 +446,7 @@ int parse_cmd_args(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,
else
{
if (setvals==1)
- k2settings->dst_break_pages=1;
+ k2settings->dst_break_pages=2;
readnext=0;
}
continue;
@@ -876,6 +892,79 @@ int parse_cmd_args(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,
}
continue;
}
+ if (!stricmp(cl->cmdarg,"-cbox-"))
+ {
+ if (setvals==1)
+ k2settings->cropboxes.n=0;
+ continue;
+ }
+ if (!stricmp(cl->cmdarg,"-cbox")
+ || !stricmp(cl->cmdarg,"-cboxe")
+ || !stricmp(cl->cmdarg,"-cboxo"))
+ {
+ int c;
+
+ c=tolower(cl->cmdarg[5]);
+ if (cmdlineinput_next(cl)==NULL)
+ break;
+ if (setvals==1)
+ {
+ double v[4];
+ int na,index;
+
+ if (k2settings->cropboxes.n>=MAXK2CROPBOXES)
+ {
+ static int warned=0;
+ if (!warned && !quiet)
+ {
+ k2printf(TTEXT_WARN "\a\n** Max crop boxes exceeded (max=%d). **\n\n",
+ MAXK2CROPBOXES);
+ k2printf(TTEXT_WARN "\a\n** Crop box %s and subsequent ignored. **\n\n",
+ cl->cmdarg);
+ }
+ warned=1;
+ continue;
+ }
+ na=string_read_doubles(cl->cmdarg,v,4);
+ if (na!=4)
+ {
+ if (!quiet)
+ k2printf(TTEXT_WARN "\a\n** Crop box %s is invalid and will be ignored. **\n\n"
+ TTEXT_NORMAL,cl->cmdarg);
+ }
+ else
+ {
+ index=k2settings->cropboxes.n;
+ k2settings->cropboxes.cropbox[index].flags=(c=='e')?1:(c=='o'?2:3);
+ k2settings->cropboxes.cropbox[index].left=v[0];
+ k2settings->cropboxes.cropbox[index].top=v[1];
+ k2settings->cropboxes.cropbox[index].width=v[2];
+ k2settings->cropboxes.cropbox[index].height=v[3];
+ k2settings->cropboxes.n++;
+ }
+ }
+ continue;
+ }
+ if (!stricmp(cl->cmdarg,"-pad"))
+ {
+ if (cmdlineinput_next(cl)==NULL)
+ break;
+ if (setvals==1)
+ {
+ double v[4];
+ int na;
+ na=string_read_doubles(cl->cmdarg,v,4);
+ if (na>=1)
+ k2settings->pad_left=k2settings->pad_top=k2settings->pad_right=k2settings->pad_bottom=v[0];
+ if (na>=2)
+ k2settings->pad_top=k2settings->pad_right=k2settings->pad_bottom=v[1];
+ if (na>=3)
+ k2settings->pad_right=k2settings->pad_bottom=v[2];
+ if (na>=4)
+ k2settings->pad_bottom=v[3];
+ }
+ continue;
+ }
if (!strnicmp(cl->cmdarg,"-hq",3))
{
if (setvals==1)
@@ -915,6 +1004,9 @@ int parse_cmd_args(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,
readnext=0;
continue;
}
+ NEEDS_STRING("-toclist",toclist,2047);
+ NEEDS_STRING("-tocsave",tocsavefile,MAXFILENAMELEN-1);
+ NEEDS_STRING("-bpl",bpl,2047);
NEEDS_STRING("-p",pagelist,1023)
#ifdef HAVE_OCR_LIB
NEEDS_STRING("-ocrout",ocrout,127)
View
44 k2pdfoptlib/k2pdfopt.h
@@ -37,6 +37,7 @@
** 0x1000 = find text words
** 0x2000 = GUI
** 0x4000 = Keep console for GUI debugging
+** 0x8000 = vertical line detection
**
*/
@@ -55,7 +56,7 @@
#include <willus.h>
/* Uncomment below if compiling for Kindle PDF Viewer */
-#define K2PDFOPT_KINDLEPDFVIEWER
+/* #define K2PDFOPT_KINDLEPDFVIEWER */
/*
** The HAVE_..._LIB defines should now be carried over from willus.h,
@@ -69,11 +70,9 @@
#ifdef HAVE_GOCR_LIB
#undef HAVE_GOCR_LIB
#endif
-*/
#ifdef HAVE_TESSERACT_LIB
#undef HAVE_TESSERACT_LIB
#endif
-/*
#ifdef HAVE_DJVU_LIB
#undef HAVE_DJVU_LIB
#endif
@@ -149,6 +148,19 @@
/*
** DATA STRUCTURES
*/
+typedef struct /* One user-specified crop box (see -cbox, -cboxe, -cboxo) */
+ {
+ int flags; /* Page parity mask: bit 0 = even pages, bit 1 = odd pages (3 = both) */
+ double left,top,width,height; /* inches from upper-left corner of page */
+ } K2CROPBOX;
+
+#define MAXK2CROPBOXES 32
+
+typedef struct /* Fixed-capacity list of crop boxes (at most MAXK2CROPBOXES) */
+ {
+ K2CROPBOX cropbox[MAXK2CROPBOXES]; /* Entries 0..n-1 are in use */
+ int n; /* Number of crop boxes stored; 0 means no crop boxes specified */
+ } K2CROPBOXES;
/*
** K2PDFOPT_SETTINGS stores user settings that affect the document processing.
@@ -248,6 +260,10 @@ typedef struct
double word_spacing;
double display_width_inches; /* Device width = dst_width / dst_dpi */
char pagelist[1024];
+ char bpl[2048]; /* Page break list--see -bpl option */
+ int use_toc;
+ char toclist[2048];
+ char tocsavefile[MAXFILENAMELEN];
int column_fitted;
double lm_org,bm_org,tm_org,rm_org,dpi_org;
double contrast_max;
@@ -271,6 +287,7 @@ typedef struct
int src_grid_cols;
int src_grid_overlap_percentage;
int src_grid_order; /* 0=down then across, 1=across then down */
+ K2CROPBOXES cropboxes; /* Crop boxes */
/*
** Preview options
*/
@@ -282,18 +299,6 @@ typedef struct
double no_wrap_ar_limit; /* -arlim */
double no_wrap_height_limit_inches; /* -whmax */
double little_piece_threshold_inches; /* -rwmin */
- /*
- ** Keeping track of vertical gaps
- **
- ** These vars removed from k2settings as of v2.00. Gaps between regions
- ** are kept track of in the masterinfo->lastrow var now. See masterinfo_add_bitmap().
- */
- /* double last_scale_factor_internal; */
- /* int line_spacing_internal; */ /* If > 0, try to maintain regular line spacing. If < 0, */
- /* indicates desired vert. gap before next region is added. */
- /* int last_rowbase_internal; */ /* Pixels between last text row baseline and current end */
- /* of destination bitmap. */
- /* int gap_override_internal; */ /* If > 0, apply this gap in wrapbmp_flush() and then reset. */
} K2PDFOPT_SETTINGS;
@@ -488,13 +493,16 @@ typedef struct
{
char srcfilename[MAXFILENAMELEN];
char ocrfilename[MAXFILENAMELEN];
- PDFFILE outfile; /* PDF output file data structure */
+ int outline_srcpage_completed; /* Which source page was last checked in the outline */
+ PDFFILE outfile; /* PDF output file data structure */
+ WPDFOUTLINE *outline; /* PDF outline / bookmarks structure--loaded by MuPDF only */
WILLUSBITMAP bmp; /* Master output bitmap collects pages that will go to */
/* the output device */
WILLUSBITMAP *preview_bitmap;
int preview_captured; /* = 1 if preview bitmap obtained */
WRAPBMP wrapbmp; /* See WRAPBMP structure */
WPDFPAGEINFO pageinfo; /* Holds crop boxes for native PDF output */
+ int srcpages; /* Total pages in source file */
int rows; /* Rows stored within the bmp structure */
int published_pages; /* Count of published pages */
int bgcolor;
@@ -705,6 +713,7 @@ void k2pdfopt_conversion_init(K2PDFOPT_CONVERSION *k2conv);
void k2pdfopt_conversion_close(K2PDFOPT_CONVERSION *k2conv);
void k2pdfopt_settings_copy(K2PDFOPT_SETTINGS *dst,K2PDFOPT_SETTINGS *src);
int k2pdfopt_settings_set_to_device(K2PDFOPT_SETTINGS *k2settings,DEVPROFILE *dp);
+void k2pdfopt_settings_quick_sanity_check(K2PDFOPT_SETTINGS *k2settings);
void k2pdfopt_settings_sanity_check(K2PDFOPT_SETTINGS *k2settings);
void k2pdfopt_settings_new_source_document_init(K2PDFOPT_SETTINGS *k2settings);
void k2pdfopt_settings_restore_output_dpi(K2PDFOPT_SETTINGS *k2settings);
@@ -760,6 +769,7 @@ void masterinfo_remove_top_rows(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2sett
int masterinfo_get_next_output_page(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings,
int flushall,WILLUSBITMAP *bmp,double *bmpdpi,
int *size_reduction,void *ocrwords);
+int masterinfo_should_flush(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings);
/* k2publish.c */
void masterinfo_publish(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings,int flushall);
@@ -773,6 +783,7 @@ void k2ocr_ocrwords_fill_in_ex(OCRWORDS *words,BMPREGION *region,K2PDFOPT_SETTIN
/* pagelist.c */
int pagelist_valid_page_range(char *pagelist);
+int pagelist_includes_page(char *pagelist,int pageno,int maxpages);
int pagelist_page_by_index(char *pagelist,int index,int maxpages);
int pagelist_count(char *pagelist,int maxpages);
@@ -925,6 +936,7 @@ char *k2gui_short_name(char *filename);
void k2gui_preview_toggle_size(int increment);
void k2gui_preview_refresh(void);
void k2gui_preview_paint(void);
+int k2gui_previewing(void);
/* k2gui_cbox.c */
int k2gui_cbox_converting(void);
View
95 k2pdfoptlib/k2proc.c
@@ -25,6 +25,8 @@
static void pageregions_grid(PAGEREGIONS *pageregions,BMPREGION *region,
K2PDFOPT_SETTINGS *k2settings,int level);
+static void pageregions_from_cropboxes(PAGEREGIONS *pageregions,BMPREGION *region,
+ K2PDFOPT_SETTINGS *k2settings,int srcpageno);
static void pageregions_find_next_level(PAGEREGIONS *pageregions_sorted,BMPREGION *srcregion,
K2PDFOPT_SETTINGS *k2settings,int level);
static double median_val(double *x,int n);
@@ -106,29 +108,38 @@ void bmpregion_source_page_add(BMPREGION *region,K2PDFOPT_SETTINGS *k2settings,
/* Find page regions */
pageregions=&_pageregions;
pageregions_init(pageregions);
- gridded = (k2settings->src_grid_cols > 0 && k2settings->src_grid_rows > 0);
- if (gridded)
- pageregions_grid(pageregions,region,k2settings,0);
+
+ /* Did user specify specific crop regions? */
+ gridded=0;
+ if (k2settings->cropboxes.n>0)
+ pageregions_from_cropboxes(pageregions,region,k2settings,masterinfo->pageinfo.srcpage);
else
{
- int maxlevels;
- if (k2settings->max_columns<=1)
- maxlevels=1;
- else if (k2settings->max_columns<=2)
- maxlevels=2;
+ gridded = (k2settings->src_grid_cols > 0 && k2settings->src_grid_rows > 0);
+ if (gridded)
+ pageregions_grid(pageregions,region,k2settings,0);
else
- maxlevels=3;
- pageregions_find(pageregions,region,k2settings,maxlevels);
+ {
+ int maxlevels;
+ if (k2settings->max_columns<=1)
+ maxlevels=1;
+ else if (k2settings->max_columns<=2)
+ maxlevels=2;
+ else
+ maxlevels=3;
+ pageregions_find(pageregions,region,k2settings,maxlevels);
+ }
}
-
trim_regions = ((k2settings->vertical_break_threshold<-1.5
|| k2settings->dst_fit_to_page==-2
- || gridded)
+ || gridded
+ || k2settings->cropboxes.n>0)
&& (k2settings->dst_userwidth_units==UNITS_TRIMMED
|| k2settings->dst_userheight_units==UNITS_TRIMMED));
/* Process page regions */
+/*
#if (!(WILLUSDEBUGX & 0x200))
if (k2settings->debug)
#endif
@@ -141,6 +152,7 @@ void bmpregion_source_page_add(BMPREGION *region,K2PDFOPT_SETTINGS *k2settings,
pageregions->pageregion[i].bmpregion.c2,
pageregions->pageregion[i].bmpregion.r2);
}
+*/
for (ipr=0;ipr<pageregions->n;ipr++)
{
int level,fitcols;
@@ -159,7 +171,7 @@ void bmpregion_source_page_add(BMPREGION *region,K2PDFOPT_SETTINGS *k2settings,
/* Process this region */
level = pageregions->pageregion[ipr].level;
- if (gridded || !pageregions->pageregion[ipr].fullspan)
+ if (k2settings->cropboxes.n>0 || gridded || !pageregions->pageregion[ipr].fullspan)
{
level *= 2;
fitcols = k2settings->fit_columns;
@@ -195,7 +207,8 @@ static void pageregions_grid(PAGEREGIONS *pageregions,BMPREGION *region,
BMPREGION *srcregion,_srcregion;
srcregion=&_srcregion;
- (*srcregion)=(*region);
+ bmpregion_init(srcregion);
+ bmpregion_copy(srcregion,region,0);
nr=k2settings->src_grid_cols*k2settings->src_grid_rows;
for (i=0;i<nr;i++)
{
@@ -243,6 +256,55 @@ static void pageregions_grid(PAGEREGIONS *pageregions,BMPREGION *region,
/*
+** Determine page regions from user-specified crop boxes.
+*/
+static void pageregions_from_cropboxes(PAGEREGIONS *pageregions,BMPREGION *region,
+ K2PDFOPT_SETTINGS *k2settings,int srcpageno) /* srcpageno: used only for the even/odd test */
+ /* Adds one page region to pageregions for each crop box that applies to srcpageno. */
+ {
+ int i;
+ BMPREGION *srcregion,_srcregion; /* scratch region; c1/c2/r1/r2 are overwritten per crop box */
+
+ srcregion=&_srcregion;
+ bmpregion_init(srcregion);
+ bmpregion_copy(srcregion,region,0); /* start from a copy of the caller's region */
+ for (i=0;i<k2settings->cropboxes.n;i++)
+ {
+ K2CROPBOX *cropbox;
+
+ cropbox=&k2settings->cropboxes.cropbox[i];
+ /* Box is not flagged for even pages (bit 0 clear) and page is even: skip */
+ if (!(cropbox->flags&1) && !(srcpageno&1))
+ continue;
+ /* Box is not flagged for odd pages (bit 1 clear) and page is odd: skip */
+ if (!(cropbox->flags&2) && (srcpageno&1))
+ continue;
+ srcregion->c1 = cropbox->left*k2settings->src_dpi; /* left edge: inches * src_dpi */
+ if (srcregion->c1<0) /* clamp c1 to [0, bmp8 width-1] */
+ srcregion->c1=0;
+ if (srcregion->c1 > region->bmp8->width-1)
+ srcregion->c1=region->bmp8->width-1;
+ srcregion->c2 = (cropbox->left+cropbox->width)*k2settings->src_dpi; /* right edge */
+ if (srcregion->c2<srcregion->c1) /* keep c2 >= c1, then clamp to bmp8 width-1 */
+ srcregion->c2=srcregion->c1;
+ if (srcregion->c2> region->bmp8->width-1)
+ srcregion->c2=region->bmp8->width-1;
+ srcregion->r1 = cropbox->top*k2settings->src_dpi; /* top edge: inches * src_dpi */
+ if (srcregion->r1<0) /* clamp r1 to [0, bmp8 height-1] */
+ srcregion->r1=0;
+ if (srcregion->r1 > region->bmp8->height-1)
+ srcregion->r1=region->bmp8->height-1;
+ srcregion->r2 = (cropbox->top+cropbox->height)*k2settings->src_dpi; /* bottom edge */
+ if (srcregion->r2<srcregion->r1) /* keep r2 >= r1, then clamp to bmp8 height-1 */
+ srcregion->r2=srcregion->r1;
+ if (srcregion->r2 > region->bmp8->height-1)
+ srcregion->r2=region->bmp8->height-1;
+ pageregions_add_pageregion(pageregions,srcregion,0,0); /* NOTE(review): meaning of the two 0 args not visible here -- confirm */
+ }
+ }
+
+
+/*
** Return sorted list (by display order) of page regions up to appropriate level
** of recursion.
** maxlevels = 1: One region
@@ -697,6 +759,7 @@ gotone++;
psrc=bmp_rowptr_from_top(k2settings->dst_color ? newregion->bmp : newregion->bmp8,i)+bpp*newregion->c1;
memcpy(pdst,psrc,nc*bpp);
}
+
/*
** Now scale to appropriate destination size.
**
@@ -1359,7 +1422,7 @@ k2printf(" vertical_break_threshold=%g\n",k2settings->vertical_break_threshol
allow_text_wrapping=k2settings->text_wrap;
allow_vertical_breaks=(k2settings->vertical_break_threshold > -1.5);
/* Special case to break pages at "green" gaps */
- if (k2settings->dst_break_pages==2)
+ if (k2settings->dst_break_pages==3)
allow_vertical_breaks=0;
justification_flags=0x8f; /* Don't know region justification status yet. Use user settings. */
rbdelta=-1;
@@ -1563,7 +1626,7 @@ aprintf(ANSI_RED "mi->mandatory_region_gap changed to 1 by vertically_break." AN
bmpregion_add(bregion,k2settings,masterinfo,allow_text_wrapping,trim_flags,
allow_vertical_breaks,force_scale,justification_flags,caller_id,
marking_flags,rbdelta,region_is_centered);
- if (k2settings->dst_break_pages==2)
+ if (k2settings->dst_break_pages==3)
masterinfo_flush(masterinfo,k2settings);
regcount++;
i1=i2+1;
View
15 k2pdfoptlib/k2publish.c
@@ -76,6 +76,20 @@ masterinfo->preview_bitmap->width,masterinfo->preview_bitmap->height,masterinfo-
}
continue;
}
+
+ /* v2.10, Put destination page in outline / bookmarks */
+/*
+printf("use_toc=%d, outline=%p, spc=%d, srcpage=%d\n",k2settings->use_toc,masterinfo->outline,masterinfo->outline_srcpage_completed,masterinfo->pageinfo.srcpage);
+*/
+ if (k2settings->use_toc!=0
+ && masterinfo->outline!=NULL
+ && masterinfo->outline_srcpage_completed!=masterinfo->pageinfo.srcpage)
+ {
+ wpdfoutline_set_dstpage(masterinfo->outline,masterinfo->pageinfo.srcpage,
+ masterinfo->published_pages);
+ masterinfo->outline_srcpage_completed = masterinfo->pageinfo.srcpage;
+ }
+
/*
** Nothing to do inside loop if using crop boxes -- they all
** get written after all pages have been processed.
@@ -131,6 +145,7 @@ ocrwords->word[i].text);
if (masterinfo->ocrfilename[0]!='\0')
ocrwords_to_textfile(ocrwords,masterinfo->ocrfilename,
masterinfo->published_pages>1);
+
pdffile_add_bitmap_with_ocrwords(&masterinfo->outfile,bmp,bmpdpi,
k2settings->jpeg_quality,size_reduction,
ocrwords,k2settings->dst_ocr_visibility_flags);
View
88 k2pdfoptlib/k2settings.c
@@ -63,7 +63,7 @@ void k2pdfopt_settings_init(K2PDFOPT_SETTINGS *k2settings)
ocrwords_init(&k2settings->dst_ocrwords);
#endif
k2settings->dst_dither=1;
- k2settings->dst_break_pages=0;
+ k2settings->dst_break_pages=1;
k2settings->render_dpi=167;
k2settings->fit_columns=1;
k2settings->user_src_dpi=-2.0;
@@ -116,7 +116,7 @@ void k2pdfopt_settings_init(K2PDFOPT_SETTINGS *k2settings)
k2settings->dst_negative=0;
k2settings->exit_on_complete=-1;
k2settings->show_marked_source=0;
- k2settings->use_crop_boxes=1;
+ k2settings->use_crop_boxes=0;
k2settings->preserve_indentation=1;
k2settings->defect_size_pts=0.75;
k2settings->max_vertical_gap_inches=0.25;
@@ -153,6 +153,13 @@ void k2pdfopt_settings_init(K2PDFOPT_SETTINGS *k2settings)
k2pdfopt_settings_set_to_device(k2settings,devprofile_get("k2"));
k2settings->dst_width = k2settings->dst_userwidth;
k2settings->dst_height = k2settings->dst_userheight;
+
+ /* v2.10 */
+ k2settings->use_toc = -1;
+ k2settings->toclist[0]='\0';
+ k2settings->tocsavefile[0]='\0';
+ k2settings->bpl[0]='\0';
+ k2settings->cropboxes.n=0;
}
@@ -198,13 +205,7 @@ int k2pdfopt_settings_set_to_device(K2PDFOPT_SETTINGS *k2settings,DEVPROFILE *dp
}
-/*
-** Check / adjust k2pdfopt user input settings.
-**
-** This function is called before beginning the conversion of each new document...?
-**
-*/
-void k2pdfopt_settings_sanity_check(K2PDFOPT_SETTINGS *k2settings)
+void k2pdfopt_settings_quick_sanity_check(K2PDFOPT_SETTINGS *k2settings)
{
/* printf("@k2pdfopt_settings_sanity_check, k2settings=%p.\n",k2settings); */
@@ -234,6 +235,19 @@ void k2pdfopt_settings_sanity_check(K2PDFOPT_SETTINGS *k2settings)
if (k2settings->dst_ocr)
k2settings->use_crop_boxes=0;
#endif
+ }
+
+
+/*
+** Check / adjust k2pdfopt user input settings.
+**
+** This function is called before beginning the conversion of each new document...?
+**
+*/
+void k2pdfopt_settings_sanity_check(K2PDFOPT_SETTINGS *k2settings)
+
+ {
+ k2pdfopt_settings_quick_sanity_check(k2settings);
/*
** Apply display resolution
@@ -363,9 +377,9 @@ void k2pdfopt_settings_set_margins_and_devsize(K2PDFOPT_SETTINGS *k2settings,
{
static int count=0;
- static double wu=0.; /* Store untrimmed width, height */
- static double hu=0.;
- double swidth_in,sheight_in;
+// static double wu=0.; /* Store untrimmed width, height */
+// static double hu=0.;
+ double twidth_in,theight_in,swidth_in,sheight_in;
int new_width,new_height,zeroarea;
WPDFPAGEINFO *pageinfo;
@@ -378,45 +392,47 @@ printf("@k2pdfopt_settings_set_margins_and_devsize(region=%p,trimmed=%d)\n",regi
{
count=0;
k2pdfopt_settings_set_device_margins(k2settings);
- swidth_in = 8.5;
- sheight_in = 11.0;
+ twidth_in=swidth_in = 8.5;
+ theight_in=sheight_in = 11.0;
}
else
{
count++;
- if (trimmed)
- {
- swidth_in = (double)(region->c2-region->c1+1) / region->dpi;
- if (swidth_in < 1.0)
- swidth_in = 1.0;
- sheight_in = (double)(region->r2-region->r1+1) / region->dpi;
- if (sheight_in < 1.0)
- sheight_in = 1.0;
- if (region->c2-region->c1<=0 || region->r2-region->r1<=0)
- zeroarea=1;
- }
- else
- {
- swidth_in = (double)region->bmp->width / region->dpi;
- sheight_in = (double)region->bmp->height / region->dpi;
- }
+ twidth_in = (double)(region->c2-region->c1+1) / region->dpi;
+ if (twidth_in < 1.0)
+ twidth_in = 1.0;
+ theight_in = (double)(region->r2-region->r1+1) / region->dpi;
+ if (theight_in < 1.0)
+ theight_in = 1.0;
+ if (region->c2-region->c1<=0 || region->r2-region->r1<=0)
+ zeroarea=1;
+ swidth_in = (double)region->bmp->width / region->dpi;
+ sheight_in = (double)region->bmp->height / region->dpi;
}
+
+/*
if (trimmed)
{
- if (wu<=0.)
- wu=swidth_in;
- if (hu<=0.)
- hu=sheight_in;
+ if (wu<=0.)
+ wu=twidth_in;
+ if (hu<=0.)
+ hu=theight_in;
+ if (wu<=0.)
+ wu=swidth_in;
+ if (hu<=0.)
+ hu=sheight_in;
}
else
{
wu=swidth_in;
hu=sheight_in;
}
+printf("wu=%g, hu=%g\n",wu,hu);
+*/
new_width=devsize_pixels(k2settings->dst_userwidth,k2settings->dst_userwidth_units,
- wu,swidth_in,k2settings->dst_dpi);
+ swidth_in,twidth_in,k2settings->dst_dpi);
new_height=devsize_pixels(k2settings->dst_userheight,k2settings->dst_userheight_units,
- hu,sheight_in,k2settings->dst_dpi);
+ sheight_in,theight_in,k2settings->dst_dpi);
if (k2settings->dst_landscape)
int_swap(new_width,new_height)
if (count==1 || (count>1 && (new_width!=k2settings->dst_width || new_height!=k2settings->dst_height)))
View
51 k2pdfoptlib/k2settings2cmd.c
@@ -31,6 +31,7 @@ static void integer_check(STRBUF *cmdline,char *optname,int *srcval,int dstval);
static void double_check(STRBUF *cmdline,char *optname,double *srcval,double dstval);
static void string_check(STRBUF *cmdline,char *optname,char *srcval,char *dstval);
static char *unit_string(int units);
+static int cropboxes_are_different(K2CROPBOXES *src,K2CROPBOXES *dst);
/*
** Fills cmdline with the appropriate command-line options that will
@@ -44,7 +45,7 @@ void k2pdfopt_settings_get_cmdline(STRBUF *cmdline,K2PDFOPT_SETTINGS *dst,
{
STRBUF *shortest,_shortest;
K2PDFOPT_SETTINGS _src0,*src0;
- static char *modelabel[]={"def","fw","2col","copy",""};
+ static char *modelabel[]={"def","fw","fp","crop","2col","tm","copy",""};
int i,j,nd;
/*
@@ -199,7 +200,9 @@ static void k2settings_to_cmd(STRBUF *cmdline,K2PDFOPT_SETTINGS *dst,
continue;
}
*/
+ /*
minus_check(cmdline,"-pi",&src->preserve_indentation,dst->preserve_indentation);
+ */
plus_minus_check(cmdline,"-wrap",&src->text_wrap,dst->text_wrap);
#ifdef HAVE_MUPDF_LIB
if (src->user_usegs != dst->user_usegs)
@@ -254,12 +257,17 @@ static void k2settings_to_cmd(STRBUF *cmdline,K2PDFOPT_SETTINGS *dst,
integer_check(cmdline,"-f2p",&src->dst_fit_to_page,dst->dst_fit_to_page);
double_check(cmdline,"-vb",&src->vertical_break_threshold,dst->vertical_break_threshold);
minus_check(cmdline,"-sm",&src->show_marked_source,dst->show_marked_source);
+ minus_check(cmdline,"-toc",&src->use_toc,dst->use_toc);
if (src->dst_break_pages != dst->dst_break_pages)
{
if (dst->dst_break_pages==0)
- strbuf_sprintf(cmdline,"-bp-");
+ strbuf_sprintf(cmdline,"-bp--");
else if (dst->dst_break_pages==1)
+ strbuf_sprintf(cmdline,"-bp-");
+ else if (dst->dst_break_pages==2)
strbuf_sprintf(cmdline,"-bp");
+ else if (dst->dst_break_pages==3)
+ strbuf_sprintf(cmdline,"-bp+");
else
strbuf_sprintf(cmdline,"-bp %g",(-1.-dst->dst_break_pages)/1000.);
src->dst_break_pages = dst->dst_break_pages;
@@ -332,6 +340,9 @@ static void k2settings_to_cmd(STRBUF *cmdline,K2PDFOPT_SETTINGS *dst,
double_check(cmdline,"-comax",&src->column_offset_max,dst->column_offset_max);
integer_check(cmdline,"-col",&src->max_columns,dst->max_columns);
string_check(cmdline,"-p",src->pagelist,dst->pagelist);
+ string_check(cmdline,"-bpl",src->bpl,dst->bpl);
+ string_check(cmdline,"-toclist",src->toclist,dst->toclist);
+ string_check(cmdline,"-tocsave",src->tocsavefile,dst->tocsavefile);
integer_check(cmdline,"-bpc",&src->dst_bpc,dst->dst_bpc);
double_check(cmdline,"-g",&src->dst_gamma,dst->dst_gamma);
double_check(cmdline,"-cg",&src->min_column_gap_inches,dst->min_column_gap_inches);
@@ -344,6 +355,23 @@ static void k2settings_to_cmd(STRBUF *cmdline,K2PDFOPT_SETTINGS *dst,
double_check(cmdline,"-ds",&src->document_scale_factor,dst->document_scale_factor);
double_check(cmdline,"-idpi",&src->user_src_dpi,dst->user_src_dpi);
integer_check(cmdline,"-odpi",&src->dst_dpi,dst->dst_dpi);
+ if (cropboxes_are_different(&src->cropboxes,&dst->cropboxes))
+ {
+ int i;
+
+ strbuf_sprintf(cmdline,"-cbox-");
+ for (i=0;i<dst->cropboxes.n;i++)
+ {
+ int c;
+ c=dst->cropboxes.cropbox[i].flags;
+ strbuf_sprintf(cmdline,"-cbox%s %g,%g,%g,%g",
+ c==1?"e":(c==2?"o":""),
+ dst->cropboxes.cropbox[i].left,
+ dst->cropboxes.cropbox[i].top,
+ dst->cropboxes.cropbox[i].width,
+ dst->cropboxes.cropbox[i].height);
+ }
+ }
if (src->dst_figure_justify!=dst->dst_figure_justify
|| src->dst_min_figure_height_in != dst->dst_min_figure_height_in)
{
@@ -509,4 +537,23 @@ static char *unit_string(int units)
else
return(strvals[0]);
}
+
+
+static int cropboxes_are_different(K2CROPBOXES *src,K2CROPBOXES *dst)
+ /* Returns 1 if the two crop-box lists differ in count, flags, or any dimension; 0 if they match. */
+ {
+ int i;
+
+ if (src->n != dst->n) /* different number of boxes => different */
+ return(1);
+ for (i=0;i<src->n;i++)
+ if (fabs(src->cropbox[i].left-dst->cropbox[i].left)>1e-6 /* dimensions: absolute tolerance of 1e-6 */
+ || fabs(src->cropbox[i].top-dst->cropbox[i].top)>1e-6
+ || fabs(src->cropbox[i].width-dst->cropbox[i].width)>1e-6
+ || fabs(src->cropbox[i].height-dst->cropbox[i].height)>1e-6
+ || src->cropbox[i].flags!=dst->cropbox[i].flags) /* flags must match exactly */
+ return(1);
+ return(0);
+ }
+
#endif /* HAVE_K2GUI */
View
9 k2pdfoptlib/k2sys.c
@@ -121,11 +121,12 @@ int k2printf(char *fmt,...)
va_start(args,fmt);
#ifdef HAVE_K2GUI
if (k2gui_active() && k2gui_cbox_converting())
-{
-status=avprintf(stdout,fmt,args);
+ {
+#if (WILLUSDEBUGX & 0x4000)
+ status=avprintf(stdout,fmt,args);
+#endif
status=k2gui_cbox_vprintf(stdout,fmt,args);
-
-}
+ }
else
#endif
status=avprintf(stdout,fmt,args);
View
109 k2pdfoptlib/k2usage.c
@@ -64,23 +64,48 @@ static char *k2pdfopt_options=
" written. If -sm is also specified, then the bitmap is of\n"
" marked source page <pageno>. If -bmp-, then <pageno> is not\n"
" necessary. Default is -bmp-.\n"
-"-bp[+|-] [<inches>] Break [do not break] output pages at end of each input\n"
+"-bp[+|-|--] [<inches>] Break [do not break] output pages at end of each input\n"
" page. Default is -bp-. If a numeric value is put after -bp,\n"
" then rather than breaking the output page at the end of each\n"
" input page, a gap is inserted of that many inches, e.g.\n"
" -bp 1 will insert a 1-inch gap between contents of each\n"
" input page. Special option -bp+ will break the pages at\n"
" the green boundaries between region as marked by the -sm\n"
-" option (see -sm).\n"
+" option (see -sm). If bookmark information is available\n"
+" and -toc is specified (on by default) page breaks will be\n"
+" inserted in the converted file at each bookmark unless -bp--\n"
+" is specified. See also -toc, -bpl.\n"
"-bpc <nn> Set the bits per color plane on the output device to <nn>.\n"
" The value of <nn> can be 1, 2, 4, or 8. The default is 4\n"
" to match the kindle's display capability.\n"
+"-bpl <srcpagelist> Insert page break in destination file before each source\n"
+" file page listed in <srcpagelist>. This has the same format\n"
+" as the -p option. See also -p, -bp, -toc, -toclist. Default\n"
+" is no page list. Example: -bpl 10,25,50,70,93,117,143.\n"
+" This automatically sets -bp to it's default value (-bp-).\n"
"-c[-] Output in color [grayscale]. Default is grayscale.\n"
/*
"-cd <threshold> Set column detection threshold. Default = 0.01. Range\n"
" is 0 to 100. Higher makes it easier to detect columns.\n"
" If PDF is scanned and speckled, might set to .02 or .03.\n"
*/
+"-cbox[e|o|-] <cropbox> Similar to the -grid option, but allows you to specify\n"
+" exact crop boxes from the source page which will become\n"
+" individual output pages. You may specify this option\n"
+" multiple times to crop out different parts of each source\n"
+" page. <cropbox> has the format <left>,<top>,<width>,<height>\n"
+" where all dimensions are in inches and are the distance from\n"
+" the upper-left corner of the source page. Default is no crop\n"
+" boxes. In general, this command option is meant to be\n"
+" used separately from other typical modes of conversion (see\n"
+" the -mode command). Example: -cbox 1,1,6,9. You can use\n"
+" -cbox- to clear all cropboxes. You can use -cboxe to specify\n"
+" a cropbox that applies only to even pages and -cboxo for odd\n"
+" pages. Specifying crop boxes will override both -grid and/or\n"
+" the default methods that are used to find \"red-box\" page\n"
+" regions. If you want an exact rendering of one cropbox\n"
+" per output page, precede -cbox with -mode copy -n, e.g.\n"
+" k2pdfopt myfile.pdf -mode copy -n -cbox 2,3,3,4\n"
"-col <maxcol> Set max number of columns. <maxcol> can be 1, 2, or 4.\n"
" Default is -col 2. -col 1 disables column searching.\n"
"-cg <inches> Minimum column gap width in inches for detecting multiple\n"
@@ -171,7 +196,7 @@ static char *k2pdfopt_options=
" following the grid option with other command options:\n"
" -n -wrap- -f2p -2 -vb -2 -col 1. For example, if you want\n"
" a column search done on each grid piece, you can put this:\n"
-" -grid 2x2 -col 2.\n"
+" -grid 2x2 -col 2. See also -cbox.\n"
"-gtc <inches> Threshold value for detecting column gaps (expert mode).\n"
" Sets how many of the pixels in the column shaft can be\n"
" non-white (total height of a line crossing the shaft in\n"
@@ -269,36 +294,54 @@ static char *k2pdfopt_options=
"-mode <mode> Shortcut for setting multiple options at once which\n"
" determine the basic way in which k2pdfopt will behave.\n"
" Available modes are:\n"
-" copy Same as -n- -wrap- -col 1 -vb -2 -w -1 -h -1\n"
+" copy Same as -n- -wrap- -col 1 -vb -2 -w 1s -h 1s\n"
" -dpi 150 -rt 0 -c -t- -f2p -2 -m 0 -om 0 -pl 0\n"
" -pr 0 -pt 0 -pb 0 -mc-. Makes k2pdfopt\n"
" behave exactly like my pdfr program--source\n"
" pages are simply copied to the output file, but\n"
" rendered as bitmaps. No trimming or re-sizing\n"
" is done. Can also use -mode pdfr.\n"
+" fp Also can use fitpage. Same as -n -wrap- -col 1\n"
+" -vb -2 -f2p -2 -t.\n"
" fw Same as -n -wrap- -col 1 -vb -2 -t -ls. Makes\n"
" k2pdfopt behave like sopdf's \"fit width\"\n"
" option. Can also use -mode sopdf.\n"
" 2col Same as -n -wrap- -col 2 -vb -2 -t.\n"
" Optimizes for a 2-column scientific article with\n"
" native PDF output.\n"
+" tm Trim margins--same as -mode copy, but sets the\n"
+" output to be trimmed to the margins and the width\n"
+" and height of the output to match the trimmed\n"
+" source pages. Also uses native mode. Equivalent\n"
+" to -n -wrap- -col 1 -vb -2 -f2p -2 -t -w 1t -h 1t\n"
+" -rt 0 -c -m 0 -om 0 -pl 0 -pr 0 -pt 0 -pb 0 -mc-.\n"
+" Can also use -mode trim.\n"
+" crop Used with -cbox option, puts each cropped area\n"
+" on a separate page, untrimmed, and sizes the\n"
+" page to the cropped region. Same as -wrap-\n"
+" -col 1 -vb -2 -w 1t -h 1t -t- -rt 0 -c -f2p -2\n"
+" -m 0 -om 0 -pad 0 -mc-\n"
" def Default k2pdfopt mode: -wrap -n- -col 2 -vb 1.75\n"
" -dev k2 -rt auto -c- -t -f2p 0 -m 0 -om 0.02\n"
" -ls-.\n"
" You can modify modes by overriding their options after\n"
" specifying the mode, e.g. -mode fw -vb -1.\n"
#ifdef HAVE_MUPDF_LIB
-"-n[-] Use \"native\" PDF output format, i.e. try to perserve the\n"
-" native source PDF contents, i.e. do not write the output\n"
-" PDF file as a set of bitmaps rendered from the source file\n"
-" but instead use the source PDF's native content along with\n"
+"-n[-] Use \"native\" PDF output format. NOTE: if you want native\n"
+" PDF output, it's probably best to use a -mode option like\n"
+" -mode fitwidth or -mode 2col, both of which automatically\n"
+" turn on native PDF output and optimize other settings for it.\n"
+" Native PDF output preserves the native source PDF contents,\n"
+" i.e. the output PDF file is not rendered as a sequence of\n"
+" bitmapped pages like in the default k2pdfopt output mode.\n"
+" Instead, the source PDF's native content is used along with\n"
" additional PDF instructions to translate, scale, and crop\n"
" the source content. With native PDF output, if the source\n"
" file has selectable text, the text remains selectable in\n"
" the output file. The output file can also be zoomed\n"
" without loss of fidelity. This may also result in a\n"
" smaller output file (but not always). By default, native\n"
-" PDF output format is turned off.\n"
+" PDF output format is turned off. See also -mode.\n"
" NOTES:\n"
" 1. Native PDF output cannot be used with text wrapping\n"
" on (see -wrap option). Turning it on will disable\n"
@@ -308,16 +351,18 @@ static char *k2pdfopt_options=
" the scanned document includes a layer of OCR text).\n"
" 3. Native PDF output is incompatible with OCR (see -ocr),\n"
" though OCR is typically not necessary if the native PDF\n"
-" contents are kept. Turning it on will disable OCR.\n"
+" contents are kept. Turning on native PDF output will\n"
+" disable OCR.\n"
" 4. Native PDF output can only be used with PDF source\n"
-" files.\n"
+" files (it does not work with DJVU source files).\n"
" 5. Contrast adjust, gamma correction, and sharpening\n"
" are disabled with native PDF output.\n"
" 6. It is recommended that you use -vb -2 with native PDF\n"
" output, particularly if you are having difficulty\n"
" selecting/searching text in the output PDF file.\n"
-" 7. This option works well with -mode fw or with the\n"
-" -grid option. It is used by default in those cases.\n"
+" 7. This option works well with -mode fw, -mode 2col, or\n"
+" with the -grid option. It is used by default in those\n"
+" cases.\n"
#endif
"-neg[-] Inverse [don't inverse] the output images (white letters\n"
" on black background, or \"night mode\").\n"
@@ -413,11 +458,14 @@ static char *k2pdfopt_options=
"-p <pagelist> Specify pages to convert. <pagelist> must not have any\n"
" spaces. E.g. -p 1-3,5,9,10- would do pages 1 through 3,\n"
" page 5, page 9, and pages 10 through the end.\n"
+"-pad <padlist> A shortcut for -pl, -pt, -pr, -pb. E.g. -pad 15,10,13,20\n"
+" is the same as -pl 15 -pt 10 -pr 13 -pb 20. Also, using\n"
+" -pad 15 will set all pads to 15, for example.\n"
"-p[b|l|r|t] <nn> Pad [bottom|left|right|top] side of destination bitmap with\n"
" <nn> rows. Defaults = 4 (bottom), 0 (left), 3 (right), and\n"
" 0 (top). Example: -pb 10. This is typically only used on\n"
" certain devices to get the page to come out just right. For\n"
-" setting margins on the output device, use -om.\n"
+" setting margins on the output device, use -om. See also -pad.\n"
/*
"-pi[-] Preserve [don't preserve] indentation when wrapping text,\n"
" e.g. if the first line of each paragraph is indented, keep\n"
@@ -463,6 +511,39 @@ static char *k2pdfopt_options=
" any output region. Default is to trim. Using -t- is not\n"
" recommended unless you want to exactly duplicate the source\n"
" document.\n"
+"-toc[-] Include [don't include] table of contents / outline /\n"
+" bookmark information in the PDF output if it is available\n"
+" in the source file (works only for PDF source files and\n"
+" only if MuPDF is compiled in). By default, a new destination\n"
+" page is started at each bookmark location. To disable this,\n"
+" see the -bp option. If -toc- is specified, bookmark\n"
+" information from the source file is ignored. See also\n"
+" -toclist. Default is -toc.\n"
+"-toclist <pagelist>|<file> Override the PDF source file's outline information\n"
+" (bookmarks / table of contents) with either a list of source\n"
+" pages or a file describing the table of contents. If you\n"
+" specify a list of pages, e.g. -toclist 5,10,20,40,100\n"
+" then those pages are marked as Chapter 1, 2, etc.,\n"
+" respectively. If you specify a file name, the file should be\n"
+" a text file formatted like this example:\n"
+" 1 Introduction\n"
+" 10 Chapter 1\n"
+" +10 Chapter 1, Part A\n"
+" +25 Chapter 1, Part B\n"
+" ++25 Chapter 1, Part B, Subsection 1\n"
+" ++27 Chapter 1, Part B, Subsection 2\n"
+" +30 Chapter 1, Part C\n"
+" 50 Chapter 2\n"
+" 70 Chapter 3\n"
+" The '+' indicates a sub-level heading (multiple +'s for\n"
+" multiple sub-levels). The first number on the line is the\n"
+" source page reference number. The rest of the text on the\n"
+" line is the name of the chapter / subheading.\n"
+" Note: This option overrides -toc. To get a template from\n"
+" an existing PDF file, see the -tocsave option.\n"
+"-tocsave <file> If an outline exists in the PDF file (and -toc is specified)\n"
+" write that outline to text file <file> in the format required\n"
+" by -toclist. See -toc, -toclist.\n"
"-ui[-] User input query turned on [off]. Default = on for linux or\n"
" if not run from command line in Windows.\n"
"-v Verbose output.\n"
View
76 k2pdfoptlib/k2version.c
@@ -1,4 +1,4 @@
-char *k2pdfopt_version = "v2.02";
+char *k2pdfopt_version = "v2.12";
/*
** k2version.c K2pdfopt version number and history.
**
@@ -18,6 +18,80 @@ char *k2pdfopt_version = "v2.02";
** along with this program. If not, see <http://www.gnu.org/licenses/>.
**
** VERSION HISTORY
+** v2.12 30 NOV 2013
+** BUG FIXES
+** - No longer writes k2pdfopt_out.png when previewing in the GUI.
+** - Removed DLL dependencies from 64-bit Windows compile.
+**
+** v2.11 28 NOV 2013
+** BUG FIXES (MS WINDOWS GUI)
+** - Several routines in k2gui_cbox.c which are called from k2file.c
+** during the conversion were not correctly working during a preview
+** and were resulting in garbage sometimes being sent to the
+** desktop screen if the preview button was clicked after a file
+** conversion. This has been fixed.
+**
+** v2.10 23 NOV 2013
+** NEW FEATURES
+** - The PDF "Outlines" tree (often called "bookmarks" by PDF viewers)
+** that helps you navigate the PDF file and is usually shown in the left
+** pane of the PDF viewer is now preserved in the converted file. Or
+** you can create your own bookmarks from a simple text file if your
+** PDF source file doesn't have one (or if you want to change it).
+** See the -toc, -toclist, and -tocsave command-line options.
+** (toc = Table of Contents.) Destination page breaks are forced
+** at outline anchor pages by default (see -bp option).
+** - A new -cbox option allows you to specify a crop box to be applied
+** to each page. You can specify more than one, and each separate
+** crop box will be rendered to a different output page, similar to
+** the way the -grid option works. See -cbox in the command usage.
+** Using -mode crop with -cbox, you can crop a source PDF file to
+** a destination PDF file. You can specify different crop boxes
+** for even and odd pages, as well.
+** - The -bpl option now allows you to specify a list of source pages
+** where destination page breaks will be forced.
+** - Three new modes: -mode trim causes the source page to be trimmed and
+** the destination to be sized to the trimmed source. -mode fitpage
+** is similar, but squeezes the trimmed source page into the specified
+** device output screen size. -mode crop is a complement to the -cbox
+** option and causes each cropped box to be placed on a new page the
+** size of the cropped box.
+**
+** ENHANCEMENTS
+** - Windows versions are compiled with gcc 4.8.2.
+** - The Win64 binary is now compressed with UPX 3.91w which finally is
+** able to compress the Win64/PE format.
+**
+** BUG FIXES
+** - In native output, consecutive streams now delimited by white space.
+** http://www.mobileread.com/forums/showthread.php?p=2655550#post2655550
+** - Pages with no "/Contents" entry are correctly handled.
+** - Re-wrote masterinfo_break_point() to make use of
+** bmpregion_find_textrows() so that decisions on where to break
+** pages in the "fitwidth" mode should be more consistent and also
+** will be affected by the -gtr option.
+** http://www.mobileread.com/forums/showthread.php?p=2686067#post2686067
+** - Removed last vestiges of -pi option (interactive menu 'w' option
+** was incorrectly still using it).
+** - The vert_line_erase() function in k2bmp.c now correctly handles
+** the cbmp pointer when it is an 8-bit bitmap.
+** - Fixed a flow problem in k2file.c (k2pdfopt_proc_one() function)
+** which was causing the GUI preview not to work with -mode copy.
+** - The textrows_remove_small_rows() function no longer includes
+** figures (REGION_TYPE_FIGURE) when doing statistics on the row
+** heights.
+**
+** v2.03 21 SEP 2013
+** ENHANCEMENTS
+** - MuPDF library now uses the Sumatra versions of pdf-font.c and
+** pdf-fontfile.c so that it correctly checks Windows system fonts
+** for non-embedded fonts in the PDF file.
+**
+** BUG FIXES
+** - Native mode is correctly turned off as the default setting.
+** - Native mode output works correctly from the MS Windows GUI.
+** - Check boxes made consistent (native/wrap/OCR) with quick
+** sanity check call.
**
** v2.02 16 SEP 2013
** ENHANCEMENTS
View
16 k2pdfoptlib/pagelist.c
@@ -40,6 +40,22 @@ int pagelist_valid_page_range(char *s)
}
+int pagelist_includes_page(char *pagelist,int pageno,int maxpages)
+/* Returns 1 if pageno equals any page enumerated from pagelist, otherwise 0. */
+ {
+ int i,n;
+
+ /* Negative maxpages is replaced by a large stand-in count (value is sort of arbitrary) */
+ if (maxpages < 0)
+ maxpages = 99999;
+ n=pagelist_count(pagelist,maxpages);
+ for (i=0;i<n;i++)
+ if (pagelist_page_by_index(pagelist,i,maxpages)==pageno)
+ return(1);
+ return(0);
+ }
+
+
int pagelist_page_by_index(char *pagelist,int index,int maxpages)
{
View
29 k2pdfoptlib/textrows.c
@@ -227,11 +227,14 @@ void textrows_compute_row_gaps(TEXTROWS *textrows,int r2)
}
+/*
+** v2.10: Tosses out figures for computing statistics
+*/
void textrows_remove_small_rows(TEXTROWS *textrows,K2PDFOPT_SETTINGS *k2settings,
double fracrh,double fracgap,BMPREGION *region)
{
- int i,j,mg,mh,mg0,mg1;
+ int i,j,mg,mh,mg0,mg1,nr,ng;
int c1,c2,nc;
int *rh,*gap;
static char *funcname="textrows_remove_small_rows";
@@ -246,19 +249,27 @@ k2printf("@textrows_remove_small_rows(fracrh=%g,fracgap=%g)\n",fracrh,fracgap);
nc=c2-c1+1;
willus_dmem_alloc_warn(16,(void **)&rh,2*sizeof(int)*textrows->n,funcname,10);
gap=&rh[textrows->n];
- for (i=0;i<textrows->n;i++)
+ for (i=nr=ng=0;i<textrows->n;i++)
{
- rh[i]=textrows->textrow[i].r2-textrows->textrow[i].r1+1;
+ /* v2.10: Don't include figures in statistics */
+ if (textrows->textrow[i].type==REGION_TYPE_FIGURE)
+ continue;
+ rh[nr++]=textrows->textrow[i].r2-textrows->textrow[i].r1+1;
if (i<textrows->n-1)
- gap[i]=textrows->textrow[i].gapblank;
+ gap[ng++]=textrows->textrow[i].gapblank;
}
- sorti(rh,textrows->n);
- sorti(gap,textrows->n-1);
- mh=rh[textrows->n/2];
+ if (nr<2)
+ {
+ willus_dmem_free(16,(double **)&rh,funcname);
+ return;
+ }
+ sorti(rh,nr);
+ sorti(gap,ng);
+ mh=rh[nr/2];
mh *= fracrh;
if (mh<1)
mh=1;
- mg0=gap[(textrows->n-1)/2];
+ mg0=gap[ng/2];
mg = mg0*fracgap;
mg1 = mg0*0.7;
if (mg<1)
@@ -267,6 +278,7 @@ k2printf("@textrows_remove_small_rows(fracrh=%g,fracgap=%g)\n",fracrh,fracgap);
k2printf("mh = %d x %g = %d\n",rh[textrows->n/2],fracrh,mh);
k2printf("mg = %d x %g = %d\n",gap[textrows->n/2],fracgap,mg);
#endif
+ willus_dmem_free(16,(double **)&rh,funcname);
for (i=0;i<textrows->n;i++)
{
TEXTROW *textrow;
@@ -357,7 +369,6 @@ k2printf(" mh = %d, mg = %d\n",rh[textrows->n/2],gap[(textrows->n-1)/2]);