Skip to content

Commit

Permalink
Added: [pdftool] new subcommand text. pdf convert to text
Browse files Browse the repository at this point in the history
  • Loading branch information
mypaceshun committed Dec 22, 2021
1 parent d49f418 commit 770a04c
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 2 deletions.
150 changes: 149 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ python = "^3.7"
click = "^8.0"
gitignore-parser = "^0.0.8"
pikepdf = "^4.1.0"
"pdfminer.six" = "^20211012"

[tool.poetry.dev-dependencies]
pre-commit = "^2.16.0"
Expand Down
54 changes: 53 additions & 1 deletion shuncommands/pdftool.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pathlib import Path

import click
from pdfminer.high_level import extract_text
from pikepdf import Pdf


Expand Down Expand Up @@ -44,11 +45,62 @@ def unlock(pdffile, output, password, force):
output = Path(pdffile).with_suffix('.unlock.pdf')
else:
output = Path(output)
if output.exists() or not force:
if output.exists() and not force:
click.confirm(f'{output} is exists. Do you want to override?',
default=True,
abort=True,
show_default=True)

unlockpdf.save(output)
click.echo(f'output pdf file: {output}')


@ctx.command()
@click.version_option(None, '-v', '--version')
@click.help_option('-h', '--help')
@click.option('password', '-p',
              help='decrypt password')
@click.option('--output', '-o',
              help='output filename',
              type=click.Path())
@click.option('--force', '-f',
              help='<output> override',
              is_flag=True)
@click.option('--strip', '-s',
              help='strip to output line',
              is_flag=True)
@click.option('--remove-zero-line', '-0',
              help='remove zero length line and strip',
              is_flag=True)
@click.argument('pdffile',
                required=True,
                type=click.Path(exists=True))
def text(pdffile, output, password, force, strip, remove_zero_line):
    '''
    PDFファイルを、テキストファイルに変換する

    \f
    Convert a PDF file to plain text.

    Everything after the form feed above is maintainer documentation;
    click cuts the ``--help`` text at the first form feed, so the
    user-facing help stays exactly the line above.

    The text is extracted with ``pdfminer.high_level.extract_text`` and
    split on newlines. The lines are optionally stripped and filtered,
    then either echoed to stdout or written to ``output``.

    Args:
        pdffile: Path of the PDF to read (click checks that it exists).
        output: Destination file name. When falsy, the text is echoed
            to stdout instead of being written to a file.
        password: Decryption password, or None when ``-p`` is not given
            (an empty string is then passed to the extractor).
        force: When True, overwrite an existing ``output`` without asking.
        strip: When True, strip surrounding whitespace from every line.
        remove_zero_line: When True, strip every line and drop the ones
            that end up empty.

    Raises:
        click.Abort: If ``output`` exists, ``force`` is not set, and the
            user declines the overwrite confirmation.
    '''
    click.echo(f'input pdf file: {pdffile}')
    if password is not None:
        # Acknowledge that a password was supplied without printing it.
        click.echo('pdf file password: xxx')
    else:
        password = ''

    lines = extract_text(pdffile, password).split('\n')
    # Removing empty lines implies stripping, so whitespace-only lines
    # are dropped as well.
    if strip or remove_zero_line:
        lines = [line.strip() for line in lines]
    if remove_zero_line:
        lines = [line for line in lines if line]
    rendered = '\n'.join(lines)

    if not output:
        click.echo(rendered)
        return

    output_path = Path(output).expanduser()
    if output_path.exists() and not force:
        click.confirm(f'{output} is exists. Do you want to override?',
                      default=True,
                      abort=True,
                      show_default=True)
    # Pin the encoding: extracted PDF text is arbitrary Unicode, and the
    # locale default (e.g. cp932/cp1252 on Windows) can fail to encode it.
    with output_path.open('w', encoding='utf-8') as fd:
        fd.write(rendered)
    click.echo(f'output to {output}')

0 comments on commit 770a04c

Please sign in to comment.