# default_config.py
"""Options for the ebooktools.py script

`ebooktools.py`_ is a Python port of the Shell scripts `ebook-tools` developed
by `na--`_ for organizing and managing ebook collections.

The options are described in full detail in the Python project's `README`_.

IMPORTANT: these options can also be passed to the Python script
`ebooktools.py`_ via command-line parameters. Also, command-line parameters
supersede the variables defined in this configuration file. Most parameters are
not required and if nothing is specified, the default value will be used.

The options are separated based on sections::

    1. General options
       1.1 General control flags
       1.2 Options related to extracting ISBNs from files and finding
           metadata by ISBN
       1.3 Options for OCR
       1.4 Options related to extracting and searching for non-ISBN metadata
       1.5 Options related to the input and output files
       1.6 Miscellaneous options
    2. Subcommand options
       2.1 edit-config-files
       2.2 organize-ebooks
           2.2.1 Specific options for organizing files
           2.2.2 Output options
       2.3 interactive-organizer
       2.4 find-isbns
       2.5 convert-to-txt
       2.6 rename-calibre-library
       2.7 split-into-folders

References
----------
* `ebook-tools`_

.. external links
.. _ebook-tools: https://github.com/na--/ebook-tools
.. _ebooktools.py: https://github.com/raul23/pyebooktools/blob/master/pyebooktools/scripts/ebooktools.py
.. _na--: https://github.com/na--
.. _README: https://github.com/raul23/pyebooktools#readme
"""
import os
from pyebooktools.configs import get_without_isbn_ignore
# ==================
# 1. General options
# ==================
# Everything in this part belongs to the common library, so a setting here may
# influence some or all of the subcommands.

# 1.1 General control flags
# =========================
# Boolean switches; every one of them is off by default.
quiet = False
verbose = False
dry_run = False
symlink_only = False
# When enabled, an additional metadata file is saved next to each newly
# renamed file (its extension is set by `output_metadata_extension`).
keep_metadata = False

# Logging setup, given by name.
logging_level = "info"
logging_formatter = "simple"
# 1.2 Options related to extracting ISBNs from files and finding metadata by ISBN
# ===============================================================================
# Candidate ISBN: an optional 97[789] prefix, then nine digits and a final
# digit or x/X, with optional hyphens in between. The lookarounds reject
# matches that are glued to further digits.
isbn_regex = r'(?<![0-9])(-?9-?7[789]-?)?((-?[0-9]-?){9}[0-9xX])(?![0-9])'
# Matches exactly "0123456789" or one character (digit or x/X) repeated ten
# times.
isbn_blacklist_regex = r'^(0123456789|([0-9xX])\2{9})$'
# Patterns made of MIME-type-like strings (e.g. "text/plain").
isbn_direct_grep_files = r'^text/(plain|xml|html)$'
isbn_ignored_files = (
    r'^(image/(gif|svg.+)'
    r'|application/(x-shockwave-flash|CDFV2|vnd.ms-opentype|x-font-ttf'
    r'|x-dosexec|vnd.ms-excel|x-java-applet)'
    r'|audio/.+|video/.+)$'
)

isbn_grep_rf_scan_first = 400
isbn_grep_rf_reverse_last = 50
# Set to False to turn the feature off, or to a (first_lines, last_lines)
# tuple to turn it on.
isbn_grep_reorder_files = (
    isbn_grep_rf_scan_first,
    isbn_grep_rf_reverse_last,
)

# NOTE: With Calibre versions older than 2.84, the following option has to be
# set manually to an empty string.
# NOTE(review): the default is a list, so "empty" may need to be `[]` rather
# than `''` - confirm against the code that consumes this option.
isbn_metadata_fetch_order = [
    'Goodreads',
    'Amazon.com',
    'Google',
    'ISBNDB',
    'WorldCat xISBN',
    'OZON.ru',
]
# 1.3 Options for OCR
# ===================
# OCR stays off unless it is explicitly switched on.
ocr_enabled = False
# Pair of integers; presumably (first pages, last pages) going by the option
# name - TODO confirm against the OCR code.
ocr_only_first_last_pages = (7, 3)
ocr_command = "tesseract_wrapper"

# 1.4 Options related to extracting and searching for non-ISBN metadata
# =====================================================================
token_min_length = 3
# NOTE(review): `${RE_YEAR}` is kept literally in this string; presumably the
# consumer substitutes it before using the pattern - confirm.
tokens_to_ignore = (
    "ebook|book|novel|series"
    "|ed(ition)?|vol(ume)?"
    "|${RE_YEAR}"
)
organize_without_isbn_sources = [
    "Goodreads",
    "Amazon.com",
    "Google",
]
# 1.5 Options related to the input and output files
# =================================================
# Defaults to the current working directory at the time this module is
# imported.
output_folder = os.getcwd()
# The template is split at placeholder boundaries for readability; the pieces
# are joined back into one string by implicit concatenation.
output_filename_template = (
    "${d[AUTHORS]// & /, } - "
    "${d[SERIES]:+[${d[SERIES]}] - }"
    "${d[TITLE]/:/ -}"
    "${d[PUBLISHED]:+ (${d[PUBLISHED]%%-*})}"
    "${d[ISBN]:+ [${d[ISBN]}]}"
    ".${d[EXT]}"
)
# Extension of the additional metadata file that is saved next to each newly
# renamed file when `keep_metadata` is enabled.
output_metadata_extension = "meta"

# 1.6 Miscellaneous options
# =========================
# file_sort_flags = []
# Sort in reverse order.
reverse = False
# debug_prefix_length = 40
# =====================
# 2. Subcommand options
# =====================

# 2.1 edit-config-files
# =====================
# Name of the application used to edit the config file. With no name given
# (`None`), the default application for opening this type of file is used.
app = None

# 2.2 organize-ebooks
# ===================
# 2.2.1 Specific options for organizing files
# -------------------------------------------
corruption_check_only = False
# Anchored alternation of bare extensions (no leading dot).
tested_archive_extensions = (
    "^(7z|bz2|chm|arj|cab|gz|tgz|gzip|zip|rar|xz|tar"
    "|epub|docx|odt|ods|cbr|cbz|maff|iso)$"
)
organize_without_isbn = False
# The value is produced by `get_without_isbn_ignore()` (imported from
# `pyebooktools.configs`) and is computed when this module is imported.
without_isbn_ignore = get_without_isbn_ignore()
# Raw strings are used so that `\.` reaches the regex engine as an escaped
# literal dot; in a normal string literal `\.` is an invalid escape sequence
# that newer Python versions warn about.
# `pptx?` makes the trailing "x" optional, so both `.ppt` and `.pptx` match.
# TODO: confirm that this is what upstream intends, see https://bit.ly/2ryWlgt
pamphlet_included_files = r'\.(png|jpg|jpeg|gif|bmp|svg|csv|pptx?)$'
pamphlet_excluded_files = r'\.(chm|epub|cbr|cbz|mobi|lit|pdb)$'
pamphlet_max_pdf_pages = 50
pamphlet_max_filesize_kib = 250
# 2.2.2 Output options
# --------------------
# output_folder = os.getcwd()
# If organize_without_isbn is enabled, this is the folder to which all ebooks
# that were renamed based on non-ISBN metadata will be moved.
output_folder_uncertain = None
# If specified, corrupt files will be moved to this folder.
output_folder_corrupt = None
# If specified, pamphlets will be moved to this folder.
output_folder_pamphlets = None
# 2.3 interactive-organizer
# =========================
# The options of this subcommand are listed here but left commented out:
# output_folders = []
# quick_mode = False
# custom_move_base_dir = None
# restore_original_base_dir = None
# diacritic_difference_masking = ?
# match_partial_words = False

# 2.4 find-isbns
# ==============
# This subcommand is also affected by some general options, especially the
# ones related to extracting ISBNs from files (see section 1.2 above).
isbn_ret_separator = "\n"

# 2.5 convert-to-txt
# ==================
# Some general options change this subcommand's behavior a lot, especially
# the OCR ones (see '1.3 Options for OCR').
output_file = "output.txt"

# 2.6 rename-calibre-library
# ==========================
# output_folder = os.getcwd()
save_metadata = "recreate"

# 2.7 split-into-folders
# ======================
# output_folder = os.getcwd()
start_number = 0
# printf-style pattern: with %-formatting, `folder_pattern % 3` gives
# '00003000'. How the consumer applies it is not visible here - confirm.
folder_pattern = "%05d000"
files_per_folder = 1000