Permalink
Cannot retrieve contributors at this time
#!/usr/bin/env python3 | |
# | |
# This script takes an HTML annotations file exported from the Kindle app
# and converts it to Markdown.
# | |
# Instructions: | |
# 1. Download this script. | |
# 2. Be sure to have python installed. | |
# 3. Install python dependencies: pip3 install --user beautifulsoup4 | |
# 4. Run the script: python3 kindle2md YOUR_FILE.html | |
# | |
# Copyright (C) 2021 Rafael Cavalcanti - rafaelc.org | |
# Licensed under GPLv3 | |
# | |
from bs4 import BeautifulSoup | |
from pathlib import Path | |
from os.path import basename, splitext | |
from sys import argv, exit | |
def extract_annotations(soup):
    """Pull the book title and all annotations out of a parsed export.

    Args:
        soup: Parsed document exposing ``select_one`` and ``select`` (the
            ``BeautifulSoup`` object built in ``main``).

    Returns:
        A ``(book_title, annotations)`` tuple, where ``annotations`` is a
        list of ``(text, kind, page)`` string triples in document order.

    Raises:
        ValueError: If an expected element or child node is missing, or if
            there are fewer ``.noteHeading`` elements than ``.noteText``
            elements.
    """
    try:
        book_title = soup.select_one('.bookTitle').contents[0].strip()
        text_elements = soup.select('.noteText')
        heading_elements = soup.select('.noteHeading')
        texts = [elem.contents[0].strip() for elem in text_elements]
        # The slice offsets are kept exactly as in the original script:
        # the heading's first child loses its last two characters and its
        # third child loses its first five.
        # NOTE(review): presumably these trim the punctuation around the
        # highlight colour and a page-label prefix; confirm against a real
        # Kindle export.
        kinds = [elem.contents[0][:-2].strip() for elem in heading_elements]
        pages = [elem.contents[2][5:].strip() for elem in heading_elements]
    except (AttributeError, IndexError, KeyError, TypeError) as e:
        # A missing element yields None (AttributeError); a missing child
        # yields IndexError; a tag where a string was expected yields
        # TypeError or KeyError. All mean "unexpected document structure".
        raise ValueError(str(e)) from e

    if len(kinds) < len(texts):
        # Indexing headings by text position would run off the end.
        raise ValueError(
            f'found {len(texts)} note texts but only {len(kinds)} headings'
        )

    # zip() stops at the shortest input, i.e. at the number of texts, which
    # matches the original loop over range(len(texts)).
    return book_title, list(zip(texts, kinds, pages))


def render_markdown(book_title, annotations):
    """Render the title and annotations as a Markdown document.

    Args:
        book_title: Text used for the top-level heading.
        annotations: Iterable of ``(text, kind, page)`` string triples.

    Returns:
        The Markdown text: a ``# title`` heading followed by one list item
        per annotation, each with a tab-indented ``kind @ page`` line.
    """
    parts = [f'# {book_title}\n\n']
    parts.extend(
        f'- {text}\n\t{kind} @ {page}\n\n'
        for text, kind, page in annotations
    )
    return ''.join(parts)


def main():
    """Convert the HTML file named on the command line to Markdown.

    The destination is the source path with its extension replaced by
    ``.md``. If it already exists the user is asked before overwriting.
    Exits with status 1 on bad usage, a declined overwrite, or any
    read, parse or write failure.
    """
    script_name = basename(__file__)

    if len(argv) != 2:
        print(f'Usage: {script_name} html_file')
        exit(1)

    source_name = argv[1]
    dest_name = splitext(source_name)[0] + '.md'
    source = Path(source_name)
    dest = Path(dest_name)

    if dest.exists():
        print(f'Destination file "{dest}" already exists.')
        answer = input('Overwrite? [y/n] ')
        if answer.lower().strip() != 'y':
            exit(1)

    try:
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # default encoding, which differs between platforms.
        file_content = source.read_text(encoding='utf-8')
    except (OSError, UnicodeError) as e:
        print(f'Failed to read file: {e}.')
        exit(1)

    soup = BeautifulSoup(file_content, 'html.parser')

    try:
        book_title, annotations = extract_annotations(soup)
    except ValueError as e:
        print(f'Error parsing file: {e}')
        exit(1)

    output = render_markdown(book_title, annotations)

    try:
        dest.write_text(output, encoding='utf-8')
    except OSError as e:
        print(f'Failed to write file: {e}')
        exit(1)

    print(f'Written to: {dest_name}')


if __name__ == '__main__':
    main()