Skip to content
Permalink
master
Go to file
 
 
Cannot retrieve contributors at this time
executable file 70 lines (58 sloc) 1.8 KB
#!/usr/bin/env python3
#
# This script takes an HTML annotations file exported from the Kindle app
# and converts it to Markdown.
#
# Instructions:
# 1. Download this script.
# 2. Be sure to have python installed.
# 3. Install python dependencies: pip3 install --user beautifulsoup4
# 4. Run the script: python3 kindle2md YOUR_FILE.html
#
# Copyright (C) 2021 Rafael Cavalcanti - rafaelc.org
# Licensed under GPLv3
#
from bs4 import BeautifulSoup
from pathlib import Path
from os.path import basename, splitext
from sys import argv, exit
# Command-line flow: validate arguments, read the exported HTML file, extract
# the book title and every note with its heading, then write the result as a
# Markdown file next to the source. Any failure prints a message and exits
# with status 1.
script_name = basename(__file__)

# Exactly one positional argument (the HTML file to convert) is expected.
if len(argv) != 2:
    print(f'Usage: {script_name} html_file')
    exit(1)

source_name = argv[1]
# The destination keeps the source path and name, with the extension
# replaced by ".md".
dest_name = splitext(source_name)[0] + '.md'
source = Path(source_name)
dest = Path(dest_name)

# Never clobber an existing file without explicit confirmation.
if dest.exists():
    print(f'Destination file "{dest}" already exists.')
    answer = input('Overwrite? [y/n] ')
    if answer.lower().strip() != 'y':
        exit(1)

try:
    # Decode explicitly instead of relying on the locale default, which is
    # not UTF-8 on every platform and would garble non-ASCII highlights.
    # NOTE(review): assumes the Kindle export is UTF-8 encoded - confirm.
    file_content = source.read_text(encoding='utf-8')
except (OSError, UnicodeError) as e:
    # UnicodeError covers a file that cannot be decoded as UTF-8.
    print(f'Failed to read file: {e}.')
    exit(1)

soup = BeautifulSoup(file_content, 'html.parser')

try:
    bookTitle = soup.select_one('.bookTitle').contents[0].strip()
    textElements = soup.select('.noteText')
    descElements = soup.select('.noteHeading')
    texts = [elem.contents[0].strip() for elem in textElements]
    # The heading's first child node holds the note type; its last two
    # characters are dropped before trimming whitespace.
    types = [elem.contents[0][:-2].strip() for elem in descElements]
    # The heading's third child node holds the location; its first five
    # characters are dropped before trimming whitespace.
    pages = [elem.contents[2][5:].strip() for elem in descElements]
except (AttributeError, IndexError, KeyError, TypeError) as e:
    # AttributeError: a selector matched nothing (select_one returned None).
    # IndexError: an element has fewer child nodes than the layout expects.
    # KeyError/TypeError: a child node is a tag rather than a text string.
    # All of them mean the file does not have the expected structure.
    print(f'Error parsing file: {e}')
    exit(1)

# Notes and headings are paired by position. If the counts differ, report it
# and convert only the pairs that exist instead of crashing on a bad index.
if len(texts) != len(types):
    print(f'Warning: found {len(texts)} notes but {len(types)} headings; '
          'unmatched entries are skipped.')

# One Markdown list item per note, followed by an indented "type @ location"
# line and a blank separator line.
entries = [
    f'- {text}\n\t{kind} @ {page}\n\n'
    for text, kind, page in zip(texts, types, pages)
]
output = f'# {bookTitle}\n\n' + ''.join(entries)

try:
    # Write with the same explicit encoding used for reading.
    dest.write_text(output, encoding='utf-8')
except OSError as e:
    print(f'Failed to write file: {e}')
    exit(1)

print(f'Written to: {dest_name}')
You can’t perform that action at this time.