-
Notifications
You must be signed in to change notification settings - Fork 1
/
app.py
59 lines (46 loc) · 1.98 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from urllib.parse import quote

from bs4 import BeautifulSoup
from flask import Flask, render_template, request, jsonify
# Flask application object; the route decorators further down register
# their view functions on it.
app = Flask(__name__)
def text_format(aster, insert, factor, note_url):
    """Build one markdown link line for the table of contents.

    Args:
        aster: Truthy to put a ``*`` bullet marker before the link.
        insert: Text shown inside the link's square brackets.
        factor: Number of tab characters placed at the start of the line.
        note_url: Target placed inside the link's parentheses.

    Returns:
        The line ``<tabs>[*] [insert](note_url)`` terminated by a newline.
    """
    indent = '\t' * factor
    bullet = '*' if aster else ''
    link = f' [{insert}]({note_url})\n'
    return ''.join((indent, bullet, link))
def bear_note_url(identifier, title):
    """Return a Bear x-callback URL that opens a note at a given header.

    Args:
        identifier: Note id placed in the ``id`` query parameter.
        title: Header text placed in the ``header`` query parameter.

    Returns:
        The ``bear://x-callback-url/open-note`` URL, or an empty string when
        ``identifier`` is falsy.
    """
    # Bail out before touching ``title`` so a missing identifier never fails
    # on the title value.
    if not identifier:
        return ""
    # Percent-encode both values: escaping only spaces lets characters such
    # as '&', '#', '?' or '%' in a heading corrupt the query string.
    note_id = quote(str(identifier), safe='')
    header = quote(title, safe='')
    return f'bear://x-callback-url/open-note?id={note_id}&header={header}'
def process_html(html_content, identifier, checks):
    """Turn the headings of an HTML document into a markdown Table of Contents.

    Every ``h1``-``h5`` element found in ``html_content`` becomes one link
    line, indented by one tab per heading level below ``h1``.

    Args:
        html_content: HTML markup to scan for headings.
        identifier: Note identifier forwarded to ``bear_note_url``.
        checks: Mapping from tag name (``'h1'`` .. ``'h5'``) to a value whose
            truthiness decides whether that level gets a ``*`` marker.

    Returns:
        The markdown text: a ``# Table of Contents`` heading, one line per
        heading found, and a closing ``***`` line.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    pieces = ["# Table of Contents\n"]
    for heading in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5']):
        heading_text = heading.get_text()
        link = bear_note_url(identifier, heading_text)
        # The digit in the tag name is the heading level; h1 gets no indent.
        depth = int(heading.name[1]) - 1
        pieces.append(
            text_format(checks[heading.name], heading_text, depth, link)
        )
    pieces.append('***\n')
    return ''.join(pieces)
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/scrape', methods=['POST'])
def scrape():
    """Build a Table of Contents from an uploaded HTML file.

    Reads these parts of the POST request:
      * ``html_file`` (file upload): the HTML document, decoded as UTF-8.
      * ``identifier`` (form field): note identifier passed to
        ``process_html``.
      * ``checkh1`` .. ``checkh5`` (optional form fields): passed to
        ``process_html`` keyed by tag name; absent fields become ``''``.

    Returns:
        The markdown produced by ``process_html`` on success, or a JSON
        ``{'error': ...}`` body with status 400 when the file is missing,
        is not valid UTF-8, or the identifier is missing or empty.
    """
    html_file = request.files.get('html_file')
    if not html_file:
        return jsonify({'error': 'HTML file not provided!'}), 400

    try:
        html_content = html_file.read().decode('utf-8')
    except UnicodeDecodeError:
        # Report a bad upload as a client error instead of letting the
        # exception surface as an HTTP 500.
        return jsonify({'error': 'HTML file must be UTF-8 encoded!'}), 400

    # Use .get() rather than indexing: a missing field would otherwise abort
    # with Flask's generic 400 page before the JSON error below is reached.
    identifier = request.form.get('identifier', '')
    if not identifier:
        return jsonify({'error': 'Identifier not provided!'}), 400

    checks = {
        f'h{level}': request.form.get(f'checkh{level}', '')
        for level in range(1, 6)
    }
    return process_html(html_content, identifier, checks)
# Start Flask's built-in server only when this file is run as a script.
# NOTE(review): debug=True turns on Flask's debug mode (interactive debugger
# and reloader per the Flask docs) - confirm this is used for local
# development only.
if __name__ == '__main__':
    app.run(debug=True)