-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgen_blog.py
More file actions
executable file
·160 lines (122 loc) · 4.57 KB
/
gen_blog.py
File metadata and controls
executable file
·160 lines (122 loc) · 4.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env nix-shell
#! nix-shell -i python3 -p python3.pkgs.mistune
# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
# Version 2, December 2004
#
# Copyright (C) 2024 Thiago Kenji Okada <thiagokokada@gmail.com>
#
# Everyone is permitted to copy and distribute verbatim or modified
# copies of this license document, and changing it is allowed as long
# as the name is changed.
#
# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
#
# 0. You just DO WHAT THE FUCK YOU WANT TO.
"""\
Usage: gen_blog.py
Needs to be run at the root directory of the repository.
Will output the generated README.md file to the stdout, you can redirect its
contents by:
$ ./gen_blog.py > README.md
Will also generate the RSS file in `rss.xml`.\
"""
import os
import re
import sys
import xml.etree.cElementTree as ET
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from urllib.parse import urljoin
from typing import Optional
# mistune is an optional dependency: when it cannot be imported a warning is
# printed to stderr and the name is bound to None, which grab_posts() checks
# before rendering a post's contents.
try:
    import mistune
except ImportError:
    print(
        "WARN: mistune not available, markdown contents is unavailable", file=sys.stderr
    )
    mistune = None
# Template of the generated README; gen_readme() fills `{posts}` with the
# newline-joined list of post entries.
README_TEMPLATE = """\
# Blog
Mirror of my blog in https://kokada.capivaras.dev/.
## Posts
[](https://raw.githubusercontent.com/thiagokokada/blog/main/rss.xml)
{posts}\
"""
# Channel-level <title>, <link> and <description> written by gen_rss().
RSS_TITLE = "kokada's blog"
RSS_LINK = "https://github.com/thiagokokada/blog"
RSS_DESCRIPTION = "dd if=/dev/urandom of=/dev/brain0"
# Base URL that gen_rss() joins with each post's file path to build its link.
RSS_POST_LINK_PREFIX = f"{RSS_LINK}/blob/main/"
@dataclass
class Post:
    """A single blog post as collected by grab_posts()."""

    # Text of the H1 heading found at the start of the post's markdown file.
    title: str
    # Path of the markdown file, stored as a string.
    file: str
    # HTML produced by mistune from the whole file; stays None when mistune
    # is not available.
    contents: Optional[str] = None
# Posts grouped by day: grab_posts() keys the mapping by the date parsed from
# each post directory's name.
Posts = dict[datetime, list[Post]]
def grab_posts(pwd: Path) -> Posts:
    """Collect the published posts found under ``pwd``.

    Every non-hidden sub-directory of ``pwd`` whose name parses as
    ``YYYY-MM-DD`` is scanned for non-hidden ``*.md`` files. A file becomes a
    post when its very first line is an H1 heading (``# Title``); the heading
    text is used as the post title. When ``mistune`` could be imported, the
    whole file is also rendered to HTML and stored in ``Post.contents``.

    Directories that are not dates and markdown files without a title are
    skipped with a warning on stderr.

    Args:
        pwd: Directory that contains the date-named post directories.

    Returns:
        Mapping from each directory's date to the posts found inside it.
        Directories and files are both visited in reverse sorted order, so the
        mapping is filled starting with the last-sorting directory name.
    """
    posts = defaultdict(list)

    for date_dir in sorted(pwd.iterdir(), reverse=True):
        # Ignore non-directories or hidden files
        if not date_dir.is_dir() or date_dir.name.startswith("."):
            continue

        # Try to parse date from directory name
        try:
            date = datetime.strptime(date_dir.name, "%Y-%m-%d")
        except ValueError:
            print(f"WARN: ignoring non-date directory: {date_dir}", file=sys.stderr)
            continue

        # Iterate between the files in the date directory
        for post_file in sorted(date_dir.iterdir(), reverse=True):
            # Ignore non-markdown files or hidden files (draft)
            if post_file.suffix != ".md" or post_file.name.startswith("."):
                continue

            # Decode explicitly as UTF-8 instead of relying on the locale's
            # default encoding, so results do not depend on the machine.
            text = post_file.read_text(encoding="utf-8")

            # Grab the first H1 section to parse as title. The character class
            # stops at the first CR or LF, so CRLF files do not leak a trailing
            # "\r" into the title and a heading on the last line without a
            # final newline is still accepted.
            m_title = re.match(r"# (?P<title>[^\r\n]+)", text)
            if m_title is None:
                # No Post exists at this point, so report the file path.
                print(
                    f"WARN: did not find title for file: {post_file}",
                    file=sys.stderr,
                )
                continue

            post = Post(title=m_title["title"], file=str(post_file))
            if mistune:
                post.contents = mistune.html(text)
            posts[date].append(post)

    return posts
def gen_readme(posts: Posts):
    """Print the README contents, listing every post, to stdout.

    Each post becomes one Markdown list entry of the form
    ``- [title](./path) - YYYY-MM-DD``; the entries are joined with newlines
    and substituted into ``README_TEMPLATE``.

    Args:
        posts: Posts grouped by date; entries are emitted in the mapping's
            iteration order.
    """
    entries = []
    for date, day_posts in posts.items():
        for post in day_posts:
            link = os.path.join(".", post.file)  # to format as ./{filepath}
            # Only the date goes through strftime-style formatting. Passing
            # the title or link as part of the format string would make any
            # "%" they contain be interpreted as a date directive.
            entries.append(f"- [{post.title}]({link}) - {date:%Y-%m-%d}")

    print(README_TEMPLATE.format(posts="\n".join(entries)))
def gen_rss(posts: Posts):
    """Write an RSS 2.0 feed containing every post to ``rss.xml``.

    The channel metadata comes from the ``RSS_*`` constants. Each post becomes
    one ``<item>`` whose link (also used as guid) is the post's file path
    joined to ``RSS_POST_LINK_PREFIX``, and whose pubDate is the post's date.

    Args:
        posts: Posts grouped by date; items are emitted in the mapping's
            iteration order.
    """
    root = ET.Element("rss", version="2.0")
    channel = ET.SubElement(root, "channel")

    # Channel-level metadata, in the order it appears in the feed.
    channel_fields = (
        ("title", RSS_TITLE),
        ("link", RSS_LINK),
        ("description", RSS_DESCRIPTION),
    )
    for tag, value in channel_fields:
        ET.SubElement(channel, tag).text = value

    for date, day_posts in posts.items():
        for post in day_posts:
            item = ET.SubElement(channel, "item")
            url = urljoin(RSS_POST_LINK_PREFIX, post.file)
            item_fields = (
                ("title", post.title),
                ("guid", url),
                ("description", post.contents),
                ("link", url),
                ("pubDate", date.strftime("%a, %d %b %Y %H:%M:%S GMT")),
            )
            for tag, value in item_fields:
                ET.SubElement(item, tag).text = value

    # Tab-indent the tree so the written file is readable.
    document = ET.ElementTree(root)
    ET.indent(document, space="\t", level=0)
    document.write("rss.xml", xml_declaration=True, encoding="UTF-8")
def main():
    """Run the generator, or print the usage text when ``-h`` is given.

    With ``-h`` anywhere in the command line the module docstring is printed
    to stderr and the process exits with status 0. Otherwise posts are
    collected from the current directory, the README is printed to stdout and
    the RSS feed is written.
    """
    if "-h" not in sys.argv:
        collected = grab_posts(Path())
        gen_readme(collected)
        gen_rss(collected)
        return

    print(__doc__, file=sys.stderr)
    sys.exit(0)
# Only run the generator when executed as a script, not when imported.
if __name__ == "__main__":
    main()