forked from gfjaru/Kiko
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocessor.py
More file actions
160 lines (120 loc) · 5.37 KB
/
preprocessor.py
File metadata and controls
160 lines (120 loc) · 5.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import re
import os
import shutil
# Root folder of the generated site and the lyrics sub-folder name. The
# sub-folder name is used both for the source tree and inside the output.
PATH_OUTPUT = "docs"
PATH_LYRICS = "lyrics"

# CSS class suffixes for the parts of a lyrics section, keyed by how many
# parts the section contains.
lyrics_id_names = {
    1: ["translation"],
    2: ["transcription", "translation"],
    3: ["transcription", "transliteration", "translation"],
}

# Front-matter building blocks: the delimiter line and one "key: value" line
# whose key is left-aligned and padded to 16 characters.
YAML_SEPARATOR = "---\n"
YAML_KEY_VALUE = "{0: <16}: {1}\n"
def make_artist(artist_id):
    """Create the output folders and index page for one artist.

    The first call for an artist creates ``<output>/<lyrics>/<artist_id>/_posts``
    and writes an ``index.html`` holding only front matter (``layout`` and
    ``artist_id``). Later calls for the same artist leave the existing files
    untouched.

    Args:
        artist_id: Folder name / identifier of the artist.

    Returns:
        A ``(artist_folder, posts_folder)`` tuple of paths.

    Raises:
        OSError: if the folders or the index page cannot be created for any
            reason other than the posts folder already existing.
    """
    artist_folder = os.path.join(PATH_OUTPUT, PATH_LYRICS, artist_id)
    posts_folder = os.path.join(artist_folder, "_posts")

    try:
        os.makedirs(posts_folder)
    except FileExistsError:
        # Already set up by an earlier call (e.g. the artist was featured on
        # another song); keep the existing index page as-is.
        return artist_folder, posts_folder

    index_path = os.path.join(artist_folder, "index.html")
    with open(index_path, "w", encoding="utf8") as fh:
        fh.write(YAML_SEPARATOR)
        fh.write(YAML_KEY_VALUE.format("layout", "artist"))
        fh.write(YAML_KEY_VALUE.format("artist_id", artist_id))
        fh.write(YAML_SEPARATOR)

    return artist_folder, posts_folder
def process_content(content):
    """Trim *content* and end every line but the last with an HTML ``<br>``."""
    trimmed = content.strip()
    lines = trimmed.split("\n")
    return "<br>\n".join(lines)
def parse_artists_config(path_artist_config):
    """Collect the artist ids declared in an artists config file.

    A line counts as an artist declaration when, after stripping surrounding
    whitespace, it consists solely of an identifier (word characters and
    dashes) followed by a colon.

    Args:
        path_artist_config: Path of the UTF-8 encoded config file.

    Returns:
        The set of artist ids found.
    """
    key_line = re.compile(r"^([\w-]+):$")
    with open(path_artist_config, "r", encoding="utf8") as config_file:
        hits = (key_line.match(raw_line.strip()) for raw_line in config_file)
        # The set is built while the file is still open.
        return {hit.group(1) for hit in hits if hit}
# HTML templates for the rendered lyrics. Each "{}" is filled via str.format().

# Outer wrapper around all sections of a song.
HTML_LYRICS = (
    '<div class="lyrics">\n'
    '{}'
    '</div>\n'
)

# Wrapper around the parts of a single section.
HTML_SECTION = (
    '<div class="lyrics-section">\n'
    '{}'
    '</div>\n'
)

# One part of a section: first "{}" is the CSS class suffix, second the text.
HTML_SECTION_PART = (
    '<p class="lyrics-{}">\n'
    '{}\n'
    '</p>\n'
)
class LyricsFile:
    """One lyrics source file and the post generated from it.

    A source file has three parts separated by lines consisting of ``//``
    followed by three or more dashes: a ``key: value`` header, a free-text
    post body, and the lyrics. The lyrics are split into sections by lines
    containing only ``//``, and each section into parts by blank lines.
    """

    def __init__(self, file_name, artist_id):
        """Remember the source file name and its primary artist.

        Args:
            file_name: Name of the lyrics source file (with extension).
            artist_id: Id of the artist whose folder the file lives in.
        """
        self.file_name = file_name
        # Primary artist first; ids from "feat" header entries are appended.
        self.artist_ids = [artist_id]
        self.lang = ""
        self.titles = []
        # List of sections; each section is a list of text parts.
        self.sections = []
        self.post = ""

    @staticmethod
    def _yaml_double_quoted(text):
        """Return *text* as a YAML double-quoted scalar.

        Backslashes and double quotes are escaped so that a title containing
        either cannot terminate or corrupt the front-matter value.
        """
        escaped = text.replace("\\", "\\\\").replace('"', '\\"')
        return '"' + escaped + '"'

    @staticmethod
    def _render_section(section):
        """Render the parts of one section as concatenated HTML paragraphs.

        Raises:
            KeyError: if the number of parts has no entry in
                ``lyrics_id_names``.
        """
        id_names = lyrics_id_names[len(section)]
        return "".join(
            HTML_SECTION_PART.format(id_name, process_content(part))
            for id_name, part in zip(id_names, section)
        )

    def _parse_header(self, header):
        """Read the ``lang``, ``feat`` and ``title`` entries of the header.

        ``feat`` and ``title`` values are comma-separated lists; ``;com``
        inside an item stands for a literal comma. Other keys are ignored.
        """
        for match in re.finditer(r"([a-zA-Z\-_]+):\s+(.+$)", header, re.M):
            key, value = match.groups()
            if key == "lang":
                self.lang = value
            elif key in ("feat", "title"):
                items = [x.strip().replace(";com", ",") for x in value.split(",")]
                if key == "feat":
                    self.artist_ids.extend(items)
                else:
                    self.titles.extend(items)

    def _parse_contents(self, contents):
        """Split the lyrics text into sections and parts; skip empty sections."""
        for match in re.finditer(r"([\s\S]*?)(?:^\/\/\s*$|$(?![\r\n]))", contents, re.M):
            section = match.group(1).strip()
            if section:
                parts = [x.strip() for x in re.split(r"\n{2,}", section)]
                self.sections.append(parts)

    def parse(self, text):
        """Parse the full text of a lyrics file into this object.

        Args:
            text: Complete contents of the source file.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: if *text* cannot be split into header, post and
                lyrics (no separator line present).
        """
        chunks = [
            x.group(1)
            for x in re.finditer(r"([\s\S]*?)(?:^\/\/-{3,}$|$(?![\r\n]))", text, re.M)
        ][:3]
        if len(chunks) < 3:
            # Same exception type as the bare tuple-unpack would raise, but
            # with a message that names the offending file.
            raise ValueError(
                f"{self.file_name}: expected header, post and lyrics separated by '//---' lines"
            )
        header, post, contents = chunks
        self.post = post.strip()
        self._parse_header(header)
        self._parse_contents(contents)
        return self

    def generate(self):
        """Write the post for this file into the primary artist's posts folder.

        Folders (and index pages) are also created for every featured
        artist. The output file is named after the source file with its
        extension replaced by ``.md``.

        Raises:
            KeyError: if a section has a number of parts that
                ``lyrics_id_names`` does not cover.
        """
        _, posts_folder = make_artist(self.artist_ids[0])
        for artist_id in self.artist_ids[1:]:
            make_artist(artist_id)

        # Render everything before opening the file so a rendering error does
        # not leave a truncated post behind.
        title = self._yaml_double_quoted("・".join(self.titles))
        lyrics_inner = "".join(
            HTML_SECTION.format(self._render_section(section))
            for section in self.sections
        )

        file_name = os.path.splitext(self.file_name)[0]
        with open(os.path.join(posts_folder, file_name + ".md"), "w", encoding="utf8") as fh:
            fh.write(YAML_SEPARATOR)
            fh.write(YAML_KEY_VALUE.format("layout", "post"))
            fh.write(YAML_KEY_VALUE.format("title", title))
            fh.write(YAML_KEY_VALUE.format("artist_ids", str(self.artist_ids)))
            fh.write(YAML_SEPARATOR)
            fh.write("\n")
            fh.write(self.post)
            fh.write("\n\n")
            fh.write("---")
            fh.write("\n\n")
            fh.write(HTML_LYRICS.format(lyrics_inner))
def main():
    """Regenerate the lyrics output tree from the lyrics source tree.

    Steps: delete previously generated output, copy ``artists.yml`` into the
    output ``_data`` folder, then parse and generate every lyrics file found
    in each artist sub-folder of the source tree.

    Raises:
        RuntimeError: if an artist folder is not declared in ``artists.yml``,
            or a lyrics file is not named like
            ``<number>-<number>-<number>-<name>.<extension>``.
    """
    # delete previously generated output
    path_out_lyrics = os.path.join(PATH_OUTPUT, PATH_LYRICS)
    if os.path.isdir(path_out_lyrics):
        shutil.rmtree(path_out_lyrics)

    # copy artist.yml file
    path_original_artists_config = os.path.join(PATH_LYRICS, "artists.yml")
    path_out_data = os.path.join(PATH_OUTPUT, "_data")
    # Without the directory, copy2 would create a plain file named "_data".
    os.makedirs(path_out_data, exist_ok=True)
    shutil.copy2(path_original_artists_config, path_out_data)

    configured_artists = parse_artists_config(path_original_artists_config)
    print(f"Found {len(configured_artists)} configured artists in {path_original_artists_config}.")

    # preprocess lyrics files
    for artist_id in os.listdir(PATH_LYRICS):
        artist_dir = os.path.join(PATH_LYRICS, artist_id)
        if not os.path.isdir(artist_dir):
            continue

        if artist_id not in configured_artists:
            raise RuntimeError(f"Did not find artist '{artist_id}' in the lyrics/artists.yml file. Don't forget to update that file!")

        for lyrics_file_name in os.listdir(artist_dir):
            # Fail with a readable message instead of an AttributeError on
            # None when a stray or misnamed file is present.
            if re.match(r"(?:\d+-){3}(.+)\.", lyrics_file_name) is None:
                raise RuntimeError(
                    f"Lyrics file '{lyrics_file_name}' of artist '{artist_id}' is not named "
                    "like '<number>-<number>-<number>-<name>.<extension>'."
                )

            with open(os.path.join(artist_dir, lyrics_file_name), encoding="utf8") as fh:
                LyricsFile(lyrics_file_name, artist_id)\
                    .parse(fh.read())\
                    .generate()

            print(f"Processed {lyrics_file_name} for artist '{artist_id}'.")


if __name__ == "__main__":
    main()