generate_site.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import markdown2
from bs4 import BeautifulSoup
from slugify import slugify
from dateutil.parser import parse
import os, shutil, sys, hashlib, time, requests, re
from datetime import datetime
from PIL import Image
from email import utils
from curses import ascii
from collections import Counter
from argparse import ArgumentParser
from tqdm import tqdm
import PyRSS2Gen
script_run_time = datetime.now()
########################### Settings ################################
SITE_TITLE = 'lambdan.se'
SITE_TITLE_SUFFIX = ' - ' + SITE_TITLE # at the end of every <title>
CSS_FILE = 'css-oled-2022.css' # should be in the root of ./includes/
AUTHOR_NAME = 'djs'
AUTHOR_EMAIL = 'david@lambdan.se' # these are in the footer
AUTHOR_TWITTER = 'nadbmal' # no @
SITE_STARTED_YEAR = 2012 # used for (c) in the footer
IMAGE_SIZE_THRESHOLD = 300000 # (bytes) images smaller than this won't get a thumbnail
THUMBNAIL_MAX_RESOLUTION = (1000, 1000) # max resolution for thumbnails in each axis (aspect ratio is preserved)
POSTS_DIR = './posts/'
IMAGES_FOLDER = './images/'
THUMBS_FOLDER = './images/thumbs/'
INCLUDE_FOLDER = './includes/'
OTHER_PAGES_FOLDER = './pages/'
VALID_POST_EXTENSIONS = ['.txt', '.text', '.md', '.markdown']
STATS_WHITELISTED_CHARACTERS = set('abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ åäö ÅÄÖ')
#####################################################################
# handle arguments
parser = ArgumentParser()
parser.add_argument("--output-folder", '-o', action='store', dest='folder', help='generated website appears here (default: ./_output/')
parser.add_argument("--root-url", '-url', action='store', dest='url', help='root url of website (example: https://lambdan.se/blog/)', required=True)
parser.add_argument('--regenerate-thumbs', action="store_true", dest='regenThumbs', help="force re-generation of thumbnails", default=False)
parser.add_argument('--verbose', '-v', action="store_true", dest='verbose', help="print a lot of info", default=False)
parser.add_argument('--skip-confirm', '-y', action="store_true", dest='skip_confirm', help="skip confirmation", default=False)
parser.add_argument('--keep-old', '-k', action="store_true", dest='keep_old', help="keep old generation", default=False)
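# example invocation (output folder and URL are illustrative):
#   python generate_site.py -o ./_output/ -url https://lambdan.se/blog/ -y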
parsed = parser.parse_args()
if not parsed.folder:
SITE_ROOT = './_output/'
else:
SITE_ROOT = parsed.folder
SITE_ROOT_URL = parsed.url
# TODO: make sure they are valid
print ("Output folder:", SITE_ROOT)
print ("URL Root:", SITE_ROOT_URL)
if not parsed.skip_confirm:
yn = input("Continue? y/N ").lower()
if yn != "y":
print ("exiting...")
sys.exit(1)
CSS_URL = SITE_ROOT_URL + CSS_FILE
def saveHTML(code, filepath):
try:
soup = BeautifulSoup(code, 'html.parser')
f = open(filepath, 'w', encoding="utf8")
f.write(str(soup))
f.close()
return os.path.isfile(filepath)
except Exception as e:
print ("saveHTML error:", filepath)
print (e)
return False
def xml_clean(text): # https://stackoverflow.com/a/20819845
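    # keeps only printable ASCII so the RSS feeds stay valid XML (control characters and emoji are dropped)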
return str(''.join(ascii.isprint(c) and c or '' for c in text))
def generateHeader(page_title, css_class):
output = '<html>'
output += '<head>'
output += '<link rel="stylesheet" type="text/css" href="' + CSS_URL + '">'
output += '<link rel="apple-touch-icon" sizes="180x180" href="' + SITE_ROOT_URL + 'apple-touch-icon.png?v=rMlK32YJeL">'
output += '<link rel="icon" type="image/png" sizes="32x32" href="' + SITE_ROOT_URL + 'favicon-32x32.png?v=rMlK32YJeL">'
output += '<link rel="icon" type="image/png" sizes="16x16" href="' + SITE_ROOT_URL + 'favicon-16x16.png?v=rMlK32YJeL">'
#output += '<link rel="manifest" href="' + SITE_ROOT_URL + 'manifest.json?v=rMlK32YJeL">'
output += '<link rel="mask-icon" href="' + SITE_ROOT_URL + 'safari-pinned-tab.svg?v=rMlK32YJeL" color="#ff0f00">'
output += '<link rel="shortcut icon" href="' + SITE_ROOT_URL + 'favicon.ico?v=rMlK32YJeL">'
#output += '<meta name="msapplication-config" content="' + SITE_ROOT_URL + 'browserconfig.xml?v=rMlK32YJeL">'
output += '<link rel="alternate" type="application/rss+xml" title="RSS" href="' + SITE_ROOT_URL + 'rss.xml" />'
# font
output += '<link rel="preconnect" href="https://fonts.googleapis.com">'
output += '<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>'
output += '<link href="https://fonts.googleapis.com/css2?family=Montserrat&display=swap" rel="stylesheet">'
# end font
output += '<title>' + page_title.strip() + SITE_TITLE_SUFFIX + '</title>' # strip() to remove newline
output += '<meta charset="utf-8">'
output += '<meta name="viewport" content="width=device-width, initial-scale=1">'
output += '</head>'
output += '<body>'
output += '<div class="navigation">'
output += '<p>'
output += '<a href="' + SITE_ROOT_URL + '" class="logo">' + SITE_TITLE + '</a>'
output += '<br>'
output += '<a href="' + SITE_ROOT_URL + 'archive">Archive</a> β’ '
output += '<a href="' + SITE_ROOT_URL + 'stats">Stats</a> β’ '
output += '<a href="' + SITE_ROOT_URL + 'misc">Misc</a> β’ '
output += '<a href="' + SITE_ROOT_URL + 'about">About</a> '
output += '</p>'
output += '</div>'
output += '<div class="' + css_class + '">'
return output
def generateFooter():
output = '<footer>'
output += '<a href="mailto:' + AUTHOR_EMAIL + '">' + AUTHOR_EMAIL + '</a><br>'
output += 'See the <a href="https://lambdan.se/blog/about">about page</a> for feeds and more ways to contact me<br>'
output += '©' + str(SITE_STARTED_YEAR) + '-' + str(datetime.now().year) + ' ' + AUTHOR_NAME + '<br>'
#output += '<br><small>Generated ' + script_run_time.strftime('%Y-%m-%d %H:%M') + ' by <a href="https://github.com/lambdan/lambblog">lambblog</a></small>'
output += '</footer></body></html>'
return output
if not parsed.keep_old:
if os.path.isdir(SITE_ROOT):
shutil.rmtree(SITE_ROOT)
os.makedirs(SITE_ROOT)
elif not os.path.isdir(SITE_ROOT):
os.makedirs(SITE_ROOT)
if not os.path.isdir(IMAGES_FOLDER):
os.makedirs(IMAGES_FOLDER)
if not os.path.isdir(THUMBS_FOLDER):
os.makedirs(THUMBS_FOLDER)
input_files = []
posts = []
processed_posts = 0
print("Searching for posts in", POSTS_DIR, "...")
for dirpath, dirnames, files in os.walk(POSTS_DIR):
for file in files:
fullpath = os.path.join(dirpath, file)
if os.path.splitext(file)[1].lower() in VALID_POST_EXTENSIONS:
input_files.append(fullpath)
else:
print("(Skipping",dirpath,file,"because it doesnt have a supported file extension)")
print("Found", len(input_files), "posts")
print("Processing posts...")
pbar = tqdm(total=len(input_files))
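# expected post file layout (illustrative example):
#   2019-04-20 13:37       <- 1st line: date
#   # My Post Title        <- 2nd line: title ("# " prefix is stripped)
#   https://example.com/   <- 3rd line: link URL for linked posts, otherwise left blank
#   Body text in Markdown...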
for post in input_files: # post is fullpath to the text file
if parsed.verbose: print("Processing:", post)
f = open(post, 'r', encoding="utf8")
try:
date = parse(f.readline(), fuzzy=True) # 1st line, date
except Exception as e:
print ("ERROR WITH", post)
print ("Couldnt parse a date, you probably forgot to put a date at the very top of the file:", e)
print ("Exiting...")
sys.exit(1)
title = f.readline().rstrip() # 2nd line, title
title = title[2:].rstrip() # drop the leading "# " markdown heading marker
third_line = f.readline() # 3rd line, possibly a link
body_text = f.read()
f.close()
slug_title = slugify(title)
if parsed.verbose: print('Title:', title)
if parsed.verbose: print('Slug title:', slug_title)
# create folder for post
post_url = str(date.year) + "/" + "%02d" % date.month + "/" + "%02d" % date.day + "/" + str(slug_title)
post_root = os.path.join(SITE_ROOT, post_url)
if not os.path.isdir(post_root): os.makedirs(post_root)
if parsed.verbose: print('Post root folder:', post_root)
# copy original text file to post root so it can be viewed by adding .text to post URL
shutil.copy(post, os.path.join(post_root, '.text'))
# header
html_output = generateHeader(title, "article")
# blog post header
if third_line.lower().startswith("http"): # detect linked post
isLinked = third_line.rstrip()
html_output += '<h1 class="article_title_linked"><a href="' + third_line + '">' + title + '</a></h1>'
else:
isLinked = False
html_output += '<h1 class="article_title">' + title + '</h1>'
if parsed.verbose: print('Linked post:', isLinked)
html_output += '<h2 class="article_date">' + date.strftime('%a %d %b %Y, %H:%M') + '</h2>'
# body text
markdown_body = markdown2.markdown(body_text, extras=["strike", "tables"])
# find images and mirror/thumbnail them
soup = BeautifulSoup(markdown_body, "html.parser")
images = soup.find_all('img') # https://stackoverflow.com/a/47166709
image_urls = []
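# for every <img>: mirror the remote file locally, thumbnail it if it's big, and point src at the local copy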
if len(images) > 0:
for image in images:
imgurl = image['src']
ext = os.path.splitext(imgurl)[1].lower()
# remove Twitter's weird :orig :large extensions
if 'jpeg:' in ext or 'jpg:' in ext:
ext = ".jpg"
if parsed.verbose: print ('correcting extension of', imgurl, 'to', ext)
elif 'png:' in ext:
ext = ".png"
if parsed.verbose: print ('correcting extension of', imgurl, 'to', ext)
# cursed old puush images
if ext == '':
if parsed.verbose: print (imgurl, "has no extension. assuming jpg")
ext = ".jpg"
# add a dot in case the extension doesn't come with one
if '.' not in ext:
ext = "." + ext
md5 = hashlib.md5(imgurl.encode("utf8")).hexdigest().lower()
mirror_img_filename = md5 + ext
mirror_img_filepath = os.path.join(IMAGES_FOLDER, mirror_img_filename)
if os.path.isfile(mirror_img_filepath):
if parsed.verbose: print ("Already mirrored:", imgurl, "(" + str(mirror_img_filepath) + ")")
destination = os.path.join(post_root, mirror_img_filename)
shutil.copy(mirror_img_filepath, destination)
else:
if parsed.verbose: print ("Picture not mirrored:", imgurl)
if parsed.verbose: print ("Attempting to mirror", imgurl, " --> ", mirror_img_filepath)
response = requests.get(imgurl)
if not response.ok: # don't mirror an error page (e.g. the old puush "not found" pages)
    sys.exit("ERROR WITH MIRRORING: HTTP " + str(response.status_code) + " for " + imgurl)
f = open(mirror_img_filepath, 'wb')
f.write(response.content)
f.close()
#f = open(mirror_img_filepath, 'r')
#if "puush could not be found" in f.read().lower(): # this is broken with python3
# print ("error: image download failed: puush could not be found")
# f.close()
# os.remove(mirror_img_filepath)
if not os.path.isfile(mirror_img_filepath):
print ("ERROR WITH MIRRORING: Downloading image seems to have failed")
print ("Exiting...")
sys.exit(1)
else:
if parsed.verbose: print ("Success downloading image", imgurl, "-->", mirror_img_filepath)
destination = os.path.join(post_root, mirror_img_filename)
shutil.copy(mirror_img_filepath, destination)
mirror_img_size = os.path.getsize(mirror_img_filepath)
mirror_img_filename_thumb = md5 + ".thumb.jpg"
mirror_img_filepath_thumb = os.path.join(THUMBS_FOLDER, mirror_img_filename_thumb)
if ext == ".gif":
if parsed.verbose: print ('Skipping thumbnail for gif:', imgurl)
if os.path.isfile(mirror_img_filepath_thumb):
if parsed.verbose: print ("Deleting old GIF thumbnail because it's not needed anymore:", mirror_img_filepath_thumb)
os.remove(mirror_img_filepath_thumb)
image['src'] = mirror_img_filename
elif mirror_img_size < IMAGE_SIZE_THRESHOLD:
if parsed.verbose: print ('Skipping thumbnail because original is small enough:', imgurl, " - size:", mirror_img_size)
if os.path.isfile(mirror_img_filepath_thumb):
if parsed.verbose: print ("Deleting old thumbnail because it's not needed anymore (original is already small enough):", mirror_img_filepath_thumb)
os.remove(mirror_img_filepath_thumb)
image['src'] = mirror_img_filename
else:
if os.path.isfile(mirror_img_filepath): # Check if mirror image exists, we generate thumbs from that so we need it
thumb_destination = os.path.join(post_root, mirror_img_filename_thumb) # Destination to where thumb is served for the website
if not os.path.isfile(mirror_img_filepath_thumb) or parsed.regenThumbs: # Thumbnail does not exist, we need to re-generate it OR force regen is on
if parsed.verbose: print ("Generating thumbnail for", imgurl, "-->", mirror_img_filepath_thumb)
im = Image.open(mirror_img_filepath) # Source image is the mirrored image
if not im.mode == 'RGB':
im = im.convert('RGB')
im.thumbnail(THUMBNAIL_MAX_RESOLUTION)
im.save(mirror_img_filepath_thumb, "JPEG")
if not os.path.isfile(mirror_img_filepath_thumb):
print ("ERROR WITH THUMBNAIL: creating thumbnail seems to have failed")
print ("Exiting...")
sys.exit(1)
else:
if parsed.verbose: print("Skipping thumbnail generation for", imgurl)
if parsed.verbose: print("Copying thumbnail", mirror_img_filepath_thumb, "-->", thumb_destination)
if not os.path.isfile(thumb_destination):
shutil.copy(mirror_img_filepath_thumb, thumb_destination) # Copy thumb to website
else:
if parsed.verbose: print("Nevermind... thumb already in destination")
image['src'] = mirror_img_filename_thumb
else:
print ("ERROR WITH THUMBNAIL: original file hasnt been downloaded so i cant create a thumbnail")
sys.exit(1)
image['src'] = SITE_ROOT_URL + post_url + '/' + image['src'] # disgusting hack for getting full urls in RSS
image_urls.append(image['src']) # this will be used for front page
link_to_fullres = soup.new_tag('a', href=SITE_ROOT_URL + post_url + '/' + mirror_img_filename) # make the thumb clickable to get fullres
image.wrap(link_to_fullres)
html_output += str(soup)
html_output += '</div>'
# footer
# previous/next navigation should probably be here?
html_output += generateFooter()
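# first rendered line of the body doubles as the stub shown on the front page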
stub = markdown2.markdown(body_text, extras=["tables"]).split("\n")[0]
if saveHTML(html_output, os.path.join(post_root, 'index.html')):
#print ("success: wrote", title.strip(), "to", post_root)
posts.append({'title': title.strip(), 'linked': isLinked, 'slug': slugify(title.strip()), 'full_url': SITE_ROOT_URL + post_url, 'textfile': post, 'words': len(body_text.split()), 'chars': len(body_text), 'date': date, 'path': post_url, 'stub': stub, 'body': str(soup), 'images': image_urls})
else:
print ("ERROR: writing post html seems to have failed")
sys.exit(1)
# write stats page
html_output = generateHeader(title + ' - Stats', "normal")
html_output += '<h1>Stats for <u><a href="' + SITE_ROOT_URL + post_url + '">' + title + '</a></u></h1>'
html_output += '<p>'
html_output += str(len(body_text.split())) + ' words 📝<br>'
html_output += str(len(body_text)) + ' characters 🖊️<br>'
html_output += str(len(image_urls)) + ' images 🖼️<br>'
html_output += '</p>'
# filter out so we only get whitelisted characters (STATS_WHITELISTED_CHARACTERS) to avoid counting symbols and dashes etc.
stats_body_text = ''.join(filter(STATS_WHITELISTED_CHARACTERS.__contains__,body_text)) # https://stackoverflow.com/a/21564666
count = Counter(stats_body_text.lower().split()) # also make it lowercase so for example "The" and "the" aren't separated
html_output += '<h2>Words Used More Than Once:</h2>'
html_output += '<p>'
for word, value in count.most_common(len(count)): # i tried listing all words but it stops working properly for some reason, around 2080 words
if value > 1:
html_output += '"' + str(word) + '" is used <i>' + str(value) + ' times</i><br>'
html_output += '</p>'
html_output += '</div>'
html_output += generateFooter()
if not saveHTML(html_output, os.path.join(post_root, 'stats.html')):
print ("ERROR: saving stats page failed", post_root)
sys.exit(1)
processed_posts += 1
posts_left = len(input_files) - processed_posts
if parsed.verbose: print("OK,", posts_left, "left...\n")
pbar.update(1)
pbar.close()
####################################
if len(input_files) == processed_posts:
print ("Success! Made", processed_posts, "posts from", len(input_files), "input files")
else:
print ("ERROR: not every post seems to have been processed")
print ("Expected posts:", len(input_files))
print ("Processed posts:", processed_posts)
print ("Exiting...")
sys.exit(1)
posts.sort(key=lambda k: k['date'], reverse=True) # sort by date, newest on top
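# three feeds get generated: main (everything links back to this site), alternate (linked posts point at their targets), and originals-only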
print ("Generating main RSS feed...")
# First generate RSS feed where linked posts links back to our site
rss_items = [] # First generate the posts RSS items...
for p in posts[:20]: # only use 20 newest posts
item = PyRSS2Gen.RSSItem(
title = xml_clean(p['title']),
link = p['full_url'],
description = xml_clean(p['body']),
pubDate = p['date']
)
rss_items.append(item)
rss = PyRSS2Gen.RSS2( # ...then create the main RSS feed and include those items
title = SITE_TITLE,
link = SITE_ROOT_URL,
description = SITE_TITLE + ' RSS feed',
lastBuildDate = script_run_time,
items = rss_items)
with open(os.path.join(SITE_ROOT, 'rss.xml'), 'w') as f:
    rss.write_xml(f)
print ("Generating alternate RSS feed...")
# Generate RSS feed where linked posts links to their links
rss_items = [] # First generate the posts RSS items...
for p in posts[:20]: # only use 20 newest posts
if p['linked']:
linkURL = p['linked']
else:
linkURL = p['full_url']
item = PyRSS2Gen.RSSItem(
title = xml_clean(p['title']),
link = linkURL,
description = xml_clean(p['body']),
pubDate = p['date']
)
rss_items.append(item)
rss = PyRSS2Gen.RSS2( # ...then create the alternate RSS feed and include those items
title = SITE_TITLE + ' (Alternate)',
link = SITE_ROOT_URL,
description = SITE_TITLE + ' Alternate RSS feed (linked posts link to their targets)',
lastBuildDate = script_run_time,
items = rss_items)
with open(os.path.join(SITE_ROOT, 'rss_follow_links.xml'), 'w') as f:
    rss.write_xml(f)
print ("Generating RSS feed without linked posts...")
rss_items = [] # First generate the posts RSS items...
for p in posts[:20]: # only use 20 newest posts
if p['linked']:
continue
item = PyRSS2Gen.RSSItem(
title = xml_clean(p['title']),
link = p['full_url'],
description = xml_clean(p['body']),
pubDate = p['date']
)
rss_items.append(item)
rss = PyRSS2Gen.RSS2( # ...then create the originals-only RSS feed and include those items
title = SITE_TITLE + ' (Originals)',
link = SITE_ROOT_URL,
description = SITE_TITLE + ' Originals RSS feed (linked posts not included)',
lastBuildDate = script_run_time,
items = rss_items)
with open(os.path.join(SITE_ROOT, 'rss_originals.xml'), 'w') as f:
    rss.write_xml(f)
print ("Writing front page...")
html_output = generateHeader("Blog", "normal")
for p in posts[:10]:
if p['linked']: # linked post? is either False or an URL
html_output += '<h1 class="article_title_linked"><a href="' + p['linked'] + '">' + p['title'] + '</a></h1>'
else:
html_output += '<h1 class="article_title"><a href="' + p['full_url'] + '">' + p['title'] + '</a></h1>'
html_output += '<h2 class="article_date"><a href="' + p['full_url'] + '">' + p['date'].strftime('%a %d %b %Y, %H:%M') + '</a></h2>'
html_output += p['stub']
if len(p['images']) > 0: # add first image from post, clicking it links to article
html_output += '<a href="' + p['full_url'] + '">'
html_output += '<img src="' + p['images'][0] + '">' # 0 to get first image (the top one)
html_output += '</a>'
html_output += '<br><br>'
html_output += '</div>'
html_output += '<div class="article"><center><p>Go to the <a href="archive">archive</a> to see all posts</p></center></div>' # add article div to get colored link
html_output += generateFooter()
if not saveHTML(html_output, os.path.join(SITE_ROOT, 'index.html')):
    print ("ERROR: saving front page failed")
    sys.exit(1)
print ("Writing archive...")
html_output = generateHeader("Blog Archive", "normal")
#html_output += '<p>Hint: use your web browsers\' search function to find what you\'re looking for.</p>'
yr = 0
mo = 0
for p in posts:
if yr != p['date'].year:
html_output += '<h1 class="article_title"><u><a href="' + str(p['date'].year) +'">' + str(p['date'].year) + '</a></u></h1>'
yr = p['date'].year
if mo != p['date'].month:
html_output += '<h2><a href="' + str(p['date'].year) + '/' + "%02d" % p['date'].month +'">' + p['date'].strftime('%B') + '</a></h2>'
mo = p['date'].month
html_output += '<li><a href="' + p['path'] + '">' + p['title'] + '</a></li>'
html_output += "</div>"
html_output += generateFooter()
saveHTML(html_output, os.path.join(SITE_ROOT, 'archive.html'))
print ("Making indexes for every year...")
years = []
for p in posts:
    y = p['date'].year
    if y not in years: # avoid regenerating the same year index once per post
        years.append(y)
for yr in years:
    mo = 0 # reset the month tracker for every year page
    html_output = generateHeader(str(yr), "normal")
html_output += '<h1 class="article_title">' + str(yr) + '</h1>'
for p in posts:
y = p['date'].year
if y == yr:
if mo != p['date'].month:
html_output += '<h2><a href="' + "%02d" % p['date'].month +'">' + p['date'].strftime('%B') + '</a></h2>'
mo = p['date'].month
url = "%02d" % p['date'].month + '/' + "%02d" % p['date'].day + '/' + p['slug']
html_output += '<li><a href="' + url + '">'
html_output += p['title']
html_output += '</a></li>'
html_output += '</div>'
html_output += generateFooter()
saveHTML(html_output, os.path.join(SITE_ROOT, str(yr), 'index.html'))
print ("Making indexes for every month...")
months = []
for p in posts:
    ym = str(p['date'].year) + '-' + "%02d" % p['date'].month # ex 2019-04
    if ym not in months: # avoid regenerating the same month index once per post
        months.append(ym)
for month in months:
print_month = datetime.strptime(month, '%Y-%m').strftime('%B %Y') # ex April 2019
html_output = generateHeader(print_month, 'normal')
html_output += '<h1>' + print_month + '</h1>'
html_output += '<ul>'
for p in posts:
y = str(p['date'].year)
m = str("%02d" % p['date'].month)
if y == month.split('-')[0] and m == month.split('-')[1]:
#print y, m, p['title']
url = "%02d" % p['date'].day + '/' + p['slug']
html_output += '<li><a href="' + url + '">'
html_output += p['title']
html_output += '</a></li>'
html_output += '</ul></div>'
html_output += generateFooter()
saveHTML(html_output, os.path.join(SITE_ROOT, month.split('-')[0], month.split('-')[1], 'index.html'))
print ("Making indexes for every day...") # for those url modifying geeks
days = []
for p in posts:
    ymd = p['date'].strftime('%Y-%m-%d') # ex 2019-04-20
    if ymd not in days: # avoid regenerating the same day index once per post
        days.append(ymd)
for day in days:
print_day = datetime.strptime(day, '%Y-%m-%d').strftime('%d %B %Y') # 20 April 2019
html_output = generateHeader(print_day, 'normal')
html_output += '<h1>' + print_day + '</h1>'
html_output += '<ul>'
for p in posts:
y = str(p['date'].year)
m = str("%02d" % p['date'].month)
d = str("%02d" % p['date'].day)
if y == day.split('-')[0] and m == day.split('-')[1] and d == day.split('-')[2]:
url = p['slug']
html_output += '<li><a href="' + url + '">'
html_output += p['title']
html_output += '</a></li>'
html_output += '</ul></div>'
html_output += generateFooter()
saveHTML(html_output, os.path.join(SITE_ROOT, day.split('-')[0], day.split('-')[1], day.split('-')[2], 'index.html'))
print ("Creating other pages:")
pages = os.listdir(OTHER_PAGES_FOLDER)
for page in pages:
if not page.lower().endswith('html'):
if parsed.verbose: print ("(ignoring",page,"because its not a html file)")
continue
path = os.path.join(OTHER_PAGES_FOLDER, page)
f = open(path, 'r', encoding="utf8")
title = f.readline() # title is the first line
html_output = generateHeader(title, "article")
html_output += f.read() # ... then read the rest of the file
f.close()
html_output += '</div>'
html_output += generateFooter()
if saveHTML(html_output, os.path.join(SITE_ROOT, page)):
print ("\t", page)
else:
print ("\t", page, "= failed")
sys.exit(1)
print ("Creating stats page...")
total_words = 0
total_chars = 0
total_images = 0
for p in posts:
total_words += p['words']
total_chars += p['chars']
total_images += len(p['images'])
total_posts = len(posts)
# hide some useful debug info in the html as a comment
html_output = '<!--'
html_output += 'lambblog super hidden secret debug info: '
html_output += 'Generated ' + script_run_time.strftime('%Y-%m-%d %H:%M') + ", python sys.version: "
html_output += sys.version
html_output += '-->'
# and then make the actual stats page
html_output += generateHeader('Stats', 'normal')
html_output += '<h1>Stats 💯</h1>'
html_output += '<ul>'
html_output += '<li>📄 ' + str(total_posts) + ' posts</li>'
html_output += '<li>📝 ' + str(total_words) + ' words</li>'
html_output += '<li>🖊️ ' + str(total_chars) + ' characters</li>'
html_output += '<li>🖼️ ' + str(total_images) + ' images</li>'
html_output += '</ul>'
html_output += '<h2>Posts With Most Words 📝</h2>'
html_output += '<ol>'
posts_sorted_by_words = sorted(posts, key=lambda k: k['words'], reverse=True) # sort by word count, most on top
for p in posts_sorted_by_words:
html_output += '<li><a href="' + p['path'] + '">' + p['title'] + '</a> β <a href="' + p['path'] + '/stats' + '">' + str(p['words']) + ' words</a></li>'
html_output += '</ol>'
html_output += '<h2>Posts With Most Images 🖼️</h2>'
html_output += '<ol>'
posts_sorted_by_images = sorted(posts, key=lambda k: len(k['images']), reverse=True)
for p in posts_sorted_by_images:
if len(p['images']) > 0:
html_output += '<li><a href="' + p['path'] + '">' + p['title'] + '</a> β <a href="' + p['path'] + '/stats' + '">' + str(len(p['images']))
if len(p['images']) == 1:
html_output += ' image'
else:
html_output += ' images'
html_output += '</a></li>'
html_output += '</ol>'
html_output += '</div>'
html_output += generateFooter()
if not saveHTML(html_output, os.path.join(SITE_ROOT, 'stats.html')):
print ("ERROR: saving stats page failed")
sys.exit(1)
print ("Copying files from the include folder to the site root...")
files = os.listdir(INCLUDE_FOLDER)
for f in files:
src = os.path.join(INCLUDE_FOLDER, f)
dest = os.path.join(SITE_ROOT, f)
if parsed.verbose: print("\t", src, "-->", dest)
shutil.copy(src, dest)
print()
print ("All done!")
print ("Find your website in", os.path.abspath(SITE_ROOT))