# In [None]:
import os
from bs4 import BeautifulSoup
from datetime import datetime

# Parse the saved blog page into a BeautifulSoup tree using the built-in
# 'html.parser' backend
with open('rpgmakernet_2.html', encoding='utf-8') as f:
    soup = BeautifulSoup(f, features='html.parser')

# Destination folder for the generated markdown files; missing parent
# folders are created and an already-existing folder is not an error
output_dir = '/mnt/data/docs/blog/posts/Archive/RPGMaker.net'
os.makedirs(output_dir, exist_ok=True)

# Process each blog post: one markdown file (with front matter) per post.
#
# Output files are named after the post date only, so several posts published
# on the same day would map to the same file name and overwrite each other.
# Count how often each date has been seen in this run and suffix the repeats
# (RPGM_21-05-09.md, RPGM_21-05-09_2.md, ...). The first post of a date keeps
# the unsuffixed name.
posts_per_date = {}

for post in soup.find_all('div', class_='blog_post'):
    # `find` returns None when a tag is absent, so check every required piece
    # before dereferencing it; one malformed post should not abort the run.
    heading = post.find('h3')
    title_tag = heading.find('a') if heading is not None else None
    date_tag = post.find('li', class_='small')
    message_div = post.find('div', class_='message')
    if title_tag is None or date_tag is None or message_div is None:
        print('Skipped a blog post with missing title, date, or message markup')
        continue

    # Extract title
    title = title_tag.get_text(strip=True)

    # Extract date
    date_str = date_tag.get_text(strip=True)
    # Parse date like "05/09/2021 12:35 AM"
    try:
        dt = datetime.strptime(date_str, '%m/%d/%Y %I:%M %p')
    except ValueError:
        print(f'Skipped {title!r}: unrecognized date {date_str!r}')
        continue
    date_iso = dt.strftime('%Y-%m-%d')
    yy_date = dt.strftime('%y-%m-%d')

    # Extract message content, converting <br/> to newlines so line breaks
    # survive the tag-stripping done by get_text()
    for br in message_div.find_all('br'):
        br.replace_with('\n')
    content = message_div.get_text().strip()

    # Prepare markdown content
    md = f"""---
comments: true
date: {date_iso}
categories:
  - AUTONOMY
---

# {title}
{content}
"""
    # File name with two-digit year; repeats of the same date get _2, _3, ...
    occurrence = posts_per_date.get(yy_date, 0) + 1
    posts_per_date[yy_date] = occurrence
    suffix = '' if occurrence == 1 else f'_{occurrence}'
    filename = f"RPGM_{yy_date}{suffix}.md"
    filepath = os.path.join(output_dir, filename)

    # Write to file
    with open(filepath, 'w', encoding='utf-8') as md_file:
        md_file.write(md)

    print(f'Generated {filepath}')