# In [14]:
import os
import re

# Matches http(s) URLs up to the next whitespace or closing parenthesis.
_URL_PATTERN = re.compile(r"https?://[^\s\)]+")
# Matches "#tag" hashtags; the capture group is the tag name without the "#".
_TAG_PATTERN = re.compile(r"#(\w+)")
# Matches the " ():" residue left behind once a URL inside "(...):" is removed.
_EMPTY_PARENS_PATTERN = re.compile(r"\s\(\):")


def _unique_in_order(items):
    """Return *items* as a list without duplicates, keeping first-seen order."""
    return list(dict.fromkeys(items))


def _build_front_matter(title, tags, links):
    """Return the '---' delimited header listing the title, tags and links."""
    lines = ["---", f"title: {title}", "tags:"]
    lines.extend(f"  - {tag}" for tag in tags)
    lines.append("links:")
    lines.extend(f"  - {link}" for link in links)
    lines.append("---")
    # The header ends with one blank line before the body text.
    return "\n".join(lines) + "\n\n"


def _reformat_md_text(title, content):
    """Return *content* with links stripped and a front-matter header prepended."""
    links = _unique_in_order(_URL_PATTERN.findall(content))
    content_without_links = _URL_PATTERN.sub("", content)
    # Tags are collected only after the URLs are gone, so a URL fragment such
    # as "https://host/page#section" is not mistaken for a "#section" tag.
    tags = _unique_in_order(_TAG_PATTERN.findall(content_without_links))
    body = _EMPTY_PARENS_PATTERN.sub("", content_without_links)
    return _build_front_matter(title, tags, links) + body


def reformat_md_files(directory):
    """Add a front-matter header to every ``.md`` file under *directory*.

    The directory tree is walked recursively. Each Markdown file is read as
    UTF-8 and rewritten in place as follows:

    * a header delimited by ``---`` lines is prepended, holding the title
      (the file name without its extension), the ``#tag`` names found in the
      text, and the http(s) links found in the text;
    * all http(s) links and any leftover `` ():`` fragments are removed from
      the body.

    Tags and links are listed once each, in order of first appearance, so
    repeated runs over the same input produce identical files. Files whose
    stripped content already starts with ``---`` are treated as already
    reformatted and are left untouched.

    Args:
        directory: Path of the folder to process.

    Returns:
        None. The files are modified on disk.
    """
    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(".md"):
                continue

            file_path = os.path.join(root, file)
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Skip files that are already reformatted
            if content.strip().startswith("---"):
                continue

            title = os.path.splitext(file)[0]
            new_content = _reformat_md_text(title, content)

            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(new_content)

# Reformat every .md file under '2024/January' (resolved relative to the current
# working directory); change the path below to process a different folder.
reformat_md_files(directory='2024/January')