In [14]:
import os
import re

def reformat_md_files(directory):
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".md"):
                file_path = os.path.join(root, file)
                
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                
                # Skip files that are already reformatted
                if content.strip().startswith("---"):
                    continue
                
                # Extract tags (e.g., #tag)
                tags = re.findall(r"#(\w+)", content)
                
                # Extract hyperlinks and clean them
                links = re.findall(r"https?://[^\s\)]+", content)
                links = list(set(link for link in links if not any(link.startswith(f"#{tag}") for tag in tags)))  # Exclude tag links
                
                # Prepare the header
                title = os.path.splitext(file)[0]
                header = f"---\ntitle: {title}\ntags:\n"
                header += "".join([f"  - {tag}\n" for tag in set(tags)])
                header += "links:\n"
                header += "".join([f"  - {link}\n" for link in links])
                header += "---\n\n"
                
                # Remove all hyperlinks and " ():" combinations from the content
                content_without_links = re.sub(r"https?://[^\s\)]+", "", content)
                content_cleaned = re.sub(r"\s\(\):", "", content_without_links)
                
                # Rewrite the file
                new_content = header + content_cleaned
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(new_content)

# Replace 'your_directory_path' with the path to the folder containing the .md files
reformat_md_files('2024/January')

In [3]:
import os
from datetime import datetime
import calendar

# Function to update timestamps of files in a folder
def update_file_timestamps(root_folder):
    for folder_name in os.listdir(root_folder):
        folder_path = os.path.join(root_folder, folder_name)

        if os.path.isdir(folder_path):
            try:
                # Parse the folder name as a month
                month_number = datetime.strptime(folder_name, "%B").month
            except ValueError:
                print(f"Skipping invalid folder name: {folder_name}")
                continue

            year = int(os.path.basename(root_folder))  # Assuming the root folder is named by year
            last_day = calendar.monthrange(year, month_number)[1]
            last_day_date = datetime(year, month_number, last_day, 23, 59, 59)

            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)

                if os.path.isfile(file_path):
                    # Update timestamps
                    timestamp = last_day_date.timestamp()
                    os.utime(file_path, (timestamp, timestamp))
                    print(f"Updated timestamps for: {file_path} to {last_day_date}")

# Path to the root folder
year_folder = "2024"

update_file_timestamps(year_folder)

Updated timestamps for: 2024/April/EDPB opinion on pay or consent.md to 2024-04-30 23:59:59
Updated timestamps for: 2024/April/The CJEU AG considers Amazon OTC-medicines sales by pharmacies as not health data (unless the controller uses the data for diagnosis purposes).md to 2024-04-30 23:59:59
Updated timestamps for: 2024/April/FTC fines Cerebral $7.1M to for improper use of sensitive data for advertising.md to 2024-04-30 23:59:59
Updated timestamps for: 2024/April/CJEU opines on compensation of damages to data subjects.md to 2024-04-30 23:59:59
Updated timestamps for: 2024/April/FTC prohibits an alcohol addiction treatment service from selling data.md to 2024-04-30 23:59:59
Updated timestamps for: 2024/April/The CJEU AG considers that sensitive data manifestly made public does not automatically allow for its use for targeted marketing.md to 2024-04-30 23:59:59
Updated timestamps for: 2024/October/Meta implements data combination controls following anti-trust proceedings in Germany.md

In [None]:
import os

# Function to clean .md files in a folder
def clean_md_files(root_folder):
    for folder_name in os.listdir(root_folder):
        folder_path = os.path.join(root_folder, folder_name)

        if os.path.isdir(folder_path):
            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)

                if os.path.isfile(file_path) and file_name.endswith(".md"):
                    # Remove trailing commas in links
                    with open(file_path, "r") as file:
                        lines = file.readlines()

                    with open(file_path, "w") as file:
                        in_links_section = False
                        for line in lines:
                            if line.strip().startswith("links:"):
                                in_links_section = True
                                file.write(line)
                                continue

                            if in_links_section:
                                if line.strip() == "---":
                                    in_links_section = False
                                else:
                                    line = line.rstrip(',') + '\n'

                            file.write(line)

# Path to the root folder
year_folder = os.path.join(os.getcwd(), "2024")

clean_md_files(year_folder)

In [2]:
import os

# Function to clean .md files in a folder
def clean_md_files(root_folder, safe_mode=False):
    for folder_name in os.listdir(root_folder):
        folder_path = os.path.join(root_folder, folder_name)

        if os.path.isdir(folder_path):
            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)

                if os.path.isfile(file_path) and file_name.endswith(".md"):
                    # Read the file content
                    with open(file_path, "r") as file:
                        content = file.read()

                    # Find the links section
                    start_idx = content.find("links:")
                    end_idx = content.find("---", start_idx)

                    if start_idx != -1 and end_idx != -1:
                        links_section = content[start_idx:end_idx].splitlines()
                        cleaned_links = ["links:"]

                        for line in links_section:
                            if line.strip().startswith("-"):
                                # Remove trailing comma
                                cleaned_links.append(line.rstrip(")."))

                        # Rebuild the content
                        new_links_section = "\n".join(cleaned_links) + "\n"
                        new_content = content[:start_idx] + new_links_section + content[end_idx:]

                        if safe_mode:
                            print(f"File: {file_path}")
                            print("--- Original Links Section ---")
                            print("\n".join(links_section))
                            print("--- Cleaned Links Section ---")
                            print(new_links_section)
                        else:
                            # Write the updated content back to the file
                            with open(file_path, "w") as file:
                                file.write(new_content)

# Path to the root folder
year_folder = os.path.join(os.getcwd(), "2024")

# Set safe_mode to True to preview changes without overwriting files
clean_md_files(year_folder, safe_mode=False)

In [8]:
import os
from datetime import datetime
import calendar

# Function to update timestamps of files in a folder
def update_file_timestamps(root_folder):
    for folder_name in os.listdir(root_folder):
        folder_path = os.path.join(root_folder, folder_name)

        if os.path.isdir(folder_path):
            try:
                # Parse the folder name as a month
                month_number = datetime.strptime(folder_name, "%B").month
            except ValueError:
                print(f"Skipping invalid folder name: {folder_name}")
                continue

            year = int(os.path.basename(root_folder))  # Assuming the root folder is named by year
            last_day = calendar.monthrange(year, month_number)[1]
            last_day_date = datetime(year, month_number, last_day, 23, 59, 59)
            print(f'{year}-{month_number}-{last_day}')

            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)

                if os.path.isfile(file_path):
                    # Update timestamps
                    timestamp = last_day_date.timestamp()
                    #os.utime(file_path, (timestamp, timestamp))
                    #print(f"Updated timestamps for: {file_path} to {last_day_date}")

# Path to the root folder
year_folder = "2024"

update_file_timestamps(year_folder)

2024-4-30
2024-10-31
2024-12-31
2024-2-29
2024-6-30
2024-7-31
2024-8-31
2024-5-31
2024-11-30
2024-1-31
2024-3-31
2024-9-30


In [10]:
import os
from datetime import datetime
import calendar

# Function to add a date attribute to .md files in a folder
def add_date_attribute(root_folder, safe_mode=False):
    for folder_name in os.listdir(root_folder):
        folder_path = os.path.join(root_folder, folder_name)

        if os.path.isdir(folder_path):
            try:
                # Parse the folder name as a month
                month_number = datetime.strptime(folder_name, "%B").month
            except ValueError:
                print(f"Skipping invalid folder name: {folder_name}")
                continue

            year = int(os.path.basename(root_folder))  # Assuming the root folder is named by year
            last_day = calendar.monthrange(year, month_number)[1]
            date_value = datetime(year, month_number, last_day).strftime("%Y-%m-%d")

            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)

                if os.path.isfile(file_path) and file_name.endswith(".md"):
                    # Read the file content
                    with open(file_path, "r") as file:
                        content = file.read()

                    # Check if date attribute already exists
                    if "date:" in content:
                        print(f"File {file_path} already has a date attribute. Skipping.")
                        continue

                    # Find the end of the attribute section
                    attribute_end_idx = content.find("---", 3)  # Skip the first "---"

                    if attribute_end_idx != -1:
                        new_content = content[:attribute_end_idx] + f"date: {date_value}\n" + content[attribute_end_idx:]

                        if safe_mode:
                            print(f"File: {file_path}")
                            print("--- Original Content ---")
                            print(content)
                            print("--- New Content ---")
                            print(new_content)
                        else:
                            # Write the updated content back to the file
                            with open(file_path, "w") as file:
                                file.write(new_content)

# Path to the root folder
year_folder = os.path.join(os.getcwd(), "2024")

# Set safe_mode to True to preview changes without overwriting files
add_date_attribute(year_folder, safe_mode=False)
