# Import & variables

In [64]:
import json
import os
import requests
import re
import time

In [2]:
# Base URL of the Outline server. API paths such as '/api/attachments.create' are appended
# to it by plain string concatenation (presumably it should not end with a slash).
server_url = "enter your Outline webserver url : https://www.example.com"
# Outline API key, sent as a Bearer token in the Authorization header of every request
api_key = 'enter your Outline API key'

In [69]:
# Folder holding the attachment files; it becomes the working directory before the files are read
path = "enter your path to attachments folder"
# Folder holding the markdown documents to convert; it is walked including its sub folders
path_mk_docs = "enter your path to markdown docs folder"

In [4]:
# Manage your extensions, and corresponding MIME type, as used by Outline upload API.
# Keys are file extensions without the leading dot; files whose extension is not listed
# here are not collected for upload.
MIME_type_dict = {
    'gif': 'image/gif',  # was misspelled 'gig', which made every .gif file unknown
    'png': 'image/png',
    'webp': 'image/webp',
    'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'pdf': 'application/pdf',
    'sh': 'application/x-sh',
    'jpeg': 'image/jpeg',
    'jpg': 'image/jpeg',
}

# Browse attachment files (does not support subfolders) and build the list of dicts used for the upload to Outline

In [72]:
# One dict per attachment file: original name, size, MIME type and raw bytes, plus the
# 'uploadUrl', 'form', 'attachment' and 'accessUrl' fields that are filled in during the upload
list_files = []

In [73]:
# Collect every file of the attachments folder into list_files.
# File names are resolved against the working directory, hence the chdir.
os.chdir(path)
for root, dirs, files in os.walk(".", topdown=False):
    if root != ".":
        # Subfolders are not supported: opening a bare name from here would either fail
        # or silently read a same-named file of the top folder, so report and skip.
        for name in files:
            print(os.path.join(root, name) + "\n skipped, attachments in subfolders are not supported")
        continue
    for name in files:
        # Extension lookup is case-insensitive so that e.g. 'photo.PNG' is recognised.
        extension = name.split('.')[-1].lower()
        mime_type = MIME_type_dict.get(extension)
        if mime_type is None:
            # add here exception managements (such as files without extension, or unknown extensions )
            if name == 'xxx':
                mime_type = MIME_type_dict['png']
            else:
                # Make the skip visible instead of dropping the file silently.
                print(name + "\n skipped, no MIME type known for this extension")
                continue
        with open(name, 'rb') as f:
            file_data = f.read()
        list_files.append({
            'ori_filename': name,
            'ori_size': os.stat(name).st_size,
            'MIME_ext': mime_type,
            'file_data': file_data,
            # The four fields below are filled in by the upload step.
            'uploadUrl': '',
            'form': '',
            'attachment': '',
            'accessUrl': '',
        })


In [None]:
# Check the number of collected files against the content of your attachments folder
len(list_files)

# Get Outline attachment urls, post attachments

In [None]:
# Obtain attachment creation urls, and then push attachments.
# An entry is marked as done (non-empty 'accessUrl') only once its upload succeeded, so
# re-running this cell retries exactly the files that are still missing.
url = server_url + '/api/attachments.create'
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + api_key
}
headers_upload = {"Authorization": "Bearer " + api_key}
max_attempts = 5  # upper bound on attachments.create calls per file when rate limited

for items in list_files:
    if items['accessUrl'] != '':
        # already uploaded by a previous run of this cell
        continue

    print(items['ori_filename'])
    payload = {
        "name": items['ori_filename'],
        "contentType": items['MIME_ext'],
        "size": items['ori_size'],
        "preset": "documentAttachment"
    }

    # Step 1: ask Outline to create the attachment record, waiting out rate limits.
    response_data = {}
    for attempt in range(max_attempts):
        response = requests.post(url, json=payload, headers=headers)
        try:
            response_data = response.json()
        except ValueError:
            # Not a JSON body (e.g. an error page from a proxy): report it and give up on this file.
            response_data = {}
            print(response.status_code, response.text)
            break
        print(response_data)
        rate_limited = not response_data.get('ok') and response_data.get('status') == 429
        if not rate_limited:
            break
        print("wait for one minute, rate limit obtained")
        time.sleep(60)

    data = response_data.get('data')
    if not data:
        # Creation failed: leave 'accessUrl' empty so that a re-run tries this file again.
        print(items['ori_filename'] + "\n error, attachment creation failed")
        continue

    items['uploadUrl'] = data['uploadUrl']
    items['form'] = data['form']
    items['attachment'] = data['attachment']

    # Step 2: push the file content to the upload url returned by Outline.
    # NOTE(review): assumes 'uploadUrl' is a path relative to server_url - confirm for
    # installations that store files outside the Outline server.
    url_upload = server_url + items['uploadUrl']
    files = {'file': (items['ori_filename'], items['file_data'], items['MIME_ext'])}
    response = requests.post(url=url_upload, data=items['form'], files=files, headers=headers_upload)
    try:
        print(response.json())
    except ValueError:
        # The upload endpoint may answer with an empty or non-JSON body.
        print(response.status_code, response.text)

    if not response.ok:
        print(items['ori_filename'] + "\n error, upload failed")
        continue

    items['accessUrl'] = data['attachment']['url']

In [None]:
# Show each original file name next to its Outline access URL; the other fields are no longer needed
for entry in list_files:
    original_name = entry['ori_filename']
    access_url = entry['accessUrl']
    print(original_name, access_url)

# Update MD docs (from Confluence to Outline)

In [42]:
# Regexes and replacement functions that adapt MD docs from Confluence to Outline. They handle the constructs
# the two tools render differently: INFO, WARNING and NOTE blocks, the blue star logo, and Google URLs

# Matches a level-2 heading made of a "(blue star)" image, the blank line after it,
# and the following line of text (group 1) together with its trailing blank line
pattern_bluestar = r"## !\[\(blue star\)\]\(.*?\)\n\n(.+?)\n\n"


def replace_section_bluestar(match):
    """Return a plain level-2 heading built from the text line of a blue star match.

    match: a match object of pattern_bluestar; group 1 holds the text that
    follows the image heading (e.g. "Related articles").
    """
    heading_text = match.group(1)
    return "## " + heading_text + "\n\n"


# Regex pattern to match an INFO block: a "> [!INFO]" line followed by one or more "> " quoted lines.
# The last quoted line may end the document without a trailing newline; it is still part of the block.
pattern_info = r"> \[!INFO\]\n((?:> .*(?:\n|\Z))+)"

# Replacement function
def replace_info_block(match):
    """Turn one matched INFO block into an Outline ":::info" block.

    match: a match object of pattern_info; group 1 holds the quoted lines.
    Returns the block text with the leading "> " removed from every line,
    wrapped between ":::info" and ":::".
    """
    info_content = re.sub(r"^> ", "", match.group(1), flags=re.MULTILINE)  # strip the quote marker of each line
    return f":::info\n{info_content}\n\n:::\n\n"

# Regex pattern to match a WARNING block: a "> [!WARNING]" line followed by one or more "> " quoted lines.
# The last quoted line may end the document without a trailing newline; it is still part of the block.
pattern_warning = r"> \[!WARNING\]\n((?:> .*(?:\n|\Z))+)"

# Replacement function
def replace_warning_block(match):
    """Turn one matched WARNING block into an Outline ":::warning" block.

    match: a match object of pattern_warning; group 1 holds the quoted lines.
    Returns the block text with the leading "> " removed from every line,
    wrapped between ":::warning" and ":::".
    """
    warning_content = re.sub(r"^> ", "", match.group(1), flags=re.MULTILINE)  # strip the quote marker of each line
    return f":::warning\n{warning_content}\n\n:::\n\n"

# Regex pattern to match a NOTE block: a "> [!NOTE]" line followed by one or more "> " quoted lines.
# The last quoted line may end the document without a trailing newline; it is still part of the block.
pattern_note = r"> \[!NOTE\]\n((?:> .*(?:\n|\Z))+)"

# Replacement function
def replace_note_block(match):
    """Turn one matched NOTE block into an Outline ":::tip" block.

    match: a match object of pattern_note; group 1 holds the quoted lines.
    Returns the block text with the leading "> " removed from every line,
    wrapped between ":::tip" and ":::".
    """
    note_content = re.sub(r"^> ", "", match.group(1), flags=re.MULTILINE)  # strip the quote marker of each line
    return f":::tip\n{note_content}\n\n:::\n\n"


# Regex pattern for attachments: matches a parenthesised target that starts with
# "./attachments/" and runs up to the first closing parenthesis, parentheses included
pattern_attachment = r"\(\./attachments/.*?\)"

def find_url_by_name(filename, list_files):
    """Look up the access URL recorded for an attachment file name.

    filename: name compared against the 'ori_filename' of each entry.
    list_files: list of dicts carrying 'ori_filename' and 'accessUrl'.
    Returns the 'accessUrl' of the first matching entry, or filename itself
    when no entry matches.
    """
    matching_urls = (
        entry['accessUrl'] for entry in list_files if entry['ori_filename'] == filename
    )
    return next(matching_urls, filename)
    
# Replacement function
def replace_attachment_link(match):
    """Replace one "(./attachments/<name>)" link target by the Outline URL of that file.

    match: a match object of pattern_attachment; the full match is the
    parenthesised target, e.g. "(./attachments/my%20file.png)".
    Returns "(<url>)", where <url> is whatever find_url_by_name returns for the
    decoded file name looked up in list_files.
    """
    from urllib.parse import unquote  # local import: only this function needs it

    link_prefix = "(./attachments/"
    original_link = match.group(0)
    # Drop the prefix and the closing parenthesis, then decode every percent-escape
    # (not only %20) so that the name compares equal to the file name on disk.
    filename = unquote(original_link[len(link_prefix):-1])
    new_base_url = find_url_by_name(filename, list_files)
    return f"({new_base_url})"



# manage google links
def add_line_break_before_google_links(markdown_text):
    """Put a blank line in front of bracketed Google Docs / Drive URLs.

    markdown_text: the markdown document as a string.
    Returns the text in which every "[https://docs.google.com...]" or
    "[https://drive.google.com...]" match, together with the whitespace
    directly before it, is replaced by two newlines followed by the bracketed
    URL. The negative lookbehind prevents a match from starting right after a
    newline character.
    """
    google_link = re.compile(r"(?<!\n)(\s*)(\[(https://(?:docs|drive)\.google\.com[^\]]*)\])")
    return google_link.sub(r"\n\n\2", markdown_text)

In [None]:
# Walk through folder (including sub folders), create modified version of MD files in the same folder.
# Each "<name>.md" is read, converted and written next to it as "<name>_modified.md".
os.chdir(path_mk_docs)
for root, dirs, files in os.walk(".", topdown=False):
    for name in files:
        stem, extension = os.path.splitext(name)
        # check file is an md file
        if extension != ".md":
            print(name + "\n error, this is not an md file")
            continue
        # Outputs of a previous run must not be converted again, otherwise every
        # re-run would add "<name>_modified_modified.md" files.
        if stem.endswith('_modified'):
            continue

        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(os.path.join(root, name), 'r', encoding='utf-8') as f:
            doc = f.read()

        # update bluestar
        doc = re.sub(pattern_bluestar, replace_section_bluestar, doc)
        # update info
        doc = re.sub(pattern_info, replace_info_block, doc)
        # update warning
        doc = re.sub(pattern_warning, replace_warning_block, doc)
        # update note
        doc = re.sub(pattern_note, replace_note_block, doc)
        # update attachments
        doc = re.sub(pattern_attachment, replace_attachment_link, doc)
        # manage google links
        doc = add_line_break_before_google_links(doc)

        new_filename = stem + '_modified' + extension
        with open(os.path.join(root, new_filename), 'w', encoding='utf-8') as f:
            f.write(doc)