In [39]:
from google.colab import auth
from googleapiclient.discovery import build

In [46]:
class GoogleDocsAPIHandler:
    """
    Convenience wrapper around the Google Docs (v1) and Google Drive (v3) clients.

    Documents are created, fetched and batch-updated through the Docs client;
    deletion goes through the Drive client.
    """
    def __init__(self):
        """
        Sets both client attributes to None and then runs authentication,
        which replaces them with real clients.
        """
        self.google_docs_client = None
        self.google_drive_client = None
        self.__authenticate()

    def __authenticate(self):
        """
        Runs the Colab user authentication and builds the Docs and Drive clients.

        Any failure is reported on stdout and then re-raised to the caller.
        """
        try:
            auth.authenticate_user()
            self.google_docs_client = build('docs', 'v1')
            self.google_drive_client = build('drive', 'v3')
        except Exception as auth_error:
            print(f"Authentication error, please try again, error message: {auth_error}")
            raise auth_error

    def create_document(self, title="Default Title"):
        """
        Creates an empty Google Docs document.

        :param title: Title given to the new document.
        :return: The "documentId" field of the creation response.
        """
        create_call = self.google_docs_client.documents().create(body={"title": title})
        created = create_call.execute()
        new_id = created.get("documentId")
        print(f"Document {new_id} has been created")
        return new_id

    def get_document(self, document_id):
        """
        Fetches a document resource from the Docs API.

        :param document_id: The ID of the document to fetch.
        :return: The response of the documents().get call.
        """
        documents = self.google_docs_client.documents()
        return documents.get(documentId=document_id).execute()

    def get_or_create_footer(self, document_id):
        """
        Returns the ID of the document's first listed footer, creating a
        default footer first when the document has none.

        :param document_id: The ID of the document.
        :return: The footer ID.
        """
        existing_footers = self.get_document(document_id).get("footers", {})
        footer_ids = list(existing_footers.keys())
        if footer_ids:
            return footer_ids[0]

        # No footer yet: ask the API to create a default one and read its ID
        # from the first reply of the batch update.
        create_footer_request = {
            "createFooter": {
                "sectionBreakLocation": {"index": 0},
                "type": "DEFAULT"
            }
        }
        update_response = self.update_document(document_id, [create_footer_request])
        first_reply = list(update_response["replies"])[0]
        return first_reply["createFooter"]["footerId"]

    def update_document(self, document_id, requests):
        """
        Sends a batch of update requests to a document.

        :param document_id: The ID of the document to modify.
        :param requests: A list of Docs API request dictionaries.
        :return: The batchUpdate response.
        """
        payload = {'requests': requests}
        batch_call = self.google_docs_client.documents().batchUpdate(
            documentId=document_id,
            body=payload
        )
        result = batch_call.execute()
        print(f"Document {document_id} has been updated")
        return result

    def get_document_url(self, document_id):
        """
        Builds the browser edit link of a document.

        :param document_id: The ID of the document.
        :return: The edit URL as a string.
        """
        return "https://docs.google.com/document/d/" + f"{document_id}/edit"

    def delete_document(self, document_id):
        """
        Deletes a document through the Drive API.

        Failures are printed rather than raised, so this can be used as a
        best-effort cleanup step.

        :param document_id: The ID of the document to delete.
        """
        try:
            delete_call = self.google_drive_client.files().delete(fileId=document_id)
            delete_call.execute()
            print(f"Document {document_id} has been deleted.")
        except Exception as delete_error:
            print(f"Error deleting document: {delete_error}")


In [83]:
class MarkdownParser:
    """
    A parser for converting Markdown files into Google Docs requests.

    Supported elements: headings ("# " .. "###### "), unchecked checkboxes
    ("- [ ] "), bullet lists with up to three nesting levels ("* ", "  * ",
    "    * "), a footer section (everything after a "---" line) and
    @-mentions, which are rendered bold and blue. Any other non-empty line is
    inserted as plain text.

    All indices and lengths placed in requests are counted in UTF-16 code
    units, which is the unit the Google Docs API uses for document indexes.
    """
    # Bullet prefixes ordered by nesting depth; the position in the tuple is
    # the number of leading tabs sent to Google Docs for that level.
    _LIST_PREFIXES = ("* ", "  * ", "    * ")
    # Deepest heading level mapped to a named style (HEADING_1 .. HEADING_6).
    _MAX_HEADING_LEVEL = 6

    def __init__(self, google_docs_footer_id, filename):
        """
        Initializes the MarkdownParser with a file and footer ID.

        :param google_docs_footer_id: The footer segment ID in the Google Docs document.
        :param filename: The Markdown file to be parsed.
        :raises Exception: Whatever open()/readlines() raised, after printing a hint.
        """
        self.body_end_index = 1
        self.footer_end_index = 0
        self.footer_id = google_docs_footer_id
        self.filename = filename
        self.lines = []

        # convert markdown file into lines
        try:
            # Explicit encoding so the result does not depend on the platform
            # default; "utf-8-sig" also drops a leading BOM that would
            # otherwise hide a "#" or "* " prefix on the first line.
            with open(filename, "r", encoding="utf-8-sig") as f:
                self.lines = f.readlines()
        except Exception as e:
            print(f"Fail to open file \"{filename}\", please double check your file path, error message: {e}")
            raise

    @staticmethod
    def _utf16_len(text):
        """
        Returns the length of ``text`` in UTF-16 code units.

        :param text: Any string.
        :return: Number of UTF-16 code units (characters outside the BMP count as 2).
        """
        return len(text.encode("utf-16-le", "surrogatepass")) // 2

    def parse_document(self):
        """
        Parses the entire Markdown file and generates Google Docs API requests.

        Advances ``self.body_end_index`` by the size of every body line that
        is emitted, so the method is meant to be called once per instance.

        :return: A list of requests for updating the Google Docs document.
        """
        requests = []
        total = len(self.lines)
        index = 0
        while index < total:
            line = self.lines[index]
            if line.startswith("#"):
                new_requests, line_content = self.__parse_title(line)
            elif line.startswith("- [ ] "):
                new_requests, line_content = self.__parse_checkbox(line[6:])
            elif line.startswith("* "):
                # Collect the whole run of consecutive list lines so they are
                # inserted and bulleted as a single block.
                end = index
                while end < total and self.lines[end].startswith(self._LIST_PREFIXES):
                    end += 1
                new_requests, line_content = self.__parse_leveled_list(self.lines[index:end])
                index = end - 1  # the shared increment below moves past the run
            elif line.strip() == "":
                # we skip empty lines
                index += 1
                continue
            elif line.strip() == "---":
                # all remaining lines are footers; they live in their own
                # segment, so the body index is left untouched
                footer_requests, footer_content = self.__parse_footer(index, footer_id=self.footer_id)
                requests.extend(footer_requests)
                requests.extend(self.__parse_at_symbol(footer_content, segment_id=self.footer_id))
                break
            else:
                # default parser when an unknown prefix appears
                new_requests, line_content = self.__parse_default_line(line)

            requests.extend(new_requests)
            # @-mention styling must be computed before body_end_index moves,
            # because its ranges are relative to where this line starts.
            requests.extend(self.__parse_at_symbol(line_content))
            self.body_end_index += self._utf16_len(line_content)
            index += 1
        return requests

    def __parse_title(self, line):
        """
        Parses a Markdown heading and generates the appropriate Google Docs request.

        A line counts as a heading when it starts with 1 to 6 "#" characters
        followed by a space. Any other "#"-prefixed line (e.g. "#tag" or seven
        hashes) is inserted unchanged as plain text.

        :param line: The heading line in Markdown.
        :return: A list of requests and the processed content.
        """
        level = len(line) - len(line.lstrip("#"))
        is_heading = 1 <= level <= self._MAX_HEADING_LEVEL and line[level:level + 1] == " "
        if not is_heading:
            return self.__parse_default_line(line)

        text = line[level + 1:]
        if not text:
            # A bare prefix at end of file has no text to insert; emit nothing
            # rather than an empty insertText.
            return [], ""

        # compose google docs requests
        start = self.body_end_index
        requests = [{
            "insertText": {
                "location": {"index": start},
                "text": text
            }
        }, {
            "updateParagraphStyle": {
                "range": {
                    "startIndex": start,
                    "endIndex": start + self._utf16_len(text)
                },
                "paragraphStyle": {"namedStyleType": f"HEADING_{level}"},
                "fields": "namedStyleType"
            }
        }]

        return requests, text

    def __parse_leveled_list(self, items):
        """
        Parses a leveled list and generates Google Docs API requests.

        :param items: Consecutive raw list lines, each starting with one of the
            supported bullet prefixes.
        :return: A list of requests and the content as it counts towards the
            document index (i.e. without the nesting tabs).
        """
        # we need two kinds of contents because \t will be converted to the indentation of leveled lists,
        # and it does not get counted to document index
        docs_parts = []
        index_parts = []
        for item in items:
            for depth, prefix in enumerate(self._LIST_PREFIXES):
                if item.startswith(prefix):
                    text = item[len(prefix):]
                    docs_parts.append("\t" * depth + text)
                    index_parts.append(text)
                    break
        content_for_google_docs = "".join(docs_parts)
        content_for_index = "".join(index_parts)

        if not content_for_index:
            # Nothing but bare prefixes: no text to insert.
            return [], ""

        # compose google docs requests
        start = self.body_end_index
        requests = [
            {
                "insertText": {
                    "location": {"index": start},
                    "text": content_for_google_docs
                }
            }, {
                "createParagraphBullets": {
                    "range": {
                        "startIndex": start,
                        # the tabs are still present when this request runs
                        "endIndex": start + self._utf16_len(content_for_google_docs)
                    },
                    "bulletPreset": "BULLET_DISC_CIRCLE_SQUARE",
                }
            }
        ]
        return requests, content_for_index

    def __parse_checkbox(self, line):
        """
        Parses a checkbox line and generates Google Docs API requests.

        :param line: The checkbox line without the prefix.
        :return: A list of requests and the processed content.
        """
        if not line:
            # Bare "- [ ] " at end of file: no text to insert.
            return [], ""

        start = self.body_end_index
        requests = [
            {
                "insertText": {
                    "location": {"index": start},
                    "text": line
                }
            }, {
                "createParagraphBullets": {
                    "range": {
                        "startIndex": start,
                        "endIndex": start + self._utf16_len(line)
                    },
                    "bulletPreset": "BULLET_CHECKBOX",
                }
            }
        ]
        return requests, line

    def __parse_footer(self, index, footer_id = None):
        """
        Parses the footer section of the Markdown document.

        :param index: The index (into self.lines) of the "---" separator line;
            every line after it becomes footer text.
        :param footer_id: Segment ID of the footer that receives the text.
        :return: A list of requests and the footer content. The list is empty
            when there is no text after the separator.
        """
        content = "".join(self.lines[index + 1:])
        if not content:
            # Separator is the last line: no empty insert / empty style range.
            return [], content

        requests = [
            {
                "insertText": {
                    "location": {
                        "segmentId": footer_id,
                        "index": 0
                    },
                    "text": content
                }
            }, {
                "updateTextStyle": {
                    "range": {
                        "startIndex": 0,
                        "endIndex": self._utf16_len(content),
                        "segmentId": footer_id
                    },
                    "textStyle": {
                        "italic": True
                    },
                    "fields": "italic"
                }
            }
        ]
        return requests, content

    def __parse_at_symbol(self, content, segment_id=None):
        """
        Parses @-mentions and applies bold and color formatting.

        A mention is a whitespace-delimited token that starts with "@" and has
        at least one more character. Each occurrence gets its own range, also
        when the same mention appears several times in ``content``.

        :param content: The text content to be parsed.
        :param segment_id: Segment the content was inserted into; None means
            the document body, in which case ranges start at body_end_index.
        :return: A list of formatting requests.
        """
        requests = []
        base_index = self.body_end_index if segment_id is None else self.footer_end_index
        cursor = 0
        for word in content.split():
            # Tokens come back in document order, so searching from the end of
            # the previous token pins down this exact occurrence.
            position = content.find(word, cursor)
            cursor = position + len(word)
            if not (word.startswith("@") and len(word) > 1):
                continue
            start = base_index + self._utf16_len(content[:position])
            end = start + self._utf16_len(word)
            requests.append({
                "updateTextStyle": {
                    "range": {
                        "startIndex": start,
                        "endIndex": end,
                        "segmentId": segment_id
                    },
                    "textStyle": {
                        "bold": True,
                        "foregroundColor": {
                            "color": {
                                "rgbColor": {
                                    "red": 0,
                                    "green": 0,
                                    "blue": 1
                                }
                            }
                        }
                    },
                    "fields": "bold,foregroundColor"
                }
            })
        return requests

    def __parse_default_line(self, line):
        """
        Default parser when unknown prefix appears

        :param line: The raw line
        :return: A list of requests and the inserted content.
        """
        requests = [
            {
                "insertText": {
                    "location": {"index": self.body_end_index},
                    "text": line
                }
            }
        ]
        return requests, line

In [None]:
def main():
    """
    Converts the Markdown file "test.md" into a newly created Google Docs document.

    Steps: authenticate, create an empty document, look up (or create) its
    footer, parse the Markdown file into Docs API requests and apply them in
    one batch update. If anything fails after the document has been created,
    the error is printed and the document is deleted again so no half-built
    document is left behind.
    """
    MARKDOWN_FILE_NAME = "test.md"
    DOCUMENT_TITLE = "Convered Doc"

    # authenticate user's google account, and get google docs client from user
    google_docs_api_handler = GoogleDocsAPIHandler()

    # create new google docs for the markdown document
    document_id = google_docs_api_handler.create_document(DOCUMENT_TITLE)

    try:
        # get default footer id for us to parse footers; kept inside the try
        # block so a failure here also triggers the teardown below
        footer_id = google_docs_api_handler.get_or_create_footer(document_id)

        # read the Markdown file and parse it into requests for google docs
        markdown_parser = MarkdownParser(footer_id, filename=MARKDOWN_FILE_NAME)
        requests = markdown_parser.parse_document()

        # update google docs according to parsed requests from the markdown file
        google_docs_api_handler.update_document(document_id, requests)

        # print out preview link for the document
        print("Document converted successfully, link: " + google_docs_api_handler.get_document_url(document_id))
    except Exception as e:
        print(e)
        # teardown, delete the created document if any error happens
        google_docs_api_handler.delete_document(document_id)

# Run the conversion only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Document 1duHkbvsFVh14wxKRUj4ZkqMSHonKNpCIU-MBJPbWUPA has been created
Document 1duHkbvsFVh14wxKRUj4ZkqMSHonKNpCIU-MBJPbWUPA has been updated
Document 1duHkbvsFVh14wxKRUj4ZkqMSHonKNpCIU-MBJPbWUPA has been updated
Document converted successfully, link: https://docs.google.com/document/d/1duHkbvsFVh14wxKRUj4ZkqMSHonKNpCIU-MBJPbWUPA/edit
