In [1]:
!pip -q install google-api-python-client google-auth google-auth-httplib2 google-auth-oauthlib

In [2]:
# Sign the Colab user in, then build a Google Docs API client from the
# credentials returned by google.auth.default().
from google.colab import auth
import google.auth
from googleapiclient.discovery import build

auth.authenticate_user()

# Scope requested for the credentials used by the Docs client below.
SCOPES = ["https://www.googleapis.com/auth/documents"]
creds, _ = google.auth.default(scopes=SCOPES)
docs_service = build("docs", "v1", credentials=creds)

print("Authenticated. Google Docs API client ready.")

Authenticated. Google Docs API client ready.


In [3]:
# Sample meeting notes in Markdown; this is the input handed to the converter
# in the final cell. Nested list items are indented with a single space, and
# everything after the `---` line is the meeting footer.
MARKDOWN_NOTES = """# Product Team Sync - May 15, 2023
## Attendees
- Sarah Chen (Product Lead)
- Mike Johnson (Engineering)
- Anna Smith (Design)
- David Park (QA)
## Agenda
### 1. Sprint Review
* Completed Features
 * User authentication flow
 * Dashboard redesign
 * Performance optimization
 * Reduced load time by 40%
 * Implemented caching solution
* Pending Items
 * Mobile responsive fixes
 * Beta testing feedback integration
### 2. Current Challenges
* Resource constraints in QA team
* Third-party API integration delays
* User feedback on new UI
 * Navigation confusion
 * Color contrast issues
### 3. Next Sprint Planning
* Priority Features
 * Payment gateway integration
 * User profile enhancement
 * Analytics dashboard
* Technical Debt
 * Code refactoring
 * Documentation updates
## Action Items
- [ ] @sarah: Finalize Q3 roadmap by Friday
- [ ] @mike: Schedule technical review for payment integration
- [ ] @anna: Share updated design system documentation
- [ ] @david: Prepare QA resource allocation proposal
## Next Steps
* Schedule individual team reviews
* Update sprint board
* Share meeting summary with stakeholders
## Notes
* Next sync scheduled for May 22, 2023
* Platform demo for stakeholders on May 25
* Remember to update JIRA tickets
---
Meeting recorded by: Sarah Chen
Duration: 45 minutes
"""

In [7]:
import re
from dataclasses import dataclass
from typing import List, Dict, Any, Tuple, Optional


# ============================================================
# Data model
# ============================================================
@dataclass
class ParagraphBlock:
    """
    Represents one paragraph in the final Google Doc.

    The parser fills in `kind`, `text` and `level`. The four offset fields are
    left at 0 until GoogleDocWriter._assemble_text() lays all blocks out into
    one text blob; they are then used to address this paragraph when styles
    are applied after insertion.
    """
    kind: str   # one of: h1, h2, h3, bullet, checkbox, subtitle, footer, sep, p
    text: str   # paragraph text without the trailing newline
    level: int = 0  # nesting level for indentation (bullets/checkboxes)

    # Offsets into the assembled text blob (0-based, before the writer shifts
    # them to document indexes).
    start: int = 0       # first position of `text`
    end: int = 0         # position just past the last position of `text`
    para_start: int = 0  # same as `start`
    para_end: int = 0    # `end` + 1, i.e. including the paragraph's newline


class ConversionError(Exception):
    """Signals that Markdown parsing or a Google Docs API operation failed."""



# Parser:

class MarkdownParser:
    """
    Line-oriented parser that turns a small Markdown subset into ParagraphBlocks.

    Recognised constructs (one per line):
      * `#`, `##`, `###` headings (the `#` must be in column 0);
      * `-` / `*` bullets, nested according to leading indentation;
      * `- [ ]` / `* [ ]` unchecked task items;
      * a `---` rule, after which every remaining non-blank line is footer text.
    Blank lines are dropped and anything else becomes a plain paragraph.
    """

    H1_RE = re.compile(r"^#\s+(.+)$")
    H2_RE = re.compile(r"^##\s+(.+)$")
    H3_RE = re.compile(r"^###\s+(.+)$")
    BULLET_RE = re.compile(r"^(\s*)[-*]\s+(.+)$")
    # Accepts the same list markers as BULLET_RE so that "* [ ] task" is not
    # mis-read as a plain bullet whose text starts with "[ ]".
    CHECKBOX_RE = re.compile(r"^\s*[-*]\s*\[\s*\]\s*(.+)$")

    def __init__(self, indent_spaces_per_level: int = 2, separator_line: str = "──────────"):
        """
        Args:
            indent_spaces_per_level: Indent width that corresponds to one
                nesting level; must be at least 1.
            separator_line: Text emitted in place of the Markdown `---` rule.

        Raises:
            ValueError: If `indent_spaces_per_level` is smaller than 1.
        """
        if indent_spaces_per_level < 1:
            raise ValueError("indent_spaces_per_level must be a positive integer.")
        self.indent_spaces_per_level = indent_spaces_per_level
        self.separator_line = separator_line

    def _indent_level(self, raw_line: str) -> int:
        """
        Return the nesting level implied by a line's leading whitespace.

        The level is the indent width divided by `indent_spaces_per_level`,
        rounded up, so a partially indented line still counts as nested (the
        sample markdown sometimes uses 1 space for nesting). With the default
        of 2 spaces per level:
          0 spaces => level 0
          1-2 spaces => level 1
          3-4 spaces => level 2
          etc.
        A leading tab counts as one full level.
        """
        per_level = self.indent_spaces_per_level
        stripped = raw_line.lstrip(" \t")
        indent = raw_line[: len(raw_line) - len(stripped)]
        width = indent.count(" ") + indent.count("\t") * per_level
        # Ceiling division; yields 0 for an unindented line.
        return (width + per_level - 1) // per_level

    def _heading_blocks(self, line: str) -> List["ParagraphBlock"]:
        """
        Return the block(s) for a heading line, or an empty list if `line`
        is not a heading.

        An H1 of the form "Title - Something" is split at the first " - ":
        the left part becomes the h1 and the right part a subtitle block.
        """
        m = self.H1_RE.match(line)
        if m:
            full_title = m.group(1).strip()
            # Requirement: H1 must be "Product Team Sync".
            # If the title includes a date, keep the date as a subtitle.
            if " - " in full_title:
                title, date = full_title.split(" - ", 1)
                return [
                    ParagraphBlock(kind="h1", text=title.strip()),
                    ParagraphBlock(kind="subtitle", text=date.strip()),
                ]
            return [ParagraphBlock(kind="h1", text=full_title)]

        for kind, pattern in (("h2", self.H2_RE), ("h3", self.H3_RE)):
            m = pattern.match(line)
            if m:
                return [ParagraphBlock(kind=kind, text=m.group(1).strip())]
        return []

    def parse(self, md: str) -> List["ParagraphBlock"]:
        """
        Parse Markdown text into an ordered list of ParagraphBlocks.

        Args:
            md: The Markdown source.

        Returns:
            One block per non-blank input line (an H1 containing " - "
            yields two: h1 and subtitle), in document order.

        Raises:
            ConversionError: If `md` is empty or whitespace only.
        """
        if not md or not md.strip():
            raise ConversionError("Markdown input is empty.")

        blocks: List["ParagraphBlock"] = []
        in_footer = False

        # splitlines() already removes line terminators, so `raw` is matched as-is.
        for raw in md.splitlines():
            text = raw.strip()
            if not text:
                continue

            # Footer begins after the markdown horizontal rule
            if text == "---":
                in_footer = True
                blocks.append(ParagraphBlock(kind="sep", text=self.separator_line))
                continue

            if in_footer:
                blocks.append(ParagraphBlock(kind="footer", text=text))
                continue

            heading = self._heading_blocks(raw)
            if heading:
                blocks.extend(heading)
                continue

            # Checkbox tasks are tested before bullets because every checkbox
            # line also satisfies BULLET_RE.
            m = self.CHECKBOX_RE.match(raw)
            if m:
                blocks.append(
                    ParagraphBlock(kind="checkbox", text=m.group(1).strip(), level=self._indent_level(raw))
                )
                continue

            m = self.BULLET_RE.match(raw)
            if m:
                blocks.append(
                    ParagraphBlock(kind="bullet", text=m.group(2).strip(), level=self._indent_level(raw))
                )
                continue

            # Fallback paragraph
            blocks.append(ParagraphBlock(kind="p", text=text))

        return blocks



# Writer: creates doc, inserts content, applies formatting

class GoogleDocWriter:
    """
    Creates a Google Doc and fills it from ParagraphBlocks via the Docs API.

    All text is inserted with one insertText request; list, text and heading
    styles are then applied by index range. The Docs API measures indexes in
    UTF-16 code units, so block offsets are computed in that unit rather than
    in Python characters.
    """

    MENTION_RE = re.compile(r"@[\w\-]+")
    # Maximum number of formatting requests sent in one batchUpdate call.
    BATCH_SIZE = 80
    # Block kind -> Docs named paragraph style.
    HEADING_STYLES = {"h1": "HEADING_1", "h2": "HEADING_2", "h3": "HEADING_3"}
    # (block kind, bullet preset) pairs, in the order their requests are emitted.
    LIST_PRESETS = (
        ("bullet", "BULLET_DISC_CIRCLE_SQUARE"),
        ("checkbox", "BULLET_CHECKBOX"),
    )

    def __init__(
        self,
        docs_service,
        indent_pt_per_level: int = 18,
        mention_rgb: Optional[Dict[str, float]] = None,
        footer_rgb: Optional[Dict[str, float]] = None,
    ):
        """
        Args:
            docs_service: Google Docs API client (as returned by `build("docs", "v1", ...)`).
            indent_pt_per_level: Points of start indent applied per nesting level.
            mention_rgb: RGB colour (components in 0..1) for `@mention` text.
            footer_rgb: RGB colour used for subtitle, separator and footer text.
        """
        self.docs_service = docs_service
        self.indent_pt_per_level = indent_pt_per_level
        self.mention_rgb = mention_rgb or {"red": 0.10, "green": 0.35, "blue": 0.75}
        self.footer_rgb = footer_rgb or {"red": 0.40, "green": 0.40, "blue": 0.40}

    def create_document(self, title: str) -> str:
        """
        Create an empty document and return its id.

        Raises:
            ConversionError: If the API call fails for any reason.
        """
        try:
            doc = self.docs_service.documents().create(body={"title": title}).execute()
            return doc["documentId"]
        except Exception as e:
            raise ConversionError(f"Failed to create Google Doc: {e}") from e

    @staticmethod
    def _utf16_len(text: str) -> int:
        """Length of `text` in UTF-16 code units (non-BMP characters count as 2)."""
        return len(text.encode("utf-16-le", "surrogatepass")) // 2

    def _assemble_text(self, blocks: List["ParagraphBlock"]) -> str:
        """
        Join all block texts into the single string that is inserted at doc
        index 1, recording each block's offsets on the block as a side effect.

        Offsets are 0-based positions in the returned string, counted in
        UTF-16 code units: `start`/`end` delimit the text, `para_end` also
        includes the paragraph's newline.
        """
        cursor = 0
        parts = []
        for b in blocks:
            b.start = cursor
            b.end = cursor + self._utf16_len(b.text)
            b.para_start = b.start
            b.para_end = b.end + 1  # include newline
            parts.append(b.text + "\n")
            cursor = b.para_end
        return "".join(parts)

    @staticmethod
    def _contiguous_segments(blocks: List["ParagraphBlock"], kinds: set) -> List[Tuple[int, int]]:
        """Return segments (start_idx, end_idx) where block.kind is in kinds, contiguous."""
        segs = []
        i = 0
        while i < len(blocks):
            if blocks[i].kind not in kinds:
                i += 1
                continue
            start = i
            while i < len(blocks) and blocks[i].kind in kinds:
                i += 1
            segs.append((start, i - 1))
        return segs

    def _para_range(self, b: "ParagraphBlock", shift: int) -> Dict[str, int]:
        """Document range covering the block's whole paragraph (newline included)."""
        return {"startIndex": b.para_start + shift, "endIndex": b.para_end + shift}

    def _text_range(self, start: int, end: int, shift: int) -> Dict[str, int]:
        """Document range for the blob offsets [start, end)."""
        return {"startIndex": start + shift, "endIndex": end + shift}

    def _list_requests(
        self, blocks: List["ParagraphBlock"], kind: str, preset: str, shift: int
    ) -> List[Dict[str, Any]]:
        """
        Requests that turn every contiguous run of `kind` blocks into a list
        using `preset`, followed by one start-indent update per paragraph.
        """
        # NOTE(review): per the Docs API reference, createParagraphBullets takes
        # the nesting level from leading tabs. Only indentStart is set here, so
        # nested items are shifted visually but presumably remain at list
        # level 0 - confirm whether true nesting is wanted.
        out: List[Dict[str, Any]] = []
        for first, last in self._contiguous_segments(blocks, {kind}):
            out.append({
                "createParagraphBullets": {
                    "range": {
                        "startIndex": blocks[first].para_start + shift,
                        "endIndex": blocks[last].para_end + shift,
                    },
                    "bulletPreset": preset,
                }
            })
            for b in blocks[first:last + 1]:
                out.append({
                    "updateParagraphStyle": {
                        "range": self._para_range(b, shift),
                        "paragraphStyle": {
                            "indentStart": {"magnitude": self.indent_pt_per_level * b.level, "unit": "PT"}
                        },
                        "fields": "indentStart",
                    }
                })
        return out

    def _kind_style_requests(self, b: "ParagraphBlock", shift: int) -> List[Dict[str, Any]]:
        """
        Text-style request for subtitle/footer (italic + grey) and separator
        (grey only) blocks; empty for every other kind or for empty text.
        """
        if b.kind not in ("subtitle", "footer", "sep") or b.end <= b.start:
            return []
        text_style: Dict[str, Any] = {}
        fields = []
        if b.kind != "sep":
            text_style["italic"] = True
            fields.append("italic")
        text_style["foregroundColor"] = {"color": {"rgbColor": self.footer_rgb}}
        fields.append("foregroundColor")
        return [{
            "updateTextStyle": {
                "range": self._text_range(b.start, b.end, shift),
                "textStyle": text_style,
                "fields": ",".join(fields),
            }
        }]

    def _mention_requests(self, b: "ParagraphBlock", shift: int) -> List[Dict[str, Any]]:
        """Bold + coloured text-style requests for each `@mention` in the block."""
        out: List[Dict[str, Any]] = []
        for m in self.MENTION_RE.finditer(b.text):
            # Regex positions are in Python characters; convert to UTF-16 units.
            ms = b.start + self._utf16_len(b.text[:m.start()])
            me = ms + self._utf16_len(m.group(0))
            out.append({
                "updateTextStyle": {
                    "range": self._text_range(ms, me, shift),
                    "textStyle": {
                        "bold": True,
                        "foregroundColor": {"color": {"rgbColor": self.mention_rgb}},
                    },
                    "fields": "bold,foregroundColor",
                }
            })
        return out

    def _heading_requests(self, blocks: List["ParagraphBlock"], shift: int) -> List[Dict[str, Any]]:
        """Named-style requests (HEADING_1/2/3) for h1/h2/h3 blocks."""
        return [
            {
                "updateParagraphStyle": {
                    "range": self._para_range(b, shift),
                    "paragraphStyle": {"namedStyleType": self.HEADING_STYLES[b.kind]},
                    "fields": "namedStyleType",
                }
            }
            for b in blocks
            if b.kind in self.HEADING_STYLES
        ]

    def write(self, doc_id: str, blocks: List["ParagraphBlock"]) -> None:
        """
        Insert the blocks' text into the document and apply all formatting.

        Args:
            doc_id: Id of the target document (expected to be freshly created,
                since text is inserted at index 1).
            blocks: Paragraphs to write; their offset fields are overwritten.
                An empty list is a no-op and makes no API calls.

        Raises:
            ConversionError: If inserting the text or applying formatting fails.
        """
        if not blocks:
            return

        # Insert content first
        text = self._assemble_text(blocks)
        try:
            self.docs_service.documents().batchUpdate(
                documentId=doc_id,
                body={"requests": [{"insertText": {"location": {"index": 1}, "text": text}}]},
            ).execute()
        except Exception as e:
            raise ConversionError(f"Failed to insert text into document: {e}") from e

        # After insertion at index 1, all our offsets shift by +1
        shift = 1
        requests: List[Dict[str, Any]] = []

        # A) Create bullets and checkboxes
        for kind, preset in self.LIST_PRESETS:
            requests.extend(self._list_requests(blocks, kind, preset, shift))

        # B) Text styling
        for b in blocks:
            requests.extend(self._kind_style_requests(b, shift))
            requests.extend(self._mention_requests(b, shift))

        # C) Headings LAST
        requests.extend(self._heading_requests(blocks, shift))

        # Send formatting requests in small batches
        try:
            for i in range(0, len(requests), self.BATCH_SIZE):
                self.docs_service.documents().batchUpdate(
                    documentId=doc_id,
                    body={"requests": requests[i:i + self.BATCH_SIZE]},
                ).execute()
        except Exception as e:
            raise ConversionError(f"Failed to apply formatting: {e}") from e


# Converter
class MarkdownToGoogleDocConverter:
    """
    Facade that parses Markdown and writes the result into a new Google Doc.

    By default it uses a MarkdownParser (2 spaces per nesting level) and a
    GoogleDocWriter (18pt indent per level); either collaborator can be
    replaced by passing an already configured instance.
    """

    def __init__(
        self,
        docs_service,
        parser: Optional["MarkdownParser"] = None,
        writer: Optional["GoogleDocWriter"] = None,
    ):
        """
        Args:
            docs_service: Google Docs API client handed to the default writer.
            parser: Object with a `parse(markdown_text)` method; built with
                the default settings when omitted.
            writer: Object with `create_document(title)` and
                `write(doc_id, blocks)` methods; built from `docs_service`
                when omitted.
        """
        self.parser = parser if parser is not None else MarkdownParser(
            indent_spaces_per_level=2, separator_line="──────────"
        )
        self.writer = writer if writer is not None else GoogleDocWriter(
            docs_service, indent_pt_per_level=18
        )

    def convert(self, markdown_text: str, doc_title: str) -> str:
        """
        Convert `markdown_text` into a new document titled `doc_title`.

        Parsing happens before the document is created, so a parse failure
        creates nothing. If writing fails afterwards, the already created
        document is left in place.

        Returns:
            The id of the created document.
        """
        blocks = self.parser.parse(markdown_text)
        doc_id = self.writer.create_document(doc_title)
        self.writer.write(doc_id, blocks)
        return doc_id




def verify_named_styles(docs_service, doc_id: str, targets: Optional[List[str]] = None) -> None:
    """
    Print the named paragraph style of selected paragraphs in a document.

    The document is fetched through the Docs API; every paragraph whose
    stripped text equals one of `targets` is reported with its
    `namedStyleType` ("UNKNOWN" if the paragraph carries none). Targets that
    never occur are reported as "NOT FOUND". If a target text occurs more
    than once, the last occurrence wins.

    Args:
        docs_service: Google Docs API client.
        doc_id: Id of the document to inspect.
        targets: Paragraph texts to look up, reported in this order. Defaults
            to the headings of the sample meeting notes.
    """
    if targets is None:
        targets = ["Product Team Sync", "Attendees", "Agenda", "1. Sprint Review"]
    wanted = set(targets)

    doc = docs_service.documents().get(documentId=doc_id).execute()
    content = doc.get("body", {}).get("content", [])

    found = {}
    for el in content:
        para = el.get("paragraph")
        if not para:
            # Not a paragraph element; nothing to check.
            continue

        # A paragraph's text may be split across several text runs.
        runs = (pe.get("textRun") for pe in para.get("elements", []))
        clean = "".join(tr.get("content", "") for tr in runs if tr).strip()

        if clean in wanted:
            found[clean] = para.get("paragraphStyle", {}).get("namedStyleType", "UNKNOWN")

    print("Named style check:")
    for name in targets:
        print(f" - {name}: {found.get(name, 'NOT FOUND')}")



# Run conversion

# Convert the sample notes into a new Google Doc, print its URL, then check
# that the heading paragraphs received the expected named styles. Failures
# are reported as a one-line message rather than a traceback.
try:
    converter = MarkdownToGoogleDocConverter(docs_service)
    doc_id = converter.convert(
        MARKDOWN_NOTES,
        doc_title="Product Team Sync (Converted)",
    )
    print("Document created successfully.")
    print("Open:", f"https://docs.google.com/document/d/{doc_id}/edit")

    # Expected to report HEADING_1/2/3 for the key lines.
    verify_named_styles(docs_service, doc_id)

except ConversionError as conversion_error:
    print("ConversionError:", conversion_error)
except Exception as unexpected_error:
    print("Unexpected error:", unexpected_error)

Document created successfully.
Open: https://docs.google.com/document/d/1zdZ7uctaE7HRWLh-O_gzA1co-2XqChaN13IqGZO44gM/edit
Named style check:
 - Product Team Sync: HEADING_1
 - Attendees: HEADING_2
 - Agenda: HEADING_2
 - 1. Sprint Review: HEADING_3
