In [1]:
# ============================================================
# Google Colab: Markdown Meeting Notes -> Formatted Google Doc
# ============================================================

# --- 1) Install & import dependencies (Colab) ---
!pip -q install google-api-python-client google-auth google-auth-httplib2 google-auth-oauthlib

import re
from dataclasses import dataclass
from typing import List, Optional, Tuple

from google.colab import auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.auth import exceptions as google_auth_exceptions

In [2]:
# --- 2) Input markdown (from the assessment prompt) ---
# Sample meeting notes used as the converter's input. The text contains an H1 title,
# H2/H3 section headings, "-" and "*" bullets nested with two spaces per level,
# "- [ ]" task items that carry @mentions, and a "---" rule followed by two footer lines.
MARKDOWN_NOTES = """# Product Team Sync - May 15, 2023

## Attendees
- Sarah Chen (Product Lead)
- Mike Johnson (Engineering)
- Anna Smith (Design)
- David Park (QA)

## Agenda

### 1. Sprint Review
* Completed Features
  * User authentication flow
  * Dashboard redesign
  * Performance optimization
    * Reduced load time by 40%
    * Implemented caching solution
* Pending Items
  * Mobile responsive fixes
  * Beta testing feedback integration

### 2. Current Challenges
* Resource constraints in QA team
* Third-party API integration delays
* User feedback on new UI
  * Navigation confusion
  * Color contrast issues

### 3. Next Sprint Planning
* Priority Features
  * Payment gateway integration
  * User profile enhancement
  * Analytics dashboard
* Technical Debt
  * Code refactoring
  * Documentation updates

## Action Items
- [ ] @sarah: Finalize Q3 roadmap by Friday
- [ ] @mike: Schedule technical review for payment integration
- [ ] @anna: Share updated design system documentation
- [ ] @david: Prepare QA resource allocation proposal

## Next Steps
* Schedule individual team reviews
* Update sprint board
* Share meeting summary with stakeholders

## Notes
* Next sync scheduled for May 22, 2023
* Platform demo for stakeholders on May 25
* Remember to update JIRA tickets

---
Meeting recorded by: Sarah Chen
Duration: 45 minutes
"""

In [3]:
# --- 3) Data model for parsed lines ---
@dataclass
class LineInfo:
    """One line of the source markdown plus the document range it will occupy.

    Instances are created by ``parse_markdown``; ``start`` and ``end`` stay 0
    until ``build_doc_text_and_ranges`` fills them in.
    """
    # The markdown line as split from the input, markers and indentation included.
    raw: str
    # The line's content with heading/list markers removed; "" for "blank" and "hr".
    text: str
    kind: str  # "h1" | "h2" | "h3" | "bullet" | "checkbox" | "hr" | "footer" | "normal" | "blank"
    level: int = 0  # nesting depth of a bullet/checkbox line; 0 is the top level
    start: int = 0  # doc index of the line's first character (computed later)
    end: int = 0    # doc index just past the line's trailing newline (computed later)

In [4]:
# --- 4) Markdown parsing ---
# Matches an "@" followed by one or more word characters, e.g. "@sarah".
# Nothing constrains what precedes the "@", so the "@example" part of an
# e-mail address such as "me@example.com" matches as well.
MENTION_RE = re.compile(r"@\w+")

def _count_indent_level(raw_line: str) -> int:
    """Translate a line's leading-space indentation into a list nesting level.

    Only space characters are counted (tabs are not). An unindented line is
    level 0; otherwise the level is one per two spaces, with a floor of 1 so
    that a one-space indent still counts as nested.
    """
    without_indent = raw_line.lstrip(" ")
    indent_width = len(raw_line) - len(without_indent)
    # 1 // 2 == 0, so the floor of 1 also covers the single-space case.
    return max(1, indent_width // 2) if indent_width > 0 else 0

# Task-list item: optional indent, a list marker (-, * or +), then "[ ]", "[x]" or "[X]".
_CHECKBOX_RE = re.compile(r"^\s*[-*+]\s*\[( |x|X)\]\s+(.*)$")
# Plain list item: optional indent, a list marker (-, * or +), whitespace, then the label.
_BULLET_RE = re.compile(r"^(\s*)[-*+]\s+(.*)$")
# Heading prefixes (matched at column 0) and the LineInfo kind each one produces.
_HEADING_PREFIXES = (("# ", "h1"), ("## ", "h2"), ("### ", "h3"))


def parse_markdown(md: str) -> List[LineInfo]:
    """
    Classify every line of ``md`` and return one LineInfo per line, in order.

    Each line is tested against these rules, first match wins:
      1. whitespace-only                      -> kind "blank", empty text
      2. exactly "---" once trimmed           -> kind "hr", empty text; from here on
         every non-blank line is kind "footer" (trimmed text), whatever it looks like
      3. starts with "# ", "## " or "### "    -> kind "h1" / "h2" / "h3"
      4. list marker followed by "[ ]"/"[x]"  -> kind "checkbox"
      5. list marker followed by text         -> kind "bullet"
      6. anything else                        -> kind "normal" (trimmed text)

    List markers are "-", "*" and "+", so task items written as "* [ ] ..." are
    recognised as checkboxes rather than as bullets containing a literal "[ ]".
    The checked/unchecked state of a task item is not recorded. ``level`` of list
    lines comes from ``_count_indent_level``; ``start``/``end`` are left at 0.
    """
    lines: List[LineInfo] = []
    in_footer = False

    # splitlines() already drops the line terminators, so no further stripping is needed.
    for line in md.splitlines():
        trimmed = line.strip()

        if trimmed == "":
            lines.append(LineInfo(raw=line, text="", kind="blank"))
            continue

        if trimmed == "---":
            lines.append(LineInfo(raw=line, text="", kind="hr"))
            in_footer = True
            continue

        if in_footer:
            # Footer lines keep their text; they are styled separately later.
            lines.append(LineInfo(raw=line, text=trimmed, kind="footer"))
            continue

        # Headings
        heading = next(((p, k) for p, k in _HEADING_PREFIXES if line.startswith(p)), None)
        if heading is not None:
            prefix, kind = heading
            lines.append(LineInfo(raw=line, text=line[len(prefix):].strip(), kind=kind))
            continue

        # Checkboxes must be tested before bullets: every checkbox line also
        # matches the bullet pattern.
        m_cb = _CHECKBOX_RE.match(line)
        if m_cb:
            lines.append(LineInfo(
                raw=line,
                text=m_cb.group(2).strip(),
                kind="checkbox",
                level=_count_indent_level(line),
            ))
            continue

        # Bullets (indentation allowed)
        m_b = _BULLET_RE.match(line)
        if m_b:
            lines.append(LineInfo(
                raw=line,
                text=m_b.group(2).strip(),
                kind="bullet",
                level=_count_indent_level(line),
            ))
            continue

        # Fallback
        lines.append(LineInfo(raw=line, text=trimmed, kind="normal"))

    return lines


In [5]:
# --- 5) Google Docs creation & formatting requests ---
def colab_authenticate():
    """
    Run Colab's interactive authorization prompt for the current user.

    Calls ``auth.authenticate_user()`` from ``google.colab``.

    Raises:
        RuntimeError: if the call raises ``GoogleAuthError``. The original
            exception is attached as ``__cause__``.
    """
    try:
        auth.authenticate_user()
    except google_auth_exceptions.GoogleAuthError as e:
        # "from e" makes the underlying auth failure part of the traceback chain.
        raise RuntimeError(f"Authentication failed: {e}") from e

def create_doc(service, title: str) -> str:
    """Create a new document titled ``title`` and return its document ID.

    ``service`` is used as a Docs client: the function calls
    ``service.documents().create(body=...).execute()`` and reads the
    ``"documentId"`` key of the response.
    """
    create_call = service.documents().create(body={"title": title})
    created = create_call.execute()
    return created["documentId"]

def build_doc_text_and_ranges(lines: List[LineInfo]) -> str:
    """
    Render the parsed lines to plain text and record each line's document range.

    Every entry becomes one paragraph: its ``text`` followed by "\\n". Entries of
    kind "hr" and "blank" render as an empty paragraph (just the newline).

    Side effect: ``start`` and ``end`` of every entry are overwritten. ``start``
    is the index of the line's first character and ``end`` is the index just past
    its newline, assuming the returned text is inserted at index 1 of the body.

    Indexes are counted in UTF-16 code units, which is how the Docs API measures
    them, so characters outside the Basic Multilingual Plane (most emoji) take
    two index positions.

    Returns:
        The concatenated text of all lines.
    """
    cursor = 1  # body content of a document begins at index 1
    parts = []

    for li in lines:
        rendered = "" if li.kind in ("hr", "blank") else li.text
        line_text = rendered + "\n"

        li.start = cursor
        # Two bytes per UTF-16 code unit; "surrogatepass" keeps stray surrogates
        # from raising instead of being counted.
        li.end = cursor + len(line_text.encode("utf-16-le", "surrogatepass")) // 2
        cursor = li.end
        parts.append(line_text)

    return "".join(parts)

def req_update_paragraph_style(start: int, end: int, named_style: str, fields: str = "namedStyleType"):
    """Build an ``updateParagraphStyle`` request that sets a named paragraph style.

    Args:
        start: ``startIndex`` of the target range.
        end: ``endIndex`` of the target range.
        named_style: value stored under ``namedStyleType`` (e.g. "HEADING_1").
        fields: field mask sent with the request; defaults to "namedStyleType".

    Returns:
        The request as a dict, ready to be placed in a batchUpdate ``requests`` list.
    """
    target_range = {"startIndex": start, "endIndex": end}
    paragraph_style = {"namedStyleType": named_style}
    payload = {
        "range": target_range,
        "paragraphStyle": paragraph_style,
        "fields": fields,
    }
    return {"updateParagraphStyle": payload}

def req_create_bullets(start: int, end: int, preset: str):
    """Build a ``createParagraphBullets`` request.

    Args:
        start: ``startIndex`` of the range whose paragraphs become list items.
        end: ``endIndex`` of that range.
        preset: value sent as ``bulletPreset`` (e.g. "BULLET_CHECKBOX").

    Returns:
        The request as a dict, ready to be placed in a batchUpdate ``requests`` list.
    """
    target_range = {"startIndex": start, "endIndex": end}
    payload = {"range": target_range, "bulletPreset": preset}
    return {"createParagraphBullets": payload}

def req_set_nesting_level(start: int, end: int, level: int):
    """Build an ``updateParagraphStyle`` request carrying a ``nestingLevel`` value.

    The request targets ``[start, end)`` and uses the field mask "nestingLevel".

    NOTE(review): the public ParagraphStyle reference does not appear to list a
    ``nestingLevel`` field; list depth seems to be derived from leading tabs when
    ``createParagraphBullets`` runs. Confirm the API accepts this request before
    relying on it.
    """
    target_range = {"startIndex": start, "endIndex": end}
    payload = {
        "range": target_range,
        "paragraphStyle": {"nestingLevel": level},
        "fields": "nestingLevel",
    }
    return {"updateParagraphStyle": payload}

def req_update_text_style(start: int, end: int, *, bold=None, italic=None, font_size_pt=None, rgb=None):
    """Build an ``updateTextStyle`` request for the range ``[start, end)``.

    Only the keyword arguments that are not ``None`` end up in the request; the
    field mask lists exactly those properties, in the order bold, italic,
    fontSize, foregroundColor.

    Args:
        start: ``startIndex`` of the target range.
        end: ``endIndex`` of the target range.
        bold: truthy/falsy value stored as a bool under ``bold``.
        italic: truthy/falsy value stored as a bool under ``italic``.
        font_size_pt: font size, converted to float and sent with unit "PT".
        rgb: three-item iterable ``(red, green, blue)`` used as the foreground
            colour; the caller in this file passes components in the 0..1 range.

    Returns:
        The request as a dict. With no style arguments the text style is empty
        and the field mask is "".
    """
    text_style = {}

    if bold is not None:
        text_style["bold"] = bool(bold)
    if italic is not None:
        text_style["italic"] = bool(italic)
    if font_size_pt is not None:
        text_style["fontSize"] = {"magnitude": float(font_size_pt), "unit": "PT"}
    if rgb is not None:
        red, green, blue = rgb
        text_style["foregroundColor"] = {
            "color": {"rgbColor": {"red": red, "green": green, "blue": blue}}
        }

    # The style keys are inserted in mask order, so they double as the field list.
    payload = {
        "range": {"startIndex": start, "endIndex": end},
        "textStyle": text_style,
        "fields": ",".join(text_style),
    }
    return {"updateTextStyle": payload}

# Heading kinds and the named paragraph style each one receives.
_HEADING_STYLES = {"h1": "HEADING_1", "h2": "HEADING_2", "h3": "HEADING_3"}
# List-item kinds and the glyph preset passed to createParagraphBullets.
_LIST_PRESETS = {"bullet": "BULLET_DISC_CIRCLE_SQUARE", "checkbox": "BULLET_CHECKBOX"}
# Docs lists offer nesting levels 0..8; deeper markdown indentation is clamped.
_MAX_NESTING_LEVEL = 8
# Foreground colours as (red, green, blue) components in 0..1.
_MENTION_RGB = (0.11, 0.46, 0.95)
_FOOTER_RGB = (0.45, 0.45, 0.45)


def _utf16_units(text: str) -> int:
    """Return the length of ``text`` in UTF-16 code units, the unit of Docs indexes."""
    return len(text.encode("utf-16-le", "surrogatepass")) // 2


def _list_run_requests(run: List[LineInfo]) -> List[dict]:
    """Build the requests that turn one contiguous run of same-kind list lines into a list.

    createParagraphBullets derives each paragraph's nesting level from its leading
    tabs and then removes those tabs, so the run is first indented with one tab
    per level and then bulleted in a single request. Once the tabs are consumed
    every index is back to the value stored on the LineInfo records.
    """
    reqs: List[dict] = []
    tabs_added = 0

    # Insert back-to-front so an insertion never shifts a line that is still
    # waiting for its own tabs.
    for li in reversed(run):
        depth = min(li.level, _MAX_NESTING_LEVEL)
        if depth > 0:
            reqs.append({
                "insertText": {
                    "location": {"index": li.start},
                    "text": "\t" * depth,
                }
            })
            tabs_added += depth

    # The run grew by the inserted tabs, so the range end moves by the same amount.
    reqs.append(req_create_bullets(run[0].start, run[-1].end + tabs_added, _LIST_PRESETS[run[0].kind]))
    return reqs


def build_formatting_requests(lines: List[LineInfo]) -> List[dict]:
    """
    Build the batchUpdate requests that format text already inserted in the document.

    ``lines`` must be in document order and carry the ``start``/``end`` ranges
    computed by ``build_doc_text_and_ranges``. The requests are meant to run in the
    returned order, after the text itself has been inserted:

      1. Headings   - "h1"/"h2"/"h3" lines get HEADING_1/2/3.
      2. Lists      - each contiguous run of "bullet" (or "checkbox") lines becomes
                      one list; nesting is expressed with temporary leading tabs
                      that createParagraphBullets converts into nesting levels.
      3. Footer     - "footer" lines (newline excluded) become 10pt gray italic.
      4. Mentions   - every @mention in bullet/checkbox/normal/footer lines becomes
                      bold blue. Mentions are styled last so their colour is not
                      overwritten by the footer colour.

    Returns:
        The list of request dicts; empty when no line needs formatting.
    """
    requests: List[dict] = []

    # 1) Headings
    for li in lines:
        named_style = _HEADING_STYLES.get(li.kind)
        if named_style is not None:
            requests.append(req_update_paragraph_style(li.start, li.end, named_style))

    # 2) Lists: collect consecutive lines of the same list kind and flush each run.
    run: List[LineInfo] = []
    for li in lines:
        if run and li.kind != run[0].kind:
            requests.extend(_list_run_requests(run))
            run = []
        if li.kind in _LIST_PRESETS:
            run.append(li)
    if run:
        requests.extend(_list_run_requests(run))

    # 3) Footer base style, applied to the line without its final newline.
    for li in lines:
        if li.kind != "footer":
            continue
        text_end = li.end - 1
        if text_end > li.start:  # an empty range would be rejected by the API
            requests.append(req_update_text_style(
                li.start, text_end, italic=True, font_size_pt=10, rgb=_FOOTER_RGB
            ))

    # 4) Mentions (bold + blue)
    for li in lines:
        if li.kind not in ("bullet", "checkbox", "normal", "footer"):
            continue
        for m in MENTION_RE.finditer(li.text):
            # The rendered line starts at li.start; offsets inside it are converted
            # from Python string positions to UTF-16 code units.
            mention_start = li.start + _utf16_units(li.text[:m.start()])
            mention_end = li.start + _utf16_units(li.text[:m.end()])
            requests.append(req_update_text_style(mention_start, mention_end, bold=True, rgb=_MENTION_RGB))

    return requests


def markdown_to_google_doc(md: str, doc_title: str = "Product Team Sync") -> Tuple[str, str]:
    """
    Convert markdown meeting notes into a new, formatted Google Doc.

    Steps: authenticate in Colab, build a Docs v1 client, parse ``md``, create an
    empty document titled ``doc_title``, then send the text insertion and all
    formatting requests in a single batchUpdate.

    Args:
        md: the markdown source.
        doc_title: title of the document to create.

    Returns:
        ``(document_id, document_url)``.

    Raises:
        RuntimeError: if authentication fails, or if the batchUpdate call raises
            ``HttpError`` (the original error is attached as ``__cause__``). The
            document has already been created at that point and is left in place.
    """
    colab_authenticate()
    docs_service = build("docs", "v1")

    # Parse markdown
    lines = parse_markdown(md)

    # Create doc
    document_id = create_doc(docs_service, doc_title)

    # Insert all text at once, then format it. The ranges used by the formatting
    # requests are computed while the text is built, so the order matters.
    doc_text = build_doc_text_and_ranges(lines)
    requests: List[dict] = []
    if doc_text:
        requests.append({
            "insertText": {
                "location": {"index": 1},
                "text": doc_text
            }
        })
        requests.extend(build_formatting_requests(lines))

    # Empty markdown yields no text; an insertText with empty text (or a batch
    # with no requests) is rejected by the API, so the empty document is kept as is.
    if requests:
        try:
            docs_service.documents().batchUpdate(
                documentId=document_id,
                body={"requests": requests}
            ).execute()
        except HttpError as e:
            raise RuntimeError(f"Google Docs API error: {e}") from e

    doc_url = f"https://docs.google.com/document/d/{document_id}/edit"
    return document_id, doc_url


In [None]:
# --- 6) Run it ---
# Convert the sample notes and print the link to the document that was created.
conversion_result = markdown_to_google_doc(MARKDOWN_NOTES, doc_title="Product Team Sync")
doc_id, url = conversion_result
print("Created Google Doc:", url, sep="\n")
