# Google Docs Markdown Converter

This notebook converts Markdown content to Google Docs format with proper styling and formatting.

## Step 1: Install Required Dependencies

Install the Google API client libraries needed for Google Docs integration.

In [None]:
print("\nInstalling Required Dependencies...")
!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib -q

## Step 2: Authenticate with Google

Define helper functions that authenticate with the Google Docs API (through Google Colab) and create a new document.

In [None]:
import re
from google.colab import auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


def authenticate_google_docs():
    """Log in through Colab and return a Google Docs API client.

    Returns:
        The service object produced by ``build('docs', 'v1')``.

    Raises:
        Exception: Any error raised during login or client construction is
            reported on stdout and then re-raised unchanged.
    """
    try:
        print("Authenticating with Google Docs API...")
        auth.authenticate_user()
        docs_service = build('docs', 'v1')
        print("Google Docs API authenticated")
    except Exception as exc:
        # Report the failure for the notebook reader, then let it propagate.
        print(f"Authentication failed: {exc!s}")
        raise
    return docs_service


def create_google_doc(service, title):
    """Create an empty Google Doc and return its document ID.

    Args:
        service: Google Docs API client exposing ``documents().create``.
        title: Title given to the new document.

    Returns:
        The ``documentId`` field of the create response.

    Raises:
        HttpError: Reported on stdout and re-raised when the API call fails.
    """
    try:
        print("Creating Google Doc...")
        create_call = service.documents().create(body={'title': title})
        return create_call.execute()['documentId']
    except HttpError as api_error:
        print(f"Failed to create Google Doc: {api_error!s}")
        raise

## Step 3: Markdown Parsing Functions

These functions parse Markdown content and convert it into a structured format for Google Docs.

In [None]:
# Line-level Markdown patterns, compiled once and reused for every line.
# Heading: group 1 is the run of '#' characters, group 2 the heading text.
HEADING_PATTERN = re.compile(r'^(#+)\s+(.*)$')
# Unchecked task item ("- [ ]"): group 1 is the leading whitespace, group 2 the text.
CHECKBOX_PATTERN = re.compile(r'^(\s*)- \[ \](.*)$')
# Bullet introduced by '*' or '-': group 1 is the leading whitespace, group 2 the text.
BULLET_PATTERN = re.compile(r'^(\s*)[*\-]\s+(.*)$')
# Horizontal rule: a line made up solely of three or more dashes.
SEPARATOR_PATTERN = re.compile(r'^---+$')
# "@name" mention: group 1 is the run of word characters after '@'.
ASSIGNEE_PATTERN = re.compile(r'@([\w]+)')

# Identifiers for the kinds of parsed lines; every key maps to itself.
CONTENT_TYPES = {
    kind: kind
    for kind in (
        'HEADING1',
        'HEADING2',
        'HEADING3',
        'CHECKBOX_ITEM',
        'BULLET_ITEM',
        'SEPARATOR',
        'PARAGRAPH',
    )
}

def _heading_style(named_style):
    """Build the paragraph-style entry for one named heading style."""
    return {'namedStyleType': named_style, 'fields': 'namedStyleType'}


def _rgb(red, green, blue):
    """Build a foreground-colour value in the nested rgbColor shape."""
    return {'color': {'rgbColor': {'red': red, 'green': green, 'blue': blue}}}


def _muted_italic(point_size):
    """Build the grey italic text style used by the footer and separator entries."""
    return {
        'italic': True,
        'foregroundColor': _rgb(0.42, 0.42, 0.42),
        'fontSize': {'magnitude': point_size, 'unit': 'PT'},
        'fields': 'italic,foregroundColor,fontSize',
    }


# Pre-defined styling configurations, keyed by content type or style role.
# A 'fields' entry names the style properties that the entry sets.
STYLING_CONFIG = {
    'HEADING1': _heading_style('HEADING_1'),
    'HEADING2': _heading_style('HEADING_2'),
    'HEADING3': _heading_style('HEADING_3'),
    'BULLET_ITEM': {'bulletPreset': 'BULLET_DISC_CIRCLE_SQUARE'},
    'CHECKBOX_ITEM': {'bulletPreset': 'BULLET_CHECKBOX'},
    'FOOTER': _muted_italic(11),
    'SEPARATOR_STYLE': _muted_italic(10),
    'ASSIGNEE': {
        'bold': True,
        'foregroundColor': _rgb(0.23, 0.42, 0.81),
        'fields': 'bold,foregroundColor',
    },
}


def parse_markdown_content(markdown_text):
    """Parse markdown content using pre-compiled regex patterns"""
    lines = markdown_text.strip().split('\n')
    structured_content = []

    for line in lines:
        line = line.rstrip()
        stripped_line = line.strip()
        
        if not stripped_line:
            continue

        # Detect heading levels
        heading_match = HEADING_PATTERN.match(line)
        if heading_match:
            level = len(heading_match.group(1))
            content = heading_match.group(2).strip()
            heading_type = f"HEADING{min(level, 3)}"
            structured_content.append({'type': heading_type, 'content': content})
            continue

        # Detect checkbox items
        checkbox_match = CHECKBOX_PATTERN.match(line)
        if checkbox_match:
            indent_level = len(checkbox_match.group(1))
            content = checkbox_match.group(2).strip()
            structured_content.append({
                'type': CONTENT_TYPES['CHECKBOX_ITEM'],
                'content': content,
                'indent': indent_level
            })
            continue

        # Detect regular bullet points
        bullet_match = BULLET_PATTERN.match(line)
        if bullet_match:
            indent_level = len(bullet_match.group(1))
            content = bullet_match.group(2).strip()
            structured_content.append({
                'type': CONTENT_TYPES['BULLET_ITEM'],
                'content': content,
                'indent': indent_level
            })
            continue

        # Detect horizontal rule/separator
        if SEPARATOR_PATTERN.match(stripped_line):
            structured_content.append({'type': CONTENT_TYPES['SEPARATOR'], 'content': ''})
            continue

        # Default to paragraph
        structured_content.append({'type': CONTENT_TYPES['PARAGRAPH'], 'content': stripped_line})

    return structured_content

In [None]:
def _utf16_len(text):
    """Return the length of ``text`` in UTF-16 code units.

    Characters outside the Basic Multilingual Plane (most emoji, for example)
    count as two units, whereas ``len()`` counts them as one. The
    ``surrogatepass`` handler keeps a lone surrogate from raising.
    """
    return len(text.encode('utf-16-le', 'surrogatepass')) // 2


def _text_style_request(start_index, end_index, style_config):
    """Build an updateTextStyle request from a STYLING_CONFIG-style entry."""
    return {
        'updateTextStyle': {
            'range': {'startIndex': start_index, 'endIndex': end_index},
            # Every key except 'fields' is a text-style property.
            'textStyle': {k: v for k, v in style_config.items() if k != 'fields'},
            'fields': style_config['fields']
        }
    }


def _list_item_requests(start_index, end_index, bullet_preset, indent_level):
    """Build the bullet request, plus an indent request when nested, for one list paragraph."""
    paragraph_range = {'startIndex': start_index, 'endIndex': end_index}
    list_requests = [{
        'createParagraphBullets': {
            'range': dict(paragraph_range),
            'bulletPreset': bullet_preset
        }
    }]

    if indent_level > 0:
        # 18 PT of indent per leading whitespace character recorded by the parser.
        indent_pt = 18 * indent_level
        list_requests.append({
            'updateParagraphStyle': {
                'range': dict(paragraph_range),
                'paragraphStyle': {
                    'indentFirstLine': {'magnitude': indent_pt, 'unit': 'PT'},
                    'indentStart': {'magnitude': indent_pt, 'unit': 'PT'}
                },
                'fields': 'indentFirstLine,indentStart'
            }
        })

    return list_requests


def create_document_requests(structured_content):
    """Create Google Docs API requests from structured content.

    Each item produces an ``insertText`` request followed by the styling
    requests for its type: heading style, list bullets (plus indent),
    "@name" mention styling, and separator or footer text styling. Every
    non-separator item after the first separator receives the footer style.

    Offsets are tracked in UTF-16 code units, the unit the Docs API uses for
    indexes. Counting Python characters instead would misplace every range
    that follows a non-BMP character such as an emoji.

    Args:
        structured_content: Items as produced by ``parse_markdown_content``:
            dicts with ``type`` and ``content``, and ``indent`` for list items.

    Returns:
        A list of request dicts, in the order they must be applied, written
        for a document whose body is still empty (insertion starts at index 1).
    """
    requests = []
    current_index = 1
    found_separator = False

    heading_types = (
        CONTENT_TYPES['HEADING1'],
        CONTENT_TYPES['HEADING2'],
        CONTENT_TYPES['HEADING3'],
    )
    list_types = (CONTENT_TYPES['BULLET_ITEM'], CONTENT_TYPES['CHECKBOX_ITEM'])
    assignee_config = STYLING_CONFIG['ASSIGNEE']

    for item in structured_content:
        content_type = item['type']
        content = item['content']
        is_separator = content_type == CONTENT_TYPES['SEPARATOR']

        if is_separator:
            found_separator = True
            # Rendered as a blank line followed by a rule of 42 dashes.
            text_to_insert = '\n' + '-' * 42 + '\n'
        else:
            text_to_insert = content + '\n'

        requests.append({
            'insertText': {
                'location': {'index': current_index},
                'text': text_to_insert
            }
        })

        start_index = current_index
        end_index = start_index + _utf16_len(text_to_insert)

        # Paragraph-level styling.
        if content_type in heading_types:
            style_config = STYLING_CONFIG[content_type]
            requests.append({
                'updateParagraphStyle': {
                    'range': {'startIndex': start_index, 'endIndex': end_index},
                    'paragraphStyle': {'namedStyleType': style_config['namedStyleType']},
                    'fields': style_config['fields']
                }
            })
        elif content_type in list_types:
            requests.extend(_list_item_requests(
                start_index,
                end_index,
                STYLING_CONFIG[content_type]['bulletPreset'],
                item.get('indent', 0),
            ))

        # Style assignee mentions (@name). Regex offsets are in Python
        # characters, so convert the prefix and the match to UTF-16 units.
        for match in ASSIGNEE_PATTERN.finditer(content):
            assignee_start = start_index + _utf16_len(content[:match.start()])
            assignee_end = assignee_start + _utf16_len(match.group(0))
            requests.append(
                _text_style_request(assignee_start, assignee_end, assignee_config)
            )

        # The separator gets its own text style; everything after it is footer.
        if is_separator:
            requests.append(_text_style_request(
                start_index, end_index, STYLING_CONFIG['SEPARATOR_STYLE']
            ))
        elif found_separator:
            requests.append(_text_style_request(
                start_index, end_index, STYLING_CONFIG['FOOTER']
            ))

        current_index = end_index

    return requests

## Step 4: Main Converter Class

This class provides a clean interface for converting Markdown to Google Docs.

In [None]:
class GoogleDocConverter:
    """Stateful helper that creates a Google Doc and fills it from Markdown.

    State kept on the instance:
        service: Docs API client, created on first use.
        doc_id: ID of the most recently created document, or None.
        _is_authenticated: True once ``service`` has been obtained.
    """

    def __init__(self):
        """Start with no API client and no document."""
        self.service = None
        self.doc_id = None
        self._is_authenticated = False

    def _check_auth(self):
        """Authenticate on first use; return True when a client is available."""
        if self._is_authenticated:
            return True

        try:
            self.service = authenticate_google_docs()
        except Exception as exc:
            print(f"Authentication failed: {exc!s}")
            return False

        self._is_authenticated = True
        return True

    def create_document(self, title="Meeting Notes"):
        """Create a new Google Doc and remember its ID.

        Returns the new document ID, or None when authentication or the
        creation call fails (the failure is printed, not raised).
        """
        if not self._check_auth():
            return None

        try:
            self.doc_id = create_google_doc(self.service, title)
            print(f"Created document: {title}")
        except Exception as exc:
            print(f"Failed to create document: {exc!s}")
            return None

        return self.doc_id

    def convert(self, markdown_content):
        """Parse Markdown and write it into the current document.

        Returns the document ID on success, or None when no document has
        been created yet or when any step fails (the failure is printed).
        """
        if not self.doc_id:
            print("No document created. Please call create_document() first.")
            return None

        try:
            api_requests = create_document_requests(
                parse_markdown_content(markdown_content)
            )

            # An empty request list means there is nothing to send.
            if api_requests:
                self.service.documents().batchUpdate(
                    documentId=self.doc_id,
                    body={'requests': api_requests}
                ).execute()

            print("Conversion completed")
            print(f"\nDocument is ready @: https://docs.google.com/document/d/{self.doc_id}/edit")
        except Exception as exc:
            print(f"An error occurred during conversion: {exc!s}")
            return None

        return self.doc_id

    def convert_and_create(self, markdown_content, title="Meeting Notes"):
        """Create a document, then convert into it; None if creation fails."""
        if not self.create_document(title):
            return None
        return self.convert(markdown_content)

## Step 5: Running Everything - Example Usage

Execute the conversion using the class-based interface.

In [None]:
def use_example_content():
    """Convert the built-in sample meeting notes into a new Google Doc.

    Creates a GoogleDocConverter, asks it to create a document titled
    "Product Team Sync - Meeting Notes", and converts the sample Markdown
    into it.

    Returns:
        The converter's ``doc_id`` after both calls.
    """
    print("\nUsing example meeting notes...")

    sample_notes = """
# Product Team Sync - May 15, 2023


## Attendees

- Sarah Chen (Product Lead)
- Mike Johnson (Engineering)
- Anna Smith (Design)
- David Park (QA)


## Agenda

### 1. Sprint Review

* Completed Features
  * User authentication flow
  * Dashboard redesign
  * Performance optimization
    * Reduced load time by 40%
    * Implemented caching solution
* Pending Items
  * Mobile responsive fixes
  * Beta testing feedback integration


### 2. Current Challenges

* Resource constraints in QA team
* Third-party API integration delays
* User feedback on new UI
  * Navigation confusion
  * Color contrast issues


### 3. Next Sprint Planning

* Priority Features
  * Payment gateway integration
  * User profile enhancement
  * Analytics dashboard
* Technical Debt
  * Code refactoring
  * Documentation updates


## Action Items

- [ ] @sarah: Finalize Q3 roadmap by Friday
- [ ] @mike: Schedule technical review for payment integration
- [ ] @anna: Share updated design system documentation
- [ ] @david: Prepare QA resource allocation proposal


## Next Steps

* Schedule individual team reviews
* Update sprint board
* Share meeting summary with stakeholders


## Notes

* Next sync scheduled for May 22, 2023
* Platform demo for stakeholders on May 25
* Remember to update JIRA tickets


---

Meeting recorded by: Sarah Chen
Duration: 45 minutes
"""

    # Create the document first, then fill it; convert() runs either way.
    doc_converter = GoogleDocConverter()
    doc_converter.create_document("Product Team Sync - Meeting Notes")
    doc_converter.convert(sample_notes)

    return doc_converter.doc_id


def main():
    """Run the example conversion by calling use_example_content()."""
    use_example_content()

# Run the example when this code executes as the top-level module.
if __name__ == "__main__":
    main()

## Ready to Run!

You can now execute the cells in order:
1. Install Dependencies
2. Authenticate with Google
3. Define Markdown parsing functions
4. Define main converter class
5. Run the example conversion

The notebook will create a Google Doc with the formatted meeting notes using the new class-based interface!