In [4]:
import copy
import json
import re
import uuid
from pathlib import Path

def split_notebook_cells(notebook_path, output_path=None):
    """
    Split markdown cells containing Python code blocks into separate markdown and code cells.

    The notebook is read as JSON, every markdown cell is passed through
    ``process_markdown_cell``, and the resulting cell list is written back out.
    Non-markdown cells are kept as they are. Progress and errors are reported
    with ``print``; no exception escapes this function.

    Args:
        notebook_path (str): Path to the input .ipynb file
        output_path (str, optional): Path for the output .ipynb file. If None, overwrites the original.

    Returns:
        bool: True if processing was successful, False otherwise
    """
    try:
        # Load the notebook
        with open(notebook_path, 'r', encoding='utf-8') as f:
            notebook = json.load(f)

        new_cells = []
        cells_modified = 0

        for cell in notebook['cells']:
            if cell['cell_type'] != 'markdown':
                # Keep other cell types as they are
                new_cells.append(cell)
                continue

            processed_cells = process_markdown_cell(cell)
            new_cells.extend(processed_cells)

            # process_markdown_cell hands back [cell] when it leaves the cell
            # alone. Anything else is a rewrite, including the case where a
            # markdown cell that is nothing but a code fence turns into a
            # single code cell, which a plain length check would not count.
            if processed_cells != [cell]:
                cells_modified += 1

        # Update the notebook with new cells
        notebook['cells'] = new_cells

        # Determine output path
        if output_path is None:
            output_path = notebook_path

        # Save the modified notebook
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(notebook, f, indent=2, ensure_ascii=False)

        print("✅ Processing completed!")
        print(f"📁 Saved to: {output_path}")
        print(f"📊 {cells_modified} markdown cells were split")

        return True

    except FileNotFoundError as e:
        # The missing path may be the output location (e.g. a directory that
        # does not exist), so report the path the OS actually complained about.
        missing_path = e.filename or notebook_path
        print(f"❌ Error: File '{missing_path}' not found")
        return False
    except json.JSONDecodeError:
        print(f"❌ Error: Invalid JSON in '{notebook_path}'")
        return False
    except Exception as e:
        # Last-resort boundary: the documented contract is "return False", not raise.
        print(f"❌ Error processing notebook: {e}")
        return False

def process_markdown_cell(cell):
    """
    Process a single markdown cell and split it if it contains Python code blocks.

    Each fenced ```python block becomes a code cell; the text before, between
    and after the blocks becomes markdown cells. Markdown pieces are stripped
    of surrounding whitespace and dropped when empty, and code blocks that
    contain only whitespace are dropped as well.

    Args:
        cell (dict): The markdown cell from the notebook

    Returns:
        list: List of cells (markdown and/or code cells). The original cell is
        returned unchanged, as the only element, when it contains no Python
        code block or when splitting would leave no cells at all.
    """
    source = ''.join(cell['source']) if isinstance(cell['source'], list) else cell['source']

    # Pattern to match Python code blocks; DOTALL lets the body span lines.
    python_code_pattern = r'```python\n(.*?)\n```'

    # Find all Python code blocks
    matches = list(re.finditer(python_code_pattern, source, re.DOTALL))

    if not matches:
        # No Python code blocks found, return original cell
        return [cell]

    result_cells = []
    last_end = 0

    for match in matches:
        start, end = match.span()
        code_content = match.group(1)

        # Add markdown content before the code block (if any)
        if start > last_end:
            markdown_content = source[last_end:start].strip()
            if markdown_content:
                result_cells.append(create_markdown_cell(markdown_content, cell))

        # Add the Python code as a code cell
        if code_content.strip():
            result_cells.append(create_code_cell(code_content))

        last_end = end

    # Add any remaining markdown content after the last code block
    if last_end < len(source):
        remaining_content = source[last_end:].strip()
        if remaining_content:
            result_cells.append(create_markdown_cell(remaining_content, cell))

    if not result_cells:
        return [cell]

    # Cell ids must be unique within a notebook. When the original cell has
    # one, the first fragment keeps it and every other fragment (markdown or
    # code) gets a fresh id, so no two fragments share the original's id and
    # no fragment is left without one.
    if 'id' in cell:
        for index, new_cell in enumerate(result_cells):
            new_cell['id'] = cell['id'] if index == 0 else uuid.uuid4().hex

    return result_cells

def create_markdown_cell(content, original_cell):
    """
    Create a new markdown cell with the given content.

    The content is stored as a list of lines that keep their line endings, so
    joining the list with an empty string gives back ``content`` exactly.

    Args:
        content (str): The markdown content
        original_cell (dict): The original cell to copy metadata from

    Returns:
        dict: New markdown cell. Its metadata is an independent copy of the
        original's, and its ``id`` (if the original has one) is the original's id.
    """
    new_cell = {
        "cell_type": "markdown",
        # Deep copy so that editing one fragment's metadata cannot leak into
        # the original cell or into sibling fragments.
        "metadata": copy.deepcopy(original_cell.get('metadata', {})),
        # keepends=True preserves the newlines; a plain split('\n') drops them
        # and the lines run together when the source list is joined again.
        # The `or` keeps the single-element form for empty content.
        "source": content.splitlines(keepends=True) or [content]
    }

    # Add id if present in original.
    # NOTE: the id is copied verbatim; a caller that builds several cells from
    # the same original must make the ids unique itself.
    if 'id' in original_cell:
        new_cell['id'] = original_cell['id']

    return new_cell

def create_code_cell(code_content):
    """
    Create a new code cell with the given Python code.

    The code is stored as a list of lines that keep their line endings, so
    joining the list with an empty string gives back ``code_content`` exactly.

    Args:
        code_content (str): The Python code content

    Returns:
        dict: New code cell with no outputs, no execution count and empty metadata
    """
    return {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        # keepends=True preserves the newlines; a plain split('\n') drops them
        # and multi-line code collapses into one line when the list is joined.
        # The `or` keeps the single-element form for empty content.
        "source": code_content.splitlines(keepends=True) or [code_content]
    }

# How to call this from a Jupyter notebook:
#
#   Rewrite a notebook in place:
#       split_notebook_cells("my_notebook.ipynb")
#
#   Write the result to a separate file instead:
#       split_notebook_cells("input_notebook.ipynb", "output_notebook.ipynb")

# Demo run: process one notebook in place and report the outcome.
if __name__ == "__main__":
    notebook_file = "equations_correlation.ipynb"
    success = split_notebook_cells(notebook_file)

    print(
        "🎉 All done! Your notebook has been processed."
        if success
        else "❌ Something went wrong. Please check the error messages above."
    )

✅ Processing completed!
📁 Saved to: equations_correlation.ipynb
📊 2 markdown cells were split
🎉 All done! Your notebook has been processed.
