# File to Notebook Restructurer

This notebook provides a systematic approach to convert a source file into a well-structured Jupyter notebook with proper titles, subtitles, comments, and documentation.

## 1. Load the Source File

In this section, we'll read a source file into memory using Python's file handling methods.

In [None]:
# Import necessary libraries
import os
import re
import json
import nbformat
from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell

# Function to load the source file
def load_source_file(file_path):
    """
    Read a UTF-8 text file and hand back everything it contains.

    A status line is printed in both outcomes: a success message naming the
    path, or an error message carrying the exception text.

    Args:
        file_path (str): Location of the file to read

    Returns:
        str: Full text of the file, or None when reading failed
    """
    text = None
    try:
        with open(file_path, mode='r', encoding='utf-8') as source:
            text = source.read()
        print(f"Successfully loaded file: {file_path}")
    except Exception as error:
        # Any failure is reported on stdout and signalled to the caller as None
        print(f"Error loading file: {error}")
        text = None
    return text

# Example usage: prompt for a path, then read that file
file_path = input("Enter the path to your source file: ")
source_content = load_source_file(file_path)

# Preview at most the first ten lines when the load returned non-empty text
if source_content:
    print("\nPreview of file content:", "-" * 50, sep="\n")
    print("\n".join(source_content.split("\n", 10)[:10]), "-" * 50, sep="\n")

## 2. Parse the File Content

Now we'll analyze the file content and sort it into four groups: imports, classes, functions, and the remaining main code.

In [None]:
def parse_python_file(content):
    """
    Parse a Python file to identify its structure.

    This is a line-based heuristic, not a full parser. Only lines that start
    in column 0 can open a new block, so methods, nested functions and
    function-local imports stay inside the class or function that contains
    them. Lines that follow a top-level ``def``/``class`` are kept in that
    block until the next top-level ``def``, ``class``, decorator or import.

    Args:
        content (str): Content of the Python file

    Returns:
        dict: Structured representation of the file with four keys:
            "imports" (list of source lines), "classes" and "functions"
            (lists of multi-line block strings) and "main_code" (list of
            source lines).
    """
    # Initialize structure
    structure = {
        "imports": [],
        "classes": [],
        "functions": [],
        "main_code": []
    }

    # Accepts plain, dotted and relative forms:
    # "import x", "from a.b import c", "from . import d"
    import_start = re.compile(r'^(?:import\s+|from\s+[\w.]+\s+import)')

    current_block = []
    current_type = "main_code"
    decorators = []      # top-level decorator lines waiting for their def/class
    in_import = False    # True while a multi-line import statement is open
    paren_depth = 0

    for line in content.split("\n"):
        stripped = line.strip()
        # An empty line counts as top level here; it matches no keyword below.
        is_top_level = not line[:1].isspace()

        # Detect imports, including the continuation lines of a parenthesised
        # or backslash-continued import statement
        if in_import or (is_top_level and import_start.match(stripped)):
            structure["imports"].append(line)
            code = line.split("#", 1)[0].rstrip()  # ignore trailing comments
            paren_depth += code.count("(") - code.count(")")
            in_import = paren_depth > 0 or code.endswith("\\")
            if not in_import:
                paren_depth = 0
            continue

        starts_class = is_top_level and stripped.startswith("class ")
        starts_function = is_top_level and stripped.startswith(("def ", "async def "))

        if starts_class or starts_function:
            # Close the previous definition and open a new one, placing any
            # pending decorators in front of the header line
            if current_block and current_type != "main_code":
                structure[current_type].append("\n".join(current_block))
            current_block = decorators + [line]
            decorators = []
            current_type = "classes" if starts_class else "functions"
        elif decorators:
            # Stacked decorators, multi-line decorator arguments, or comments
            # between a decorator and its definition
            decorators.append(line)
        elif is_top_level and stripped.startswith("@"):
            # A decorator belongs to the definition that follows it, not to
            # the block that precedes it
            if current_block and current_type != "main_code":
                structure[current_type].append("\n".join(current_block))
            current_block = []
            current_type = "main_code"
            decorators = [line]
        elif current_type != "main_code":
            # Continue adding to current block
            current_block.append(line)
        else:
            structure["main_code"].append(line)

    # Add the last block if exists
    if current_block and current_type != "main_code":
        structure[current_type].append("\n".join(current_block))

    # Decorator lines that never met a definition are kept rather than dropped
    structure["main_code"].extend(decorators)

    return structure

# Run the parser only when the load step produced non-empty content
if source_content:
    file_structure = parse_python_file(source_content)

    # Report how many entries landed in each bucket of the parsed structure
    print(
        "\nFile Structure Summary:",
        f"Number of imports: {len(file_structure['imports'])}",
        f"Number of classes: {len(file_structure['classes'])}",
        f"Number of functions: {len(file_structure['functions'])}",
        f"Lines of main code: {len(file_structure['main_code'])}",
        sep="\n",
    )

## 3. Add Titles and Subtitles

In this section, we'll create a function to insert Markdown cells with titles and subtitles to organize the notebook.

In [None]:
def generate_titles_and_subtitles(file_structure, filename):
    """
    Build the Markdown heading cells that outline the parsed file.

    The outline starts with a title and a one-line introduction, followed by
    an "Imports", "Classes", "Functions" and "Main Code" section for each
    bucket of the structure that is non-empty. Every class or function whose
    name can be read from its definition gets its own third-level heading.

    Args:
        file_structure (dict): Parsed structure of the file
        filename (str): Name of the source file (only its base name is shown)

    Returns:
        list: Markdown cells, in display order
    """
    # Collect the Markdown sources first; the cells are created at the end
    headings = [
        f"# {os.path.basename(filename)} Documentation",
        "This notebook provides documentation and explanation for the code in this file.",
    ]

    if file_structure["imports"]:
        headings.append("## Imports")
        headings.append("The following libraries and modules are imported:")

    # Classes and functions share one shape: a section heading followed by
    # one sub-heading per definition whose name the pattern can find
    named_sections = (
        ("classes", "## Classes", r'class\s+(\w+)'),
        ("functions", "## Functions", r'def\s+(\w+)'),
    )
    for key, section_heading, name_pattern in named_sections:
        definitions = file_structure[key]
        if not definitions:
            continue
        headings.append(section_heading)
        for definition in definitions:
            found = re.search(name_pattern, definition)
            if found:
                headings.append(f"### {found.group(1)}")

    if file_structure["main_code"]:
        headings.append("## Main Code")
        headings.append("The following code executes the main functionality:")

    return [new_markdown_cell(text) for text in headings]

# Build the heading cells once a parsed structure exists and a path was given
if 'file_structure' in vars():
    if file_path:
        title_cells = generate_titles_and_subtitles(file_structure, file_path)
        print(f"\nGenerated {len(title_cells)} title and subtitle cells")

## 4. Insert Comments

Now we'll add comments in code cells to explain the purpose and functionality of the code.

In [None]:
def add_code_with_comments(file_structure):
    """
    Turn the parsed structure into code cells, each led by a header comment.

    Cells are produced in a fixed order: one cell with all imports, one cell
    per class, one cell per function, and finally one cell with the main
    code. Class and function blocks whose name cannot be found are skipped.

    Args:
        file_structure (dict): Parsed structure of the file

    Returns:
        list: Code cells, in output order
    """
    # Gather the cell sources first; the cells themselves are built at the end
    sources = []

    import_lines = file_structure["imports"]
    if import_lines:
        sources.append("# Import statements\n" + "\n".join(import_lines))

    for class_def in file_structure["classes"]:
        found = re.search(r'class\s+(\w+)', class_def)
        if found:
            sources.append(f"# Definition of class {found.group(1)}\n" + class_def)

    for func_def in file_structure["functions"]:
        found = re.search(r'def\s+(\w+)', func_def)
        if found:
            sources.append(f"# Definition of function {found.group(1)}\n" + func_def)

    main_lines = file_structure["main_code"]
    if main_lines:
        sources.append("# Main execution code\n" + "\n".join(main_lines))

    return [new_code_cell(source) for source in sources]

# Build the commented code cells once the parse step has produced a structure
if 'file_structure' in vars():
    code_cells = add_code_with_comments(file_structure)
    print(f"\nGenerated {len(code_cells)} code cells with comments")

## 5. Generate Documentation

In this section, we'll extract the docstrings of the parsed classes and functions and turn them into Markdown cells that document the code.

In [None]:
def extract_docstrings(content):
    """
    Extract the docstring of a class or function definition.

    When the text parses as Python and starts with a class or function
    definition, only that definition's own docstring is returned. A string
    that merely appears later in the body, or the docstring of a nested
    method, is not mistaken for it. Text that does not parse falls back to
    the first triple-quoted string, in either quote style.

    Args:
        content (str): Function or class definition with docstring

    Returns:
        str: Extracted docstring or empty string if none found
    """
    import ast  # local import: only this helper needs it

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # Not valid standalone Python (e.g. an indented fragment)
        tree = None

    definition_types = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    if tree is not None and tree.body and isinstance(tree.body[0], definition_types):
        # clean=False keeps the inner indentation; only the outer whitespace
        # is stripped, as before
        docstring = ast.get_docstring(tree.body[0], clean=False)
        return docstring.strip() if docstring else ""

    # Fallback: first triple-quoted string, double or single quotes
    docstring_pattern = r'"""(.*?)"""|\'\'\'(.*?)\'\'\''
    docstring_match = re.search(docstring_pattern, content, re.DOTALL)

    if docstring_match:
        return (docstring_match.group(1) or docstring_match.group(2) or "").strip()
    return ""

def generate_documentation(file_structure):
    """
    Produce one Markdown documentation cell per named class and function.

    Classes are handled first, then functions. A definition with a docstring
    gets a cell that quotes it; a definition without one gets a short
    "no documentation" notice. Blocks whose name cannot be found are skipped.

    Args:
        file_structure (dict): Parsed structure of the file

    Returns:
        list: Markdown cells, classes before functions
    """
    cells = []

    # Classes first
    for class_def in file_structure["classes"]:
        found = re.search(r'class\s+(\w+)', class_def)
        if not found:
            continue
        class_name = found.group(1)
        docstring = extract_docstrings(class_def)
        if docstring:
            text = f"**Documentation for class `{class_name}`:**\n\n{docstring}"
        else:
            text = f"*No documentation available for class `{class_name}`*"
        cells.append(new_markdown_cell(text))

    # Then functions
    for func_def in file_structure["functions"]:
        found = re.search(r'def\s+(\w+)', func_def)
        if not found:
            continue
        func_name = found.group(1)
        docstring = extract_docstrings(func_def)
        if docstring:
            text = f"**Documentation for function `{func_name}`:**\n\n{docstring}"
        else:
            text = f"*No documentation available for function `{func_name}`*"
        cells.append(new_markdown_cell(text))

    return cells

# Build the documentation cells once the parse step has produced a structure
if 'file_structure' in vars():
    doc_cells = generate_documentation(file_structure)
    print(f"\nGenerated {len(doc_cells)} documentation cells")

## 6. Save as a Jupyter Notebook

Finally, we'll save the restructured content as a Jupyter notebook file (.ipynb).

In [None]:
def create_notebook(title_cells, code_cells, doc_cells, output_path):
    """
    Create a Jupyter notebook from the generated cells.

    The notebook starts with a fixed introduction cell, followed by the title
    cells, a "Code Implementation" heading with the code cells, and a
    "Detailed Documentation" heading with the documentation cells.

    Args:
        title_cells (list): List of title and subtitle cells
        code_cells (list): List of code cells with comments
        doc_cells (list): List of documentation cells
        output_path (str): Path to save the notebook

    Returns:
        bool: True if successful, False otherwise (the error is printed)
    """
    try:
        # Create a new notebook
        nb = new_notebook()

        # Add an introduction cell
        intro_cell = new_markdown_cell("# File to Notebook Conversion\n\n"
                                      "This notebook was automatically generated from a source file.\n\n"
                                      "It contains the following sections:\n"
                                      "1. File Structure (Titles and Subtitles)\n"
                                      "2. Code with Comments\n"
                                      "3. Documentation")
        nb.cells.append(intro_cell)

        # Add all the cells in a logical order
        nb.cells.extend(title_cells)

        nb.cells.append(new_markdown_cell("# Code Implementation"))
        nb.cells.extend(code_cells)

        nb.cells.append(new_markdown_cell("# Detailed Documentation"))
        nb.cells.extend(doc_cells)

        # Write the notebook to file. nbformat serialises the notebook to
        # JSON itself; passing that result through json.dump would encode it
        # a second time and leave a single quoted string in the file instead
        # of a notebook document.
        with open(output_path, 'w', encoding='utf-8') as f:
            nbformat.write(nb, f)

        print(f"Notebook successfully saved to: {output_path}")
        return True

    except Exception as e:
        # Report the failure to the caller as False instead of raising
        print(f"Error saving notebook: {e}")
        return False

# Save the notebook if cells were generated
if 'title_cells' in locals() and 'code_cells' in locals() and 'doc_cells' in locals():
    # Generate output path
    if file_path:
        file_name = os.path.basename(file_path)
        file_base = os.path.splitext(file_name)[0]

        # Prefer the original project folder when it exists on this machine;
        # otherwise write next to the source file so the save does not fail
        # on a missing directory.
        output_dir = "d:\\studies\\s8\\data science\\pythonProject"
        if not os.path.isdir(output_dir):
            output_dir = os.path.dirname(os.path.abspath(file_path))
        output_path = os.path.join(output_dir, f"{file_base}_notebook.ipynb")

        # Create the notebook
        create_notebook(title_cells, code_cells, doc_cells, output_path)
    else:
        print("No file path provided, cannot create notebook.")

## Summary

This notebook provides a complete workflow for restructuring a source file into a well-organized Jupyter notebook:

1. **Load the Source File**: We read the source file into memory using Python's file handling methods.
2. **Parse the File Content**: We analyzed the file content to identify sections, functions, and other structural elements.
3. **Add Titles and Subtitles**: We inserted Markdown cells with titles and subtitles to organize the notebook.
4. **Insert Comments**: We added comments in code cells to explain the purpose and functionality of the code.
5. **Generate Documentation**: We used docstrings to provide detailed documentation for the code.
6. **Save as a Jupyter Notebook**: We saved the restructured content as a Jupyter notebook file (.ipynb).

This tool is particularly useful for creating documentation from existing code files or for converting script-based projects into interactive notebooks.