# Parse ipynb

In [8]:
import nbformat

def parse_ipynb(ipynb_path):
    """
    Parses a Jupyter Notebook (.ipynb) file and extracts code and markdown cells.

    Args:
        ipynb_path: The path to the .ipynb file.

    Returns:
        A dictionary with lists 'code' (Python code blocks) and 'markdown' (Markdown cells).
        Returns None if an error occurs (file not found, parsing error, etc.).
    """
    try:
        with open(ipynb_path, 'r', encoding='utf-8') as f:  # Specify UTF-8 encoding for broader compatibility
            nb = nbformat.read(f, as_version=4) # Read and ensure version 4

        code_cells = []
        markdown_cells = []
        raw_cells = [] #Added to capture cells that are neither code nor markdown

        for cell in nb.cells:
            if cell.cell_type == "code":
                code_cells.append(cell.source)
            elif cell.cell_type == "markdown":
                markdown_cells.append(cell.source)
            else:
                raw_cells.append(cell) #Append other cell types


        return {"code": code_cells, "markdown": markdown_cells, "raw": raw_cells}

    except FileNotFoundError:
        print(f"Error: File not found at '{ipynb_path}'")
        return None
    except nbformat.reader.NotJSONError:
        print(f"Error: Invalid JSON format in '{ipynb_path}'")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None


# Example usage:
ipynb_file_path = "ConvolutionalNeuralNetwork_Prediction_toc.ipynb"  # Replace with your .ipynb file path

result = parse_ipynb(ipynb_file_path)

In [9]:
result['markdown']

['# Table of Contents\n  - [Install Libraries](#install-libraries)\n- [Data Downloading](#data-downloading)\n- [GAF](#gaf)\n- [Train Test Split](#train-test-split)\n- [Applying Class Weights](#applying-class-weights)\n- [Model Training and Evaluation](#model-training-and-evaluation)\n',
 '[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1fXhuSLU_CxPIHiMbELwEuuKoJih2SeQs?usp=sharing)',
 "## <a id='install-libraries'></a> Install Libraries",
 "## <a id='data-downloading'></a> Data Downloading",
 "We begin by downloading historical stock price data using the yfinance library. We focus on the 'Close' price as our primary time series for analysis.",
 "## <a id='gaf'></a> GAF\nGramian Angular Field (GAF) is a technique that encodes a time series as a 2D image by representing each data point as a polar coordinate. The radial coordinate is the time point itself, and the angular coordinate is the scaled value of the time series

# Generate TOC

In [7]:
import nbformat
from nbformat.v4 import new_markdown_cell
import os
import re

def generate_toc(nb_path):
    """
    Generates a table of contents for a Jupyter Notebook, adding anchors to headings.

    Args:
        nb_path: Path to the .ipynb file.
    """
    try:
        with open(nb_path, 'r') as f:
            nb = nbformat.read(f, as_version=4)

        toc = "# Table of Contents\n"
        level = 0
        toc_items = []
        heading_anchors = {} # Dictionary to store heading anchors

        for i, cell in enumerate(nb.cells):
            if cell.cell_type == "markdown":
                lines = cell.source.splitlines()
                for line in lines:
                    line = line.strip()
                    match = re.match(r"^#{1,6}\s*(.*)", line) # Match headings with 1 to 6 #s
                    if match:
                        heading_level = len(match.group(0)) - len(match.group(0).lstrip("#"))
                        heading_text = match.group(1).strip()
                        
                        # Create anchor ID (slugify heading text)
                        anchor_id = re.sub(r"[^a-zA-Z0-9]+", "-", heading_text).lower()
                        
                        # Add anchor to original heading
                        modified_line = f"#{'#' * heading_level} <a id='{anchor_id}'></a> {heading_text}"
                        lines[lines.index(line)] = modified_line
                        
                        heading_anchors[anchor_id] = i #Store anchor and cell index

                        while heading_level > level:
                            toc_items.append("  ")
                            level += 1
                        while heading_level < level:
                            toc_items.pop()
                            level -= 1

                        toc_items.append(f"- [{heading_text}](#{anchor_id})\n")

                #Update cell source with modified lines
                cell.source = "\n".join(lines)

        toc += "".join(toc_items)
        toc_cell = new_markdown_cell(toc)
        nb.cells.insert(0, toc_cell)

        base, ext = os.path.splitext(nb_path)
        new_nb_path = f"{base}_toc{ext}"

        with open(new_nb_path, 'w') as f:
            nbformat.write(nb, f)
        print(f"Table of Contents added to {new_nb_path}")

    except FileNotFoundError:
        print(f"Error: File not found at {nb_path}")
    except Exception as e:
        print(f"An error occurred: {e}")



# Example usage:
generate_toc("ConvolutionalNeuralNetwork_Prediction.ipynb")

Table of Contents added to ConvolutionalNeuralNetwork_Prediction_toc.ipynb
