In [19]:
import nbformat
import pandas as pd
import re
from collections import OrderedDict


In [20]:
import nbformat
import pandas as pd
import re
from collections import OrderedDict

def extract_three_columns(notebook_path):
    """
    Returns a DataFrame with three columns:
      - Magic Commands   (first token of every line that starts with "%")
      - Imported Modules (root package of every ``import`` / ``from ... import``)
      - Custom Functions (names introduced by ``def`` / ``async def``)

    The notebook at ``notebook_path`` is opened as UTF-8 and parsed with
    ``nbformat.read(..., as_version=4)``. Only code cells are scanned, one
    physical line at a time with regular expressions (no AST parsing), so:
      - several statements on one line separated by ``;`` are each examined;
      - relative imports (``from . import x``) are ignored;
      - text inside multi-line strings or in the body of non-Python cell
        magics may still be misread as code.

    Items are de-duplicated and kept in first-seen order; shorter columns are
    padded with empty strings so that all three have the same length. A
    notebook with no matches yields an empty DataFrame with the three columns.
    """
    with open(notebook_path, "r", encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)

    # Ordered dicts act as insertion-ordered sets (values are unused).
    magics = OrderedDict()
    imports = OrderedDict()
    funcs = OrderedDict()

    # \w (Unicode-aware for str patterns) so non-ASCII identifiers match too.
    re_from = re.compile(r'^\s*from\s+([\w.]+)\s+import\b')
    re_import = re.compile(r'^\s*import\s+(.+)')
    re_def = re.compile(r'^\s*(?:async\s+)?def\s+(\w+)\s*\(')

    def record_import(dotted_name):
        # Keep only the top-level package. A relative import yields an empty
        # root and leftover punctuation yields a non-identifier; both are
        # rejected by isidentifier().
        root = dotted_name.split('.')[0]
        if root.isidentifier():
            imports.setdefault(root, None)

    for cell in nb.cells:
        if cell.cell_type != "code":
            continue
        for line in cell.source.splitlines():
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue

            # Magic commands (line or cell magics): keep the first token only.
            # None of the patterns below can match a line starting with "%",
            # so there is nothing further to look for on this line.
            if stripped.startswith("%"):
                magics.setdefault(stripped.split()[0], None)
                continue

            for statement in _split_statements(line):
                # from X import ...
                m_from = re_from.match(statement)
                if m_from:
                    record_import(m_from.group(1))
                    continue

                # import a, b.c as d  (comma-separated, optional aliases)
                m_imp = re_import.match(statement)
                if m_imp:
                    for part in m_imp.group(1).split(','):
                        tokens = part.split()
                        if tokens:
                            record_import(tokens[0])
                    continue

                # function definitions (handles "async def" too)
                m_def = re_def.match(statement)
                if m_def:
                    funcs.setdefault(m_def.group(1), None)

    columns = {
        "Magic Commands": list(magics),
        "Imported Modules": list(imports),
        "Custom Functions": list(funcs),
    }

    # Pad every column with "" up to the length of the longest one.
    height = max(len(items) for items in columns.values())
    for items in columns.values():
        items.extend([""] * (height - len(items)))

    return pd.DataFrame(columns)


def _split_statements(line):
    """
    Split one physical source line into its ``;``-separated statements.

    A trailing ``#`` comment is dropped. Quote characters are tracked so that
    a ``;`` or ``#`` inside a single-line string literal neither splits nor
    truncates the statement. Blank fragments are omitted.
    """
    statements = []
    current = []
    quote = None       # quote character of the string literal we are inside
    escaped = False    # previous character inside the string was a backslash
    for ch in line:
        if quote:
            current.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == quote:
                quote = None
        elif ch in "'\"":
            quote = ch
            current.append(ch)
        elif ch == "#":
            break                      # rest of the line is a comment
        elif ch == ";":
            statements.append("".join(current))
            current = []
        else:
            current.append(ch)
    statements.append("".join(current))
    return [s for s in statements if s.strip()]

In [21]:
if __name__ == "__main__":
    # NOTE: machine-specific absolute path; point this at the notebook to analyse.
    notebook_path = r"D:\Paypal Migration Project\Magic_commands_NB_migration\test_nb.ipynb"
    df = extract_three_columns(notebook_path)

    # `display` is provided by IPython/Jupyter. In a plain terminal session the
    # name is undefined, so fall back to printing the whole table as text.
    try:
        display(df)
    except NameError:
        print(df.to_string())

Unnamed: 0,Magic Commands,Imported Modules,Custom Functions
0,%lsmagic,os,compute_stats
1,%matplotlib,pathlib,add_two_numbers
2,%load_ext,pandas,
3,%autoreload,numpy,
4,%pwd,matplotlib,
5,%time,seaborn,
6,%timeit,,
