In [None]:
import os
import io
import nbformat
import pandas as pd
from google.cloud import storage
from datetime import datetime
import getpass

# Migration settings. Every path below is machine-specific; adjust before running.
_PROJECT_ROOT = r"D:\Paypal Migration Project"

# Local folder tree that holds the source notebooks.
LOCAL_NOTEBOOK_DIR = _PROJECT_ROOT + r"\Jupyter Migration Project"
# Destination GCS bucket -- replace with your bucket name.
BUCKET_NAME = "jupyter-notebooks-bucket"
# Placeholder path to the service-account JSON key used to authenticate to GCS.
SERVICE_ACCOUNT_FILE = r"path\Your_service_accountFile.json"
# Python version string stamped into each migrated notebook's metadata.
TARGET_KERNEL_VERSION = "3.7.0"
# Location of the CSV migration report.
output_csv = _PROJECT_ROOT + r"\Output_csv_file\migration_report.csv"


In [25]:
def get_notebook_metadata(local_path, nb):
    """
    Collect file-level and notebook-level metadata for a single notebook.

    Parameters
    ----------
    local_path : str
        Path of the notebook file on disk. Used for ``os.stat`` and to
        derive the file extension.
    nb : notebook object
        Must expose ``metadata`` (a mapping) and ``cells`` (an iterable of
        mappings), e.g. the object returned by ``nbformat.read``.

    Returns
    -------
    dict
        Keys:
        ``full_path``            -- ``local_path`` unchanged.
        ``size_bytes``           -- file size from ``os.stat``.
        ``kernel_name``          -- ``metadata.kernelspec.name`` or ``"N/A"``.
        ``kernel_version``       -- ``metadata.language_info.version`` or ``"N/A"``.
        ``file_extension``       -- extension including the leading dot.
        ``total_cells``          -- number of cells of any type.
        ``times_executed``       -- number of cells carrying a non-null
                                    ``execution_count``.
        ``last_execution_count`` -- highest ``execution_count`` found, or
                                    ``None`` when no cell has one.
        ``owner``                -- login name of the user running this code
                                    (``getpass.getuser()``), NOT the owner of
                                    the file on disk.
        ``creation_time``        -- ``datetime`` of the file's creation (see
                                    the note in the body for the fallback).

    Raises
    ------
    OSError
        If ``local_path`` cannot be stat'ed.
    """
    file_stats = os.stat(local_path)

    # Kernel info; notebooks without these metadata sections report "N/A".
    kernel_name = nb.metadata.get("kernelspec", {}).get("name", "N/A")
    kernel_version = nb.metadata.get("language_info", {}).get("version", "N/A")

    # Look each cell's execution_count up once and keep only the populated
    # ones (markdown cells have no such key; unexecuted code cells hold None).
    execution_counts = [
        count
        for count in (cell.get("execution_count") for cell in nb.cells)
        if count is not None
    ]

    # st_ctime is the creation time only on Windows; on POSIX it is the time
    # of the last metadata change. Prefer the real birth time wherever the
    # platform exposes it and keep st_ctime as the fallback.
    try:
        created_timestamp = file_stats.st_birthtime
    except AttributeError:
        created_timestamp = file_stats.st_ctime

    return {
        "full_path": local_path,
        "size_bytes": file_stats.st_size,
        "kernel_name": kernel_name,
        "kernel_version": kernel_version,
        "file_extension": os.path.splitext(local_path)[1],
        "total_cells": len(nb.cells),
        "times_executed": len(execution_counts),
        "last_execution_count": max(execution_counts) if execution_counts else None,
        # Current user running the script, not the on-disk file owner.
        "owner": getpass.getuser(),
        "creation_time": datetime.fromtimestamp(created_timestamp),
    }


In [30]:
def migrate_kernel_in_memory(nb_path, target_version):
    """
    Load the notebook stored at nb_path and stamp target_version into its
    kernel metadata.

    Nothing is written back to disk: the modified notebook object is
    returned to the caller.
    """
    notebook = nbformat.read(nb_path, as_version=4)
    meta = notebook.metadata
    display_name = f"Python {target_version}"

    # kernelspec: create a default Python 3 spec when the notebook has none,
    # otherwise only refresh the display name and leave the rest untouched.
    if "kernelspec" not in meta:
        meta["kernelspec"] = {
            "display_name": display_name,
            "language": "python",
            "name": "python3"
        }
    else:
        meta["kernelspec"]["display_name"] = display_name

    # language_info: same approach -- create it if absent, else overwrite
    # just the version field.
    if "language_info" not in meta:
        meta["language_info"] = {"name": "python", "version": target_version}
    else:
        meta["language_info"]["version"] = target_version

    return notebook



In [31]:
def upload_notebook_to_gcs(nb, bucket_name, blob_path, service_account_file):
    """
    Serialize a notebook object in memory and upload it to a GCS bucket.

    No local copy is written. blob_path becomes the object name, with any
    backslashes turned into forward slashes so the folder structure carries
    over into the bucket.
    """
    gcs_client = storage.Client.from_service_account_json(service_account_file)
    target_bucket = gcs_client.bucket(bucket_name)

    # Render the notebook as text into an in-memory buffer
    text_buffer = io.StringIO()
    nbformat.write(nb, text_buffer)
    notebook_text = text_buffer.getvalue()

    # Normalize Windows separators once; reused for the blob name and the log line
    gcs_path = blob_path.replace("\\", "/")

    # Upload to GCS
    target_blob = target_bucket.blob(gcs_path)
    target_blob.upload_from_string(notebook_text, content_type="application/x-ipynb+json")

    print(f"Upload completed to gs://{bucket_name}/{gcs_path}")

In [36]:
def process_all_notebooks():
    """
    Migrate every notebook under LOCAL_NOTEBOOK_DIR and upload it to GCS.

    For each ``.ipynb`` file found by walking LOCAL_NOTEBOOK_DIR (folders
    whose path contains ``.ipynb_checkpoints`` are skipped) this:

    1. reads the notebook and collects its original metadata,
    2. builds a migrated copy targeting TARGET_KERNEL_VERSION,
    3. records original and migrated kernel versions side by side,
    4. uploads the migrated copy to BUCKET_NAME under the same relative path.

    Afterwards the collected records are shown, written as CSV to the
    module-level ``output_csv`` path, and summarized on stdout.

    Returns
    -------
    pandas.DataFrame
        One row per processed notebook (empty when none were found).

    Notes
    -----
    No error handling is applied per notebook: an exception raised while
    reading or uploading one file propagates and stops the run before the
    report is written.
    """
    all_metadata = []   # one record per notebook
    folder_counter = 0  # folders visited, including those without notebooks
    total_files = 0     # notebooks processed across all folders

    for root, _, files in os.walk(LOCAL_NOTEBOOK_DIR):
        if ".ipynb_checkpoints" in root:
            continue  # skip hidden checkpoint folders

        folder_counter += 1
        print(f"\n[{folder_counter}] Processing folder: {root}")

        notebook_files = [name for name in files if name.endswith(".ipynb")]
        for file_counter, file in enumerate(notebook_files, start=1):
            total_files += 1
            local_path = os.path.join(root, file)

            print(f"   [{folder_counter}.{file_counter}] Processing file: {file} (Total processed: {total_files})")

            # Step 1: Read notebook (before migration)
            nb_original = nbformat.read(local_path, as_version=4)
            original_metadata = get_notebook_metadata(local_path, nb_original)

            # Step 2: Migrate kernel. This re-reads the file, so nb_original
            # is a separate object and keeps its pre-migration metadata.
            nb_migrated = migrate_kernel_in_memory(local_path, TARGET_KERNEL_VERSION)

            # Step 3: Get migrated metadata
            migrated_metadata = get_notebook_metadata(local_path, nb_migrated)

            # Step 4: Merge both sets of metadata; only the kernel version
            # is taken from the migrated copy.
            all_metadata.append({
                "owner": original_metadata["owner"],
                "file_creation_time": original_metadata["creation_time"],
                "full_path": local_path,
                "size_bytes": original_metadata["size_bytes"],
                "kernel_name": original_metadata["kernel_name"],
                "original_kernel_version": original_metadata["kernel_version"],
                "migrated_kernel_version": migrated_metadata["kernel_version"],
                "file_extension": original_metadata["file_extension"],
                "total_cells": original_metadata["total_cells"],
                "times_executed": original_metadata["times_executed"],
                "last_execution_count": original_metadata["last_execution_count"],
            })

            # Step 5: Maintain folder structure in GCS
            rel_path = os.path.relpath(local_path, LOCAL_NOTEBOOK_DIR)

            # Step 6: Upload migrated notebook to GCS
            upload_notebook_to_gcs(nb_migrated, BUCKET_NAME, rel_path, SERVICE_ACCOUNT_FILE)

    # Convert collected metadata to DataFrame
    df = pd.DataFrame(all_metadata)
    print("\n Migration Summary:")
    display(df.head())

    # Save as CSV for audit. The destination comes from the module-level
    # `output_csv` setting rather than a second hard-coded copy of the path,
    # so editing the configuration actually takes effect here.
    report_dir = os.path.dirname(output_csv)
    if report_dir:  # a bare filename has no directory; makedirs("") would raise
        os.makedirs(report_dir, exist_ok=True)
    df.to_csv(output_csv, index=False)
    print(f"\n Metadata report saved at: {output_csv}")

    # Final summary
    print(f"\n Total folders processed: {folder_counter}")
    print(f"\n Total notebooks processed/migrated: {total_files}")

    return df

In [37]:
# Entry point: run the whole migration and bind the returned report
# DataFrame to `df`.
if __name__ == "__main__":
    df = process_all_notebooks()


[1] Processing folder: D:\Paypal Migration Project\Jupyter Migration Project

[2] Processing folder: D:\Paypal Migration Project\Jupyter Migration Project\product_A
   [2.1] Processing file: analysis.ipynb (Total processed: 1)
Upload completed to gs://jupyter-notebooks-bucket/product_A/analysis.ipynb
   [2.2] Processing file: exploration.ipynb (Total processed: 2)
Upload completed to gs://jupyter-notebooks-bucket/product_A/exploration.ipynb

[3] Processing folder: D:\Paypal Migration Project\Jupyter Migration Project\product_B

[4] Processing folder: D:\Paypal Migration Project\Jupyter Migration Project\product_B\team1
   [4.1] Processing file: etl.ipynb (Total processed: 3)
Upload completed to gs://jupyter-notebooks-bucket/product_B/team1/etl.ipynb

[5] Processing folder: D:\Paypal Migration Project\Jupyter Migration Project\product_B\team2
   [5.1] Processing file: visualize.ipynb (Total processed: 4)
Upload completed to gs://jupyter-notebooks-bucket/product_B/team2/visualize.ipynb


Unnamed: 0,owner,file_creation_time,full_path,size_bytes,kernel_name,original_kernel_version,migrated_kernel_version,file_extension,total_cells,times_executed,last_execution_count
0,Rahul Malviya,2025-09-12 00:06:56.947602,D:\Paypal Migration Project\Jupyter Migration ...,948,python3,3.5.0,3.7.0,.ipynb,1,0,
1,Rahul Malviya,2025-09-12 00:07:18.488813,D:\Paypal Migration Project\Jupyter Migration ...,972,python3,3.5.0,3.7.0,.ipynb,1,0,
2,Rahul Malviya,2025-09-12 00:08:11.915240,D:\Paypal Migration Project\Jupyter Migration ...,788,python3,3.5.0,3.7.0,.ipynb,1,0,
3,Rahul Malviya,2025-09-12 00:08:35.031760,D:\Paypal Migration Project\Jupyter Migration ...,881,python3,3.5.0,3.7.0,.ipynb,1,0,
4,Rahul Malviya,2025-09-12 00:08:52.290868,D:\Paypal Migration Project\Jupyter Migration ...,836,python3,3.5.0,3.7.0,.ipynb,1,0,



 Metadata report saved at: D:\Paypal Migration Project\Output_csv_file\migration_report.csv

 Total folders processed: 6

 Total notebooks processed/migrated: 6
