# Notebook Alterations

In [36]:
# Mount Google Drive at /content/drive; the paths used by the cells below
# all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')
import os

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [37]:
import os
import shutil
import nbformat

def copy_directory_and_convert_py_to_ipynb(source_directory, destination_directory):
    """Mirror a directory tree, converting each ``.py`` file into a notebook.

    Every directory under ``source_directory`` is recreated under
    ``destination_directory``. Files whose name ends in ``.py`` are read as
    text and written as a notebook containing a single code cell with the
    script's source; the notebook keeps the script's base name with an
    ``.ipynb`` extension. All other files are copied with ``shutil.copy2``.

    Args:
        source_directory: Root of the tree to copy.
        destination_directory: Root of the copy; created if missing. It may be
            located inside ``source_directory``; in that case it is skipped
            while walking so the copy never copies itself.

    Returns:
        None. A summary line is printed when the copy is finished.
    """
    os.makedirs(destination_directory, exist_ok=True)

    # Absolute form of the destination, used to recognise it during the walk.
    destination_abs = os.path.abspath(destination_directory)

    for root, dirs, files in os.walk(source_directory):
        # Prune in place so os.walk does not descend into the destination when
        # it is nested inside the source tree.
        dirs[:] = [
            name for name in dirs
            if os.path.abspath(os.path.join(root, name)) != destination_abs
        ]

        # Recreate the current directory at the same relative location.
        relative_path = os.path.relpath(root, source_directory)
        destination_path = os.path.join(destination_directory, relative_path)
        os.makedirs(destination_path, exist_ok=True)

        for file in files:
            source_file_path = os.path.join(root, file)

            if file.endswith('.py'):
                with open(source_file_path, 'r', encoding='utf-8') as py_file:
                    code = py_file.read()

                # One notebook per script, holding the whole script in one cell.
                notebook = nbformat.v4.new_notebook()
                notebook.cells.append(nbformat.v4.new_code_cell(code))

                # Swap only the file extension. A plain str.replace on the full
                # path would also rewrite '.py' inside directory names.
                notebook_name = os.path.splitext(file)[0] + '.ipynb'
                destination_file_path = os.path.join(destination_path, notebook_name)

                with open(destination_file_path, 'w', encoding='utf-8') as ipynb_file:
                    nbformat.write(notebook, ipynb_file)
            else:
                # Copy non-Python files as-is.
                shutil.copy2(source_file_path, os.path.join(destination_path, file))

    print(f"Directory copied from {source_directory} to {destination_directory}, and .py files were converted to .ipynb")

# Example usage:
# Both paths are hard-coded to this project's Google Drive layout; the copy is
# written to a sibling folder with a "-copy" suffix.
source_dir = '/content/drive/My Drive/AbelBioToken-main/AbelBioToken-main'
destination_dir = '/content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy'
copy_directory_and_convert_py_to_ipynb(source_dir, destination_dir)


Directory copied from /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main to /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy, and .py files were converted to .ipynb


In [38]:
import os
import nbformat
import re

def find_internal_imports(notebook_path):
    """Collect the top-level module names imported by a notebook's code cells.

    Each line of every code cell is inspected on its own:

    * ``import a, b.c as d`` contributes ``a`` and ``b``;
    * ``from pkg.sub import x`` contributes ``pkg``;
    * ``from .mod import x`` (relative import) contributes ``mod``.

    A bare ``from . import x`` names no module after the dots and is ignored.
    Matching is purely textual, so a line inside a multi-line string that
    looks like an import is reported as well.

    Args:
        notebook_path: Path of the ``.ipynb`` file to scan.

    Returns:
        A set with the first dotted component of every imported module.
    """
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, as_version=4)

    # "from [.]*name ..." -> first identifier after any leading dots.
    from_re = re.compile(r'^\s*from\s+\.*(\w+)')
    # "import <clauses>" -> everything after the keyword, split on commas below.
    import_re = re.compile(r'^\s*import\s+(.+)')
    # Leading identifier of one clause ("b.c as d" -> "b").
    name_re = re.compile(r'\s*(\w+)')

    imports_to_fix = set()
    for cell in notebook.cells:
        if cell.cell_type != 'code':
            continue
        for line in cell.source.split('\n'):
            from_match = from_re.match(line)
            if from_match:
                imports_to_fix.add(from_match.group(1))
                continue

            import_match = import_re.match(line)
            if not import_match:
                continue
            # Drop a trailing comment before splitting the clause list.
            clauses = import_match.group(1).split('#', 1)[0]
            for clause in clauses.split(','):
                name_match = name_re.match(clause)
                if name_match:
                    imports_to_fix.add(name_match.group(1))

    return imports_to_fix

def fix_notebook_imports(notebook_path, internal_modules, source_dir):
    """Make a converted notebook able to import the project's ``.py`` modules.

    Two changes are applied to the notebook, which is then saved in place:

    1. A setup cell is placed first. It mounts Google Drive and appends the
       ``data``, ``model``, ``train`` and ``test`` sub-directories of
       ``source_dir`` to ``sys.path``. If the first cell is already such a
       setup cell (from an earlier run) it is refreshed instead of a second
       one being inserted, so the function can be run repeatedly.
    2. Relative imports of internal modules (``from .block import X``) are
       turned into absolute ones (``from block import X``), because a notebook
       has no parent package to resolve the leading dots against.

    Absolute imports are left untouched: with the ``sys.path`` entries above
    they already resolve, and rewriting them to a bare ``import module`` would
    drop the imported names, any ``as`` alias and the line's indentation.
    A bare ``from . import x`` is also left unchanged.

    Args:
        notebook_path: Path of the ``.ipynb`` file to modify in place.
        internal_modules: Collection of module names considered internal.
        source_dir: Root directory holding the original ``.py`` sources.

    Returns:
        None. A confirmation line is printed after the notebook is written.
    """
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, as_version=4)

    # group(1): indentation + "from "; group(2): first module component after
    # the leading dots of a relative import.
    relative_import_re = re.compile(r'^(\s*from\s+)\.+(\w+)')

    # Marker comment used to recognise a setup cell written by a previous run.
    setup_marker = "# Add necessary directories to sys.path"

    path_lines = '\n'.join(
        f"sys.path.append({os.path.join(source_dir, sub_dir)!r})"
        for sub_dir in ('data', 'model', 'train', 'test')
    )
    drive_mount_code = (
        "\n"
        "from google.colab import drive\n"
        "import sys\n"
        "drive.mount('/content/drive')\n"
        "\n"
        f"{setup_marker}\n"
        f"{path_lines}\n"
    )

    first_cell = notebook.cells[0] if notebook.cells else None
    if (
        first_cell is not None
        and first_cell.cell_type == 'code'
        and setup_marker in first_cell.source
    ):
        # Already processed: refresh the setup cell rather than stacking another.
        first_cell.source = drive_mount_code
    else:
        notebook.cells.insert(0, nbformat.v4.new_code_cell(drive_mount_code))

    # Now fix the imports.
    for cell in notebook.cells:
        if cell.cell_type != 'code':
            continue
        fixed_source = []
        for line in cell.source.split('\n'):
            match = relative_import_re.match(line)
            if match and match.group(2) in internal_modules:
                # Remove only the leading dots; keep everything else verbatim.
                line = match.group(1) + line[match.start(2):]
            fixed_source.append(line)
        cell.source = '\n'.join(fixed_source)

    # Save the modified notebook.
    with open(notebook_path, 'w', encoding='utf-8') as f:
        nbformat.write(notebook, f)
    print(f"Fixed imports in {notebook_path}")

def process_notebooks(directory, source_dir):
    """Fix the imports of every notebook found under ``directory``.

    The names of all ``.py`` files below ``source_dir`` (file name without the
    extension) form the set of internal modules. Every ``.ipynb`` file below
    ``directory`` is then passed to ``fix_notebook_imports`` together with
    that set and ``source_dir``.

    Args:
        directory: Root of the tree containing the notebooks to process.
        source_dir: Root of the tree containing the original ``.py`` modules.

    Returns:
        None. Progress is printed for each notebook.
    """
    # List all .py modules in the source directory to identify internal modules.
    internal_modules = set()
    for root, dirs, files in os.walk(source_dir):
        for file in files:
            # splitext removes only the extension; str.replace('.py', '')
            # would also delete a '.py' occurring earlier in the file name.
            module_name, extension = os.path.splitext(file)
            if extension == '.py':
                internal_modules.add(module_name)

    # Process each notebook in the target directory.
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.ipynb'):
                notebook_path = os.path.join(root, file)
                print(f"Processing {notebook_path}...")
                fix_notebook_imports(notebook_path, internal_modules, source_dir)

# Example usage
# notebook_dir is the converted copy whose notebooks are rewritten in place;
# source_dir is the original tree whose .py file names define the internal modules.
notebook_dir = '/content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy'
source_dir = '/content/drive/My Drive/AbelBioToken-main/AbelBioToken-main'
process_notebooks(notebook_dir, source_dir)


Processing /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy/final_train.ipynb...
Fixed imports in /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy/final_train.ipynb
Processing /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy/train/dataset.ipynb...
Fixed imports in /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy/train/dataset.ipynb
Processing /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy/train/simple.ipynb...
Fixed imports in /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy/train/simple.ipynb
Processing /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy/train/context.ipynb...
Fixed imports in /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy/train/context.ipynb
Processing /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy/train/train_demo.ipynb...
Fixed imports in /content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy/train/train_demo.ipynb

# Troubleshooting


In [40]:
def list_directory_contents(directory):
    """Print every directory and file below ``directory`` as an indented tree.

    Each directory is printed as ``|-- name/`` followed by its files, indented
    four spaces per nesting level relative to ``directory``. Directories that
    cannot be read (including a missing ``directory``) are reported with an
    "Error accessing directory" line instead of being skipped silently.

    Args:
        directory: Root of the tree to list; a trailing path separator is
            accepted.

    Returns:
        None. The listing is written to standard output.
    """
    def report_error(error):
        # os.walk ignores listing errors unless an onerror callback is given.
        print(f"Error accessing directory {directory}: {error}")

    try:
        for root, dirs, files in os.walk(directory, onerror=report_error):
            # Depth comes from the relative path, not from string replacement,
            # so a trailing separator or a repeated path fragment cannot skew it.
            relative = os.path.relpath(root, directory)
            level = 0 if relative == os.curdir else relative.count(os.sep) + 1
            indent = ' ' * 4 * level
            # normpath drops a trailing separator that would make basename empty.
            print(f"{indent}|-- {os.path.basename(os.path.normpath(root))}/")
            sub_indent = ' ' * 4 * (level + 1)
            for f in files:
                print(f"{sub_indent}|-- {f}")
    except Exception as e:
        print(f"Error accessing directory {directory}: {e}")

# Example usage:
# Lists the converted copy so the result of the conversion can be inspected.
directory = '/content/drive/My Drive/AbelBioToken-main/AbelBioToken-main-copy'
list_directory_contents(directory)


|-- AbelBioToken-main-copy/
    |-- final_train.ipynb
    |-- random_test_results.csv
    |-- .gitignore
    |-- LICENSE
    |-- README.MD
    |-- test.svg
    |-- train/
        |-- dataset.ipynb
        |-- simple.ipynb
        |-- context.ipynb
        |-- train_demo.ipynb
        |-- train_process.ipynb
    |-- model/
        |-- block.ipynb
        |-- decoder.ipynb
        |-- embedding.ipynb
        |-- encoder.ipynb
        |-- transformer.ipynb
    |-- data/
        |-- cap.yaml
        |-- aa_data.csv
        |-- base_test_data.csv
        |-- generate_dataset.ipynb
        |-- target_vocab.csv
        |-- sets.yaml
        |-- data_segregate.ipynb
        |-- generator.ipynb
        |-- structure.yaml
        |-- train_data.csv
        |-- test_data.csv
        |-- test.svg
        |-- val_data.csv
        |-- __pycache__/
            |-- generator.cpython-310.pyc
    |-- test/
        |-- test_model.ipynb
        |-- context.ipynb
