In [7]:
import os
import shutil
from multiprocessing import Pool, cpu_count



def copy_file(args):
    """Copy one file to its destination, skipping it when a same-sized copy exists.

    Written as a ``Pool.map`` worker: it takes a single tuple and reports
    failures on stdout instead of raising, so one bad file cannot abort the
    rest of the batch.

    Args:
        args: ``(src, dest)`` pair of path strings.

    Returns:
        None, whether the file was copied, skipped, or failed.
    """
    src, dest = args

    # Everything that touches the filesystem lives inside the try block. The
    # size check and directory creation can fail just like the copy itself
    # (e.g. a broken symlink as source, or an unwritable destination).
    try:
        # Check if destination file exists
        if os.path.exists(dest):
            # Equal size is used as a cheap "already copied" heuristic;
            # file contents are not compared.
            if os.path.getsize(src) == os.path.getsize(dest):
                return
            print(f"File '{dest}' exists but sizes differ. Proceeding with copy.")

        # Ensure destination directory exists. A bare filename has an empty
        # dirname and os.makedirs("") raises, so only create a real directory.
        dest_dir = os.path.dirname(dest)
        if dest_dir and not os.path.exists(dest_dir):
            # exist_ok=True: another worker may create the directory first.
            os.makedirs(dest_dir, exist_ok=True)
            print(f"Creating directory: {dest_dir}")

        # copy2 also attempts to preserve file metadata.
        shutil.copy2(src, dest)
    except PermissionError:
        print(f"Permission denied: Cannot copy file from '{src}' to '{dest}'.")
    except FileNotFoundError:
        print(f"File not found: '{src}' does not exist.")
    except Exception as e:
        print(f"An error occurred while copying the file: {e}")




def get_all_files(src_folder):
    """List the files located directly inside ``src_folder`` (no recursion).

    Args:
        src_folder: Directory to inspect.

    Returns:
        A list of paths, each built by joining ``src_folder`` with an entry
        name, for every entry that ``os.path.isfile`` accepts. Directories
        are left out.
    """
    candidates = (os.path.join(src_folder, entry) for entry in os.listdir(src_folder))
    # Keep only entries that are files; subdirectories are handled by the caller.
    return [candidate for candidate in candidates if os.path.isfile(candidate)]

def copy_files_recursively(src_folder, dest_folder):
    """Copy a directory tree one level at a time, using a process pool per level.

    The files directly inside ``src_folder`` are copied in parallel with
    ``copy_file``; the function then recurses into each subdirectory that is
    not excluded by the marker list below.

    Args:
        src_folder: Directory to copy from.
        dest_folder: Directory to copy into; files keep their path relative
            to ``src_folder``.

    Returns:
        None.
    """
    all_files = get_all_files(src_folder)
    print(len(all_files), "got in ", src_folder)

    # Map source files to their new destination paths
    tasks = [
        (file, os.path.join(dest_folder, os.path.relpath(file, src_folder)))
        for file in all_files
    ]

    # Use multiprocessing to copy files. Skip the pool entirely for
    # directories without files, since spawning workers is not free.
    if tasks:
        # Leave 20 cores free, but never ask for fewer than one worker:
        # Pool raises ValueError when the process count is below 1.
        workers = max(1, cpu_count() - 20)
        with Pool(workers) as pool:
            pool.map(copy_file, tasks)

    # NOTE(review): the markers are matched as substrings of the FULL source
    # path, so a root folder whose own path contains one of them (for example
    # "herbarium") causes every subdirectory to be skipped. Confirm whether
    # that is intended before relying on this function for such roots.
    excluded_markers = (
        ".local",
        ".cache",
        ".vscode-server",
        ".venv",
        "site-packages",
        "herbarium",
    )

    # Check for directories and copy them recursively
    for item in os.listdir(src_folder):
        src_item = os.path.join(src_folder, item)
        if os.path.isfile(src_item):
            continue

        dest_item = os.path.join(dest_folder, item)
        print("focusing on , ", src_folder)

        # Entries that are neither files nor directories are ignored.
        if not os.path.isdir(src_item):
            continue
        if any(marker in src_item for marker in excluded_markers):
            continue

        if not os.path.exists(dest_item):
            # exist_ok=True guards against the directory appearing between
            # the existence check and the creation call.
            os.makedirs(dest_item, exist_ok=True)

        copy_files_recursively(src_item, dest_item)


# Marker printed when the cell reaches this point, i.e. after the definitions above have run.
print("ok")

def get_all_files_singlular(src_folder):
    """Collect the path of every file beneath ``src_folder``, at any depth.

    Args:
        src_folder: Root directory handed to ``os.walk``.

    Returns:
        A list of paths in ``os.walk`` order, each formed by joining the
        directory being visited with a file name found in it.
    """
    return [
        os.path.join(current_dir, file_name)
        for current_dir, _subdirs, file_names in os.walk(src_folder)
        for file_name in file_names
    ]


def copy_files_singlular(src_folder, dest_folder):
    """Copy every file under ``src_folder`` to ``dest_folder`` with one process pool.

    The whole tree is enumerated up front with ``get_all_files_singlular``
    and the resulting ``(src, dest)`` pairs are handed to ``copy_file`` via a
    single ``Pool.map`` call. No directories are excluded.

    Args:
        src_folder: Root directory to copy from.
        dest_folder: Root directory to copy into; each file keeps its path
            relative to ``src_folder``.

    Returns:
        None.
    """
    all_files = get_all_files_singlular(src_folder)
    # Both count lines are kept so output matches earlier recorded runs.
    print(len(all_files))
    print(len(all_files), "got in ", src_folder)

    # Map source files to their new destination paths
    tasks = [
        (file, os.path.join(dest_folder, os.path.relpath(file, src_folder)))
        for file in all_files
    ]

    # Nothing to copy: avoid starting worker processes for an empty tree.
    if not tasks:
        return

    # Use multiprocessing to copy files. Leave 20 cores free, but never ask
    # for fewer than one worker: Pool raises ValueError when the process
    # count is below 1, which happens on machines with 20 or fewer CPUs.
    workers = max(1, cpu_count() - 20)
    with Pool(workers) as pool:
        pool.map(copy_file, tasks)
        



ok


In [10]:
if __name__ == "__main__":
    # Copy every file found under src_folder (all subdirectories included)
    # to the same relative path under dest_folder.
    src_folder = '/home/research/datasets/'
    dest_folder = '/mnt/storage/research/datasets/'
    copy_files_singlular(src_folder, dest_folder)

5199041
5199041 got in  /home/research/datasets/


In [11]:
if __name__ == "__main__":
    # Copy the whole tree under src_folder into dest_folder, keeping each
    # file's path relative to the source root.
    src_folder = '/home/research/shaian/'
    dest_folder = '/mnt/storage/research/shaian/'
    copy_files_singlular(src_folder, dest_folder)

178
178 got in  /home/research/shaian/


In [12]:

if __name__ == "__main__":
    # Copy the whole tree under src_folder into dest_folder, keeping each
    # file's path relative to the source root.
    src_folder = '/home/research/vascular-herbarium-take-2/'
    dest_folder = '/mnt/storage/research/vascular-herbarium-take-2/'
    copy_files_singlular(src_folder, dest_folder)

37
37 got in  /home/research/vascular-herbarium-take-2/


In [13]:

if __name__ == "__main__":
    # Copy the whole tree under src_folder into dest_folder, keeping each
    # file's path relative to the source root.
    src_folder = '/home/research/vascular-herbarium/'
    dest_folder = '/mnt/storage/research/vascular-herbarium/'
    copy_files_singlular(src_folder, dest_folder)

128
128 got in  /home/research/vascular-herbarium/


In [14]:

if __name__ == "__main__":
    # Copy the whole tree under src_folder into dest_folder. The walk-based
    # copier applies no exclusions, so hidden and virtual-environment
    # directories are copied along with everything else.
    src_folder = '/home/oag6f/herbarium-vgg16-no-pretrain/'
    dest_folder = '/mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain/'
    copy_files_singlular(src_folder, dest_folder)

30321
30321 got in  /home/oag6f/herbarium-vgg16-no-pretrain/
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain/.venv/lib/python3.11/site-packages/keras/src/layers/core/__pycache__Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain/.venv/lib/python3.11/site-packages/keras/api/_v1/keras/constraintsCreating directory: /mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain/.venv/lib/python3.11/site-packages/keras/layers


Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain/.venv/lib/python3.11/site-packages/scipy/spatial/tests/data
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain/.venv/lib/python3.11/site-packages/keras/src/engine/__pycache__Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain/.venv/lib/python3.11/site-packages/scipy/stats/tests/__pycache__
Creatin

In [16]:

if __name__ == "__main__":
    # Copy the whole tree under src_folder into dest_folder. Files whose
    # destination already exists with the same size are skipped by copy_file.
    src_folder = '/home/oag6f/herbarium-vgg16-no-pretrain-prefork/'
    dest_folder = '/mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain-prefork/'
    copy_files_singlular(src_folder, dest_folder)

43717
43717 got in  /home/oag6f/herbarium-vgg16-no-pretrain-prefork/
File not found: '/home/oag6f/herbarium-vgg16-no-pretrain-prefork/.venv/bin/python' does not exist.
File not found: '/home/oag6f/herbarium-vgg16-no-pretrain-prefork/.venv/bin/python3' does not exist.
File not found: '/home/oag6f/herbarium-vgg16-no-pretrain-prefork/.venv/bin/python3.11' does not exist.
File '/mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain-prefork/.venv/lib/python3.11/site-packages/scipy/spatial/tests/__pycache__/test__procrustes.cpython-311.pyc' exists but sizes differ. Proceeding with copy.
File '/mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain-prefork/.venv/lib/python3.11/site-packages/scipy/optimize/minpack.py' exists but sizes differ. Proceeding with copy.File '/mnt/storage/DSAIC_Legacy/oag6f/herbarium-vgg16-no-pretrain-prefork/.venv/lib/python3.11/site-packages/keras_core/src/layers/regularization/__pycache__/gaussian_dropout.cpython-311.pyc' exists but sizes differ. Proceedi

In [17]:

if __name__ == "__main__":
    # Copy the whole tree under src_folder into dest_folder, keeping each
    # file's path relative to the source root.
    src_folder = '/home/oag6f/vascular-herbarium/'
    dest_folder = '/mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/'
    copy_files_singlular(src_folder, dest_folder)

1258
1258 got in  /home/oag6f/vascular-herbarium/
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/.ipynb_checkpoints
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/learn-rate
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/learn-rate
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/prep-work
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/prep-workCreating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/.git

Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/.git/objects/8a
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/.git/objects/36
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/.git/objects/b6
Creating directory: /mnt/storage/DSAIC_Legacy/oag6f/vascular-herbarium/.git/objects/0c
Creating directory: /mnt/storage/DSAIC_