In [2]:
import os
import shutil
from multiprocessing import Pool, cpu_count



def copy_file(args):
    """Copy one file, skipping it when an equally sized copy already exists.

    Takes a single tuple so it can be used directly with ``Pool.map``.

    Args:
        args: ``(src, dest)`` pair of source and destination file paths.

    Returns:
        None. Problems are reported on stdout rather than raised, so one bad
        file does not abort a whole ``Pool.map`` batch.
    """
    src, dest = args

    try:
        # Cheap "already copied" check: same size means the file is skipped.
        # This lives inside the try so a source that has vanished is reported
        # like any other copy failure instead of raising in the worker.
        if os.path.exists(dest):
            if os.path.getsize(src) == os.path.getsize(dest):
                return
            print(f"File '{dest}' exists but sizes differ. Proceeding with copy.")

        # A bare filename has an empty dirname; os.makedirs("") would raise,
        # so directories are only created when there is one to create.
        dest_dir = os.path.dirname(dest)
        if dest_dir and not os.path.exists(dest_dir):
            os.makedirs(dest_dir, exist_ok=True)
            print(f"Creating directory: {dest_dir}")

        # copy2 also copies file metadata where the platform allows it.
        shutil.copy2(src, dest)
    except PermissionError:
        print(f"Permission denied: Cannot copy file from '{src}' to '{dest}'.")
    except FileNotFoundError:
        print(f"File not found: '{src}' does not exist.")
    except Exception as e:
        print(f"An error occurred while copying the file: {e}")




def get_all_files(src_folder):
    """Return the full paths of the files located directly in ``src_folder``.

    Sub-directories are not descended into and are not part of the result.
    """
    candidates = (os.path.join(src_folder, entry) for entry in os.listdir(src_folder))
    # Keep regular files only; directories are handled by the caller.
    return [path for path in candidates if os.path.isfile(path)]

def copy_files_recursively(
    src_folder,
    dest_folder,
    excluded_dirs=(".local", ".cache", ".vscode-server", ".venv", "site-packages"),
):
    """Mirror ``src_folder`` into ``dest_folder``, one directory level at a time.

    The files directly inside ``src_folder`` are copied in parallel with a
    process pool, then each sub-directory is processed by a recursive call.

    Args:
        src_folder: Directory to copy from.
        dest_folder: Directory to copy into; missing sub-directories are created.
        excluded_dirs: Directory names that are skipped entirely. A directory
            is skipped when its own name equals one of these entries, so
            unrelated paths that merely contain such text are still copied.

    Returns:
        None.
    """
    all_files = get_all_files(src_folder)
    print(len(all_files), "got in ", src_folder)

    # Map source files to their new destination paths.
    tasks = [
        (file, os.path.join(dest_folder, os.path.relpath(file, src_folder)))
        for file in all_files
    ]

    # Only start a pool when there is work to do. The original reserves 20
    # cores for other use; clamp to 1 so hosts with <= 20 cores do not hit
    # ValueError from Pool, and never start more workers than tasks.
    if tasks:
        worker_count = max(1, min(cpu_count() - 20, len(tasks)))
        with Pool(worker_count) as pool:
            pool.map(copy_file, tasks)

    # Descend into sub-directories.
    # NOTE(review): os.path.isdir follows symlinks, so a symlink cycle would
    # recurse until the path becomes invalid - confirm that is acceptable.
    for item in os.listdir(src_folder):
        src_item = os.path.join(src_folder, item)
        if os.path.isfile(src_item):
            continue
        print("focusing on , ", src_folder)
        if not os.path.isdir(src_item):
            continue
        if item in excluded_dirs:
            continue

        dest_item = os.path.join(dest_folder, item)
        os.makedirs(dest_item, exist_ok=True)
        copy_files_recursively(src_item, dest_item, excluded_dirs)


# Marker output: shows that this cell ran through to its last statement.
print("ok")

ok


In [4]:
if __name__ == "__main__":
    # Name of the archive folder under /mnt/storage that receives the copies.
    legacy_name = "DSAIC_Legacy"

    # Accounts whose home directories are copied, in processing order.
    legacy_users = [
        "ablkkn",
        "ad5f2",
        "rmyhw",
        "spd6h",
        "tucdm",
        "dtnft6",
        "hspcff",
        "rtbr4",
    ]

    # Each home directory is mirrored into its own per-user folder.
    for user_name in legacy_users:
        src_folder = f'/home/{user_name}'
        dest_folder = f'/mnt/storage/{legacy_name}/{user_name}'
        copy_files_recursively(src_folder, dest_folder)
        print("completed the copy for the user ", user_name)

5 got in  /home/ablkkn
focusing on ,  /home/ablkkn
completed the copy for the user  ablkkn
9 got in  /home/ad5f2
focusing on ,  /home/ad5f2
2 got in  /home/ad5f2/FPHLM_af)4
focusing on ,  /home/ad5f2
completed the copy for the user  ad5f2
5 got in  /home/rmyhw
focusing on ,  /home/rmyhw
focusing on ,  /home/rmyhw
0 got in  /home/rmyhw/.jupyter
focusing on ,  /home/rmyhw/.jupyter
0 got in  /home/rmyhw/.jupyter/lab
focusing on ,  /home/rmyhw/.jupyter/lab
1 got in  /home/rmyhw/.jupyter/lab/workspaces
completed the copy for the user  rmyhw
14 got in  /home/spd6h
focusing on ,  /home/spd6h
1 got in  /home/spd6h/SafeGraph_Datasets
focusing on ,  /home/spd6h/SafeGraph_Datasets
1 got in  /home/spd6h/SafeGraph_Datasets/.ipynb_checkpoints
focusing on ,  /home/spd6h
completed the copy for the user  spd6h
4 got in  /home/tucdm
focusing on ,  /home/tucdm
0 got in  /home/tucdm/.config
focusing on ,  /home/tucdm/.config
1 got in  /home/tucdm/.config/configstore
focusing on ,  /home/tucdm
focusing on 