## This notebook allows you to:
### 1. transfer data from the NAS to your local computer with the option to exclude some file types (e.g., .avi)
### 2. add a prefix to all the files and folders in your dataset
### 3. generate a .tar.gz archive excluding some file types (e.g., .avi)
### 4. delete all the folders starting with a prefix

### 1. transfer data from the NAS to your local computer with the option to exclude some file types (e.g., .avi)

In [1]:
# Animal IDs, grouped by the experimenter they belong to.
AurelienAnimalsID = [
    "MOUEml1_5", "MOUEml1_8", "MOUEml1_11", "MOUEml1_12",
    "MOUEml1_13", "MOUEml1_15", "MOUEml1_18", "MOUEml1_20",
    "MOURhoA_2", "MOURhoA_5", "MOURhoA_6", "MOURhoA_8",
    "MOURhoA_9", "MOURhoA_12", "MOURhoA_14",
    "MOUB6NN_2", "MOUB6NN_4", "MOUB6NN_6", "MOUB6NN_9",
    "MOUB6NN_11", "MOUB6NN_13", "MOUB6NN_15",
]

MaudAnimalsID = [
    'MOU3974', 'MOU3975', 'MOU3987', 'MOU3988', 'MOU3991', 'MOU3992',
    'MOU4551', 'MOU4552', 'MOU4560', 'MOU4561', 'MOU4562', 'MOU4563',
    'MOU4623', 'MOU4624',
]

# Source and destination directories.
# Only the LAST uncommented assignment of each variable is effective:
# the AurelienData value below is immediately replaced by the MashaData one.
data_sourcefolderpath = "/NAS02/AurelienData/"
#data_sourcefolderpath = "/NAS02/MaudData/"
data_sourcefolderpath = "/NAS02/MashaData/"

data_destinationfolderpath = "/LocalData/ForagingMice/4TowersTaskMethodPaper_Data/MashaData/"
#data_destinationfolderpath = "/LocalData/ForagingMice/4TowersTaskMethodPaper_Data/MaudData/"

# List of folders to transfer (alternative selection kept for reference)
# mice_folders_to_transfer = ['MOUKA266', 'MOUKA267', 'MOUKA268', 'MOUKA269', 'MOUKA270',
#                     'MOUKA271', 'MOUKA274', 'MOUKA275', 'MOUKA276', 'MOUKA277', 
#                     'MOUKA278', 'MOUKA279', 'MOUKA286', 'MOUKA287', 'MOUKA288',
#                     'MOUKA289', 'MOUKA290', 'MOUKA291', 'MOUKA296', 'MOUKA297',
#                     'MOUKA298', 'MOUKA299', 'MOUKA300', 'MOUKA301', 'MOUKA302', 
#                     'MOUKA303', 'MOUKA304', 'MOUKA305', 'MOUKA306', 'MOUKA307',
#                     'MOUKA308', 'MOUKA309', 'MOUKA310', 'MOUKA311', 'MOUKA312', 'MOUKA313']
# NOTE(review): the selected IDs are Aurelien's while both paths above point
# at MashaData - confirm the list and the paths are meant to go together.
mice_folders_to_transfer = AurelienAnimalsID


In [2]:
import subprocess

def run_rsync(source, destination, exclude_patterns=("*.avi",)):
    """
    Runs rsync to transfer files while excluding specified patterns.

    The executed command is ``rsync -avz [--exclude PATTERN ...] source destination``.
    A success or error message is printed and nothing is returned. Only a
    non-zero rsync exit status is caught; any other exception propagates.

    :param source: Source directory path
    :param destination: Destination directory path
    :param exclude_patterns: Iterable of file patterns to exclude (default: ``*.avi``).
                             A single string is treated as one pattern.
                             If set to None, no exclusions will be applied.
    """
    # Normalise the exclusions to a list. A bare string must be wrapped,
    # otherwise iterating it would yield one --exclude per character.
    if exclude_patterns is None:
        patterns = []
    elif isinstance(exclude_patterns, str):
        patterns = [exclude_patterns]
    else:
        patterns = list(exclude_patterns)

    rsync_command = ['rsync', '-avz']
    for pattern in patterns:
        rsync_command.extend(['--exclude', pattern])

    # Source and destination always come last on the command line
    rsync_command.extend([source, destination])

    try:
        # check=True turns a non-zero exit status into CalledProcessError
        subprocess.run(rsync_command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error transferring {source} to {destination}: {e}")
    else:
        excluded_msg = f"(excluding: {', '.join(patterns)})" if patterns else "(no exclusions)"
        print(f"Successfully transferred {source} to {destination} {excluded_msg}")

# Example usage:
# run_rsync('/source/path/', '/destination/path/')  # Excludes *.avi by default
# run_rsync('/source/path/', '/destination/path/', exclude_patterns=['*.mp4', '*.tmp'])  # Custom exclusions
# run_rsync('/source/path/', '/destination/path/', exclude_patterns=None)  # No exclusions


In [4]:
# Transfer the whole source folder; run_rsync's default exclusion (*.avi) applies.
run_rsync(source=data_sourcefolderpath, destination=data_destinationfolderpath)

sending incremental file list
created directory /LocalData/ForagingMice/4TowersTaskMethodPaper_Data/MashaData
./
Labbook_Mouse_Example
MOU21_1/
MOU21_1/Labbook_Mouse_21_1
MOU21_1/MOU21_1_20250417-0803/
MOU21_1/MOU21_1_20250417-0803/MOU21_1_20250417-0803_centroidTXY.csv
MOU21_1/MOU21_1_20250417-0803/MOU21_1_20250417-0803_sessionparam.csv
MOU21_1/MOU21_1_20250417-0803/MOU21_1_20250417-0803_turnsinfo.csv
MOU21_1/MOU21_1_20250417-1259/
MOU21_1/MOU21_1_20250417-1259/MOU21_1_20250417-1259_centroidTXY.csv
MOU21_1/MOU21_1_20250417-1259/MOU21_1_20250417-1259_sessionparam.csv
MOU21_1/MOU21_1_20250417-1259/MOU21_1_20250417-1259_turnsinfo.csv
MOU21_1/MOU21_1_20250418-0800/
MOU21_1/MOU21_1_20250418-0800/MOU21_1_20250418-0800_centroidTXY.csv
MOU21_1/MOU21_1_20250418-0800/MOU21_1_20250418-0800_sessionparam.csv
MOU21_1/MOU21_1_20250418-0800/MOU21_1_20250418-0800_turnsinfo.csv
MOU21_1/MOU21_1_20250418-1253/
MOU21_1/MOU21_1_20250418-1253/MOU21_1_20250418-1253_centroidTXY.csv
MOU21_1/MOU21_1_20250418-125

In [None]:

# Transfer the listed mouse folders one at a time with rsync, skipping .avi files.
# Each source/destination is "<base path><mouse id>/".
for folder in mice_folders_to_transfer:
    source_folder = f"{data_sourcefolderpath}{folder}/"
    destination_folder = f"{data_destinationfolderpath}{folder}/"
    run_rsync(
        source=source_folder,
        destination=destination_folder,
        exclude_patterns=["*.avi"],
    )

### 2. Add the MOU prefix to all files and folders

In [2]:
import os

def add_prefix_recursively(root_path, prefix="MOU", test_mode=False, skip_prefixed=True):
    """
    Prepend ``prefix`` to the name of every file and directory below ``root_path``.

    ``root_path`` itself is not renamed. The tree is walked bottom-up so that a
    directory is renamed only after everything inside it has been processed.
    Every planned rename is printed, whether or not it is carried out.

    :param root_path: Directory whose contents are renamed.
    :param prefix: Text prepended to each file and directory name.
    :param test_mode: If True, only print what would be renamed (dry run).
    :param skip_prefixed: If True (default), names that already start with
                          ``prefix`` are left alone, so running the function
                          again does not produce ``MOUMOU...`` names. Set to
                          False to prefix every entry unconditionally.
    """
    for dirpath, dirnames, filenames in os.walk(root_path, topdown=False):
        # Files first, then the sub-directories of the current directory
        for kind, names in (("file", filenames), ("directory", dirnames)):
            for name in names:
                old_path = os.path.join(dirpath, name)

                if skip_prefixed and name.startswith(prefix):
                    print(f"Skipping {kind} (already prefixed): {old_path}")
                    continue

                new_path = os.path.join(dirpath, prefix + name)

                # os.rename can silently replace an existing file on POSIX,
                # so never rename onto a path that is already taken.
                if os.path.lexists(new_path):
                    print(f"Skipping {kind} (target exists): {old_path} -> {new_path}")
                    continue

                print(f"Renaming {kind}: {old_path} -> {new_path}")
                if not test_mode:
                    os.rename(old_path, new_path)

if __name__ == "__main__":
    # NOTE(review): this rebinds the notebook-wide `data_sourcefolderpath`
    # that the transfer cells also read - confirm that is intended.
    data_sourcefolderpath = "/NAS02/AurelienData/"
    # test_mode=True only prints the planned renames; False renames on disk.
    test_mode = False
    add_prefix_recursively(root_path=data_sourcefolderpath, test_mode=test_mode)


Renaming file: /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1150/B6NN_1_20240812-1150.avi -> /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1150/MOUB6NN_1_20240812-1150.avi
Renaming file: /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1150/B6NN_1_20240812-1150_sessionparam.csv -> /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1150/MOUB6NN_1_20240812-1150_sessionparam.csv
Renaming file: /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1150/B6NN_1_20240812-1150_centroidTXY.csv -> /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1150/MOUB6NN_1_20240812-1150_centroidTXY.csv
Renaming file: /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1150/B6NN_1_20240812-1150_turnsinfo.csv -> /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1150/MOUB6NN_1_20240812-1150_turnsinfo.csv
Renaming file: /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1633/B6NN_1_20240812-1633.avi -> /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1633/MOUB6NN_1_20240812-1633.avi
Renaming file: /NAS02/AurelienData/B6NN_1/B6NN_1_20240812-1633/B6NN_1_20240812-1633_sessionpara

### 3. Generate a .tar.gz archive of the data folder, excluding .avi files

In [10]:
import tarfile


def create_tar_gz(folder_path, output_path, output_filename, exclude_extensions):
    """
    Write a gzip-compressed tar archive of the files found under ``folder_path``.

    Files are stored under their path relative to ``folder_path``. Only files
    are added, so a directory that contains no archived file does not appear
    in the archive. Each added or excluded file is reported with a print.

    :param folder_path: Directory whose files are archived (walked recursively).
    :param output_path: Directory that receives the archive; created if missing.
    :param output_filename: File name of the archive inside ``output_path``.
    :param exclude_extensions: Iterable of file-name suffixes to leave out
                               (e.g. ``('.avi',)``). A single string is treated
                               as one suffix; None means nothing is excluded.
    """
    # Ensure the output directory exists
    os.makedirs(output_path, exist_ok=True)

    # Full path for the output tar.gz file
    output_filepath = os.path.join(output_path, output_filename)
    archive_abspath = os.path.abspath(output_filepath)

    # str.endswith accepts a tuple of suffixes. A bare string must be wrapped,
    # otherwise it would be matched one character at a time.
    if exclude_extensions is None:
        excluded = ()
    elif isinstance(exclude_extensions, str):
        excluded = (exclude_extensions,)
    else:
        excluded = tuple(exclude_extensions)

    with tarfile.open(output_filepath, "w:gz") as tar:
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                full_path = os.path.join(root, file)

                # When the archive is written inside folder_path, the walk
                # finds the half-written archive itself; never add it.
                if os.path.abspath(full_path) == archive_abspath:
                    continue

                if file.endswith(excluded):
                    print(f"Excluded: {file}")  # Debugging line to show which files are excluded
                    continue

                # Add the file to the tar.gz archive
                tar.add(full_path, arcname=os.path.relpath(full_path, folder_path))
                print(f"Added: {full_path}")  # Debugging line to show which files are added

# Example usage
# Archive settings. The first folder_path is only a placeholder and is
# immediately replaced by the real data folder on the next line.
folder_path = 'path/to/your/folder'
folder_path = '/LocalData/ForagingMice/4TowersTaskMethodPaper_Data/AurelienData'
output_path = '/LocalData/ForagingMice/4TowersTaskMethodPaper_Data'
output_filename = 'AurelienData.tar.gz'
exclude_extensions = ('.avi',)  # File-name suffixes left out of the archive

create_tar_gz(
    folder_path=folder_path,
    output_path=output_path,
    output_filename=output_filename,
    exclude_extensions=exclude_extensions,
)

Added: /LocalData/ForagingMice/4TowersTaskMethodPaper_Data/AurelienData/MOURhoA_8/MOURhoA_8_20240617-1256/MOURhoA_8_20240617-1256_centroidTXY.csv
Added: /LocalData/ForagingMice/4TowersTaskMethodPaper_Data/AurelienData/MOURhoA_8/MOURhoA_8_20240617-1256/MOURhoA_8_20240617-1256_turnsinfo.csv
Added: /LocalData/ForagingMice/4TowersTaskMethodPaper_Data/AurelienData/MOURhoA_8/MOURhoA_8_20240617-1256/MOURhoA_8_20240617-1256_basic_processing_output.pickle
Added: /LocalData/ForagingMice/4TowersTaskMethodPaper_Data/AurelienData/MOURhoA_8/MOURhoA_8_20240617-1256/MOURhoA_8_20240617-1256_sessionparam.csv
Excluded: MOURhoA_8_20240617-1256.avi
Added: /LocalData/ForagingMice/4TowersTaskMethodPaper_Data/AurelienData/MOURhoA_8/MOURhoA_8_20240617-1256/MOURhoA_8_20240617-1256_overall_trajectory_correlations_per_session.pickle
Excluded: MOURhoA_8_20240619-1258.avi
Added: /LocalData/ForagingMice/4TowersTaskMethodPaper_Data/AurelienData/MOURhoA_8/MOURhoA_8_20240619-1258/MOURhoA_8_20240619-1258_turnsinfo.csv
A

### 4. Python function that deletes all folders starting with a given prefix inside a specified directory

### It has a `simulate` option that prints the list of folders that would be deleted without actually deleting them. Set `simulate=True` when calling the function to preview the deletions.

In [None]:
import os
import shutil

def delete_folders_by_prefix(input1: str, input2: str, simulate: bool = False):
    """
    Deletes all folders directly inside the given directory (input1) whose name
    starts with the given prefix (input2).

    Only immediate children of ``input1`` are considered, and plain files are
    never touched. Matching folders are removed recursively with
    ``shutil.rmtree``; a failure on one folder is reported and the remaining
    folders are still processed. Nothing is returned.

    :param input1: Path to the directory where folders should be deleted.
    :param input2: Prefix of the folders to be deleted. Must not be empty:
                   an empty prefix matches every folder, so it is rejected.
    :param simulate: If True, only prints the folders that would be deleted without actually deleting them.
    """
    if not os.path.isdir(input1):
        print(f"Error: {input1} is not a valid directory.")
        return

    # str.startswith("") is True for every name; refuse rather than wipe
    # the whole directory by accident.
    if not input2:
        print("Error: the prefix must not be empty (it would match every folder).")
        return

    # Sorted so the preview and the deletion log come out in a stable order
    folders_to_delete = [
        os.path.join(input1, name)
        for name in sorted(os.listdir(input1))
        if name.startswith(input2) and os.path.isdir(os.path.join(input1, name))
    ]

    if simulate:
        print("Folders that would be deleted:")
        for folder in folders_to_delete:
            print(folder)
        return

    for folder in folders_to_delete:
        try:
            shutil.rmtree(folder)
            print(f"Deleted: {folder}")
        except Exception as e:
            # Best effort: report the failure and continue with the next folder
            print(f"Failed to delete {folder}: {e}")

In [4]:
# Dry run: only list the folders starting with 'MOU'; nothing is deleted.
delete_folders_by_prefix(input1='/media/david/PavData5', input2='MOU', simulate=True)

Folders that would be deleted:
/media/david/PavData5/MOU001
/media/david/PavData5/MOU002
/media/david/PavData5/MOU004
/media/david/PavData5/MOU006
/media/david/PavData5/MOU007
/media/david/PavData5/MOU013
/media/david/PavData5/MOU015
/media/david/PavData5/MOU016
/media/david/PavData5/MOU017
/media/david/PavData5/MOU018
/media/david/PavData5/MOU019
/media/david/PavData5/MOU024
/media/david/PavData5/MOU025
/media/david/PavData5/MOU026
/media/david/PavData5/MOU027
/media/david/PavData5/MOU028
/media/david/PavData5/MOU029
/media/david/PavData5/MOU030
/media/david/PavData5/MOU031
/media/david/PavData5/MOU032
/media/david/PavData5/MOU033
/media/david/PavData5/MOU074
/media/david/PavData5/MOU075
/media/david/PavData5/MOU079
/media/david/PavData5/MOU093
/media/david/PavData5/MOU094
/media/david/PavData5/MOU100
/media/david/PavData5/MOU101
/media/david/PavData5/MOU102
/media/david/PavData5/MOU118
/media/david/PavData5/MOU119
/media/david/PavData5/MOU120
/media/david/PavData5/MOU121
/media/david

In [6]:
# Real run: folders starting with 'MOU' are removed with shutil.rmtree.
delete_folders_by_prefix(input1='/media/david/PavData5', input2='MOU', simulate=False)

Deleted: /media/david/PavData5/MOU001
Deleted: /media/david/PavData5/MOU002
Deleted: /media/david/PavData5/MOU004
Deleted: /media/david/PavData5/MOU006
Deleted: /media/david/PavData5/MOU007
Deleted: /media/david/PavData5/MOU013
Deleted: /media/david/PavData5/MOU015
Deleted: /media/david/PavData5/MOU016
Deleted: /media/david/PavData5/MOU017
Deleted: /media/david/PavData5/MOU018
Deleted: /media/david/PavData5/MOU019
Deleted: /media/david/PavData5/MOU024
Deleted: /media/david/PavData5/MOU025
Deleted: /media/david/PavData5/MOU026
Deleted: /media/david/PavData5/MOU027
Deleted: /media/david/PavData5/MOU028
Deleted: /media/david/PavData5/MOU029
Deleted: /media/david/PavData5/MOU030
Deleted: /media/david/PavData5/MOU031
Deleted: /media/david/PavData5/MOU032
Deleted: /media/david/PavData5/MOU033
Deleted: /media/david/PavData5/MOU074
Deleted: /media/david/PavData5/MOU075
Deleted: /media/david/PavData5/MOU079
Deleted: /media/david/PavData5/MOU093
Deleted: /media/david/PavData5/MOU094
Deleted: /me