> Optional: This script organizes files into `12hrs`, `18hrs`, and `24hrs` subfolders under each chromosome folder, based on their filenames. Next, it creates two subfolders inside each time-point folder, one for the mock and one for the infected condition (based on the file naming), and moves the relevant files into them.
> Use this only if the dataset's file names include multiple time points and the files are not yet organized into subfolders.


In [9]:
import os
import shutil

# Root directory that is scanned for chromosome folders (names starting
# with "chr").
base_path = "data/green_monkey/all_structure_files/"

# Time-point tokens: each one is both the substring searched for in a file
# name and the name of the subfolder the file is moved into.
time_points = [
    "12hrs",
    "18hrs",
    "24hrs",
]

# Condition tokens, matched against the lower-cased file name; each one is
# also the name of the per-condition subfolder.
# NOTE(review): presumably "untr" = mock and "vacv" = infected -- confirm.
conditions = [
    "untr",
    "vacv",
]

In [8]:
# First pass: inside every chromosome folder under `base_path`, move each
# loose file into the subfolder of the time point named in its file name.
for chr_folder in os.listdir(base_path):
    chr_folder_path = os.path.join(base_path, chr_folder)
    # Only directories whose name starts with "chr" are processed.
    if not os.path.isdir(chr_folder_path) or not chr_folder.startswith('chr'):
        continue

    # Create time point subfolders (exist_ok makes re-running the cell safe)
    for tp in time_points:
        tp_folder = os.path.join(chr_folder_path, tp)
        os.makedirs(tp_folder, exist_ok=True)

    # Move files to the correct timepoint subfolder
    for filename in os.listdir(chr_folder_path):
        file_path = os.path.join(chr_folder_path, filename)
        if os.path.isdir(file_path):  # Skip folders
            continue
        # The first time point (in `time_points` order) whose token occurs
        # anywhere in the file name wins; files matching none stay in place.
        for tp in time_points:
            if tp in filename:
                target_path = os.path.join(chr_folder_path, tp, filename)
                shutil.move(file_path, target_path)
                # The arrow is written as an escape so the message cannot be
                # corrupted again if the file is re-saved in another encoding.
                print(f"Moved {filename} \u2192 {tp}/")
                break  # Move only once

print("Done.")


Done.


In [10]:
# Second pass: inside every <chromosome>/<time point> folder, sort the files
# into one subfolder per condition.
for chrom_name in os.listdir(base_path):
    chrom_dir = os.path.join(base_path, chrom_name)
    # Anything that is not a directory named "chr..." is ignored.
    if not (os.path.isdir(chrom_dir) and chrom_name.startswith('chr')):
        continue

    for tp in time_points:
        tp_dir = os.path.join(chrom_dir, tp)
        # Time-point folders that do not exist are skipped, not created.
        if not os.path.isdir(tp_dir):
            continue

        # Make sure every condition has its subfolder; the message is printed
        # whether or not the folder already existed.
        for cond in conditions:
            cond_dir = os.path.join(tp_dir, cond)
            os.makedirs(cond_dir, exist_ok=True)
            print(f"Created: {cond_dir}")

        # Dispatch each regular file to the first condition whose token
        # occurs in its lower-cased name.
        for entry in os.listdir(tp_dir):
            source = os.path.join(tp_dir, entry)
            if not os.path.isfile(source):
                continue  # directories (including the condition folders)

            lowered = entry.lower()
            for cond in conditions:
                if cond not in lowered:
                    continue
                shutil.move(source, os.path.join(tp_dir, cond, entry))
                print(f"Moved {entry} to {tp}/{cond}/")
                break  # a file goes to exactly one condition
            else:
                # Reached only when no condition token matched.
                print(f"No condition match for: {entry}")

print("Done.")


Created: data/green_monkey/all_structure_files/chr23/12hrs/untr
Created: data/green_monkey/all_structure_files/chr23/12hrs/vacv
Moved structure_12hrs_untr_with_id0.csv to 12hrs/untr/
Moved structure_12hrs_untr.csv to 12hrs/untr/
Moved structure_12hrs_vacv.csv to 12hrs/vacv/
Moved structure_12hrs_vacv_with_id0.csv to 12hrs/vacv/
Created: data/green_monkey/all_structure_files/chr23/18hrs/untr
Created: data/green_monkey/all_structure_files/chr23/18hrs/vacv
Moved structure_18hrs_untr.csv to 18hrs/untr/
Moved structure_18hrs_untr_with_id0.csv to 18hrs/untr/
Moved structure_18hrs_vacv_with_id0.csv to 18hrs/vacv/
Moved structure_18hrs_vacv.csv to 18hrs/vacv/
Created: data/green_monkey/all_structure_files/chr23/24hrs/untr
Created: data/green_monkey/all_structure_files/chr23/24hrs/vacv
Moved structure_24hrs_vacv.csv to 24hrs/vacv/
Moved structure_24hrs_untr_with_id0.csv to 24hrs/untr/
Moved structure_24hrs_vacv_with_id0.csv to 24hrs/vacv/
Moved structure_24hrs_untr.csv to 24hrs/untr/
Created: d