# In [None]:
# Given a parent directory:
#   1. search its subdirectories for files that match a naming convention,
#   2. move those files into a new directory,
#   3. log every moved file to a CSV file.

import os
import shutil
import pandas as pd
from pathlib import Path
import datetime

def clean_dir(dir, out, log_pth, starts=None, contains=None, ends=None):
    """Move matching files out of the subdirectories of ``dir`` and log the moves.

    Each immediate subdirectory of ``dir`` is searched recursively. Files that
    sit directly in ``dir`` are not examined. A file is moved when its name
    passes every filter that is not ``None``; with all filters left at
    ``None`` every file found is moved.

    A file found at ``dir/<sub>/<rel>/<name>`` is moved to
    ``out/<sub>/<rel>/<name>``, so files that share a name in different
    nested folders do not land on the same target path.

    Args:
        dir: Parent directory whose subdirectories are searched.
        out: Directory that receives the moved files (created if missing).
            If it lies inside ``dir`` it is skipped during the search.
        log_pth: Path of the CSV log to write. Its parent directory is
            created if missing.
        starts: Required file-name prefix (anything ``str.startswith``
            accepts), or ``None`` to skip this filter.
        contains: Substring that must occur in the file name, or ``None``.
        ends: Required file-name suffix (anything ``str.endswith`` accepts),
            or ``None``.

    Returns:
        None. The log CSV has the columns ``file_name``, ``source_path``,
        ``target_path`` and ``now`` (time of the move); the header row is
        written even when nothing was moved.
    """
    os.makedirs(out, exist_ok=True)
    out_abs = os.path.abspath(out)

    # Create the log's folder up front so a bad log path fails before any
    # file has been moved rather than after.
    log_parent = os.path.dirname(log_pth)
    if log_parent:
        os.makedirs(log_parent, exist_ok=True)

    log_columns = ['file_name', 'source_path', 'target_path', 'now']
    log_data = []

    # Only the first level of `dir` is listed here; the recursion into each
    # subdirectory happens in the inner walk. A missing `dir` yields no
    # subdirectories instead of raising.
    top_dirs = sorted(next(os.walk(dir), (dir, [], []))[1])

    try:
        for idx, s_dir in enumerate(top_dirs, start=1):
            sub_dir = os.path.join(dir, s_dir)
            if os.path.abspath(sub_dir) == out_abs:
                # Never treat the output directory as a source.
                continue
            print(f"sub-directory {idx}/{len(top_dirs)}: {sub_dir}")

            for sub_rt, sub_sub_dirs, sub_files in os.walk(sub_dir):
                # Prune the output directory in place so os.walk does not
                # descend into files that were just moved.
                sub_sub_dirs[:] = [
                    d for d in sub_sub_dirs
                    if os.path.abspath(os.path.join(sub_rt, d)) != out_abs
                ]
                print(f"sub_files: {sub_files}")

                # Mirror the position below the subdirectory in the target.
                rel_dir = os.path.relpath(sub_rt, sub_dir)
                if rel_dir == os.curdir:
                    trgt_dir = os.path.join(out, s_dir)
                else:
                    trgt_dir = os.path.join(out, s_dir, rel_dir)

                for file in sub_files:
                    if starts is not None and not file.startswith(starts):
                        continue
                    if contains is not None and contains not in file:
                        continue
                    if ends is not None and not file.endswith(ends):
                        continue

                    # The file lives in the directory currently being
                    # walked, which may be nested below `sub_dir`.
                    src_path = os.path.join(sub_rt, file)
                    trgt_pth = os.path.join(trgt_dir, file)
                    os.makedirs(trgt_dir, exist_ok=True)

                    shutil.move(src_path, trgt_pth)

                    log_data.append({'file_name': file,
                                     'source_path': src_path,
                                     'target_path': trgt_pth,
                                     'now': datetime.datetime.now()})
    finally:
        # Write the log even if a move failed part-way, so the files that
        # were already moved stay traceable.
        log_df = pd.DataFrame(log_data, columns=log_columns)
        log_df.to_csv(log_pth, index=False)
        print(f"log saved to {log_pth}")
    

# Run the clean-up only when this file is executed directly (script or
# notebook cell), so that importing it never moves any data.
if __name__ == "__main__":
    # The timestamp in the log name keeps logs of separate runs apart.
    run_stamp = datetime.datetime.now().strftime('%d%b%Y-%H%M')
    clean_dir(dir="/host/verges/tank/data/daniel/01_3T7T/z/maps/",
              out="/host/verges/tank/data/daniel/01_3T7T/z/maps_old/",
              log_pth=f"/host/verges/tank/data/daniel/01_3T7T/z/maps/cleanup_log_{run_stamp}.csv",
              contains="_hipp_")