In [1]:
import contextlib
import pathlib

import np_config
import np_session

# Folder whose entries the cleanup cells below iterate over.
INCOMING_ROOT = pathlib.Path(
    "//allen/programs/mindscope/production/incoming"
)
# Fail early, naming the path, if the folder cannot be reached (e.g. the
# network share is not mounted), rather than with a bare AssertionError.
assert INCOMING_ROOT.exists(), f"{INCOMING_ROOT} does not exist or is not accessible"

# When True, `unlink()` returns immediately without deleting anything, so the
# cells below can be run to preview how much data would be removed.
DRYRUN = False

In [2]:
def size(path: pathlib.Path) -> int:
    """Return the size of `path` in bytes.

    For a directory this is the combined size of every regular file found
    anywhere beneath it; for any other path it is the size reported by
    `stat()` for the path itself.
    """
    if not path.is_dir():
        return path.stat().st_size

    total = 0
    for entry in path.rglob('*'):
        # Sub-directories themselves contribute nothing; only files are summed.
        if entry.is_file():
            total += entry.stat().st_size
    return total
    
def sizes_match(p1, p2):
    """Return True when `p1` and `p2` have the same size in bytes, as measured by `size()`."""
    first_size = size(p1)
    second_size = size(p2)
    return first_size == second_size

def unlink(path):
    """Delete `path`: a file, a symlink, or a directory and everything in it.

    Does nothing when the module-level `DRYRUN` flag is true.

    Directories are emptied recursively and then removed. Errors raised while
    deleting an individual child are suppressed so the remaining children are
    still attempted; if anything is left behind, the final `rmdir()` raises
    `OSError` and the directory is kept.

    A symlink is always removed as a link, even when it points at a directory,
    so that nothing outside the tree being deleted is touched.
    """
    if DRYRUN:
        return
    # `is_dir()` follows symlinks, so links must be excluded explicitly:
    # recursing through one would delete the contents of its target.
    if path.is_dir() and not path.is_symlink():
        for child in path.iterdir():
            with contextlib.suppress(OSError):
                unlink(child)
        # Raises OSError if any child could not be deleted above.
        path.rmdir()
    else:
        path.unlink()

Delete raw ephys data that's already on LIMS (a file or folder is treated as uploaded when its size matches the copy on LIMS):

In [3]:
# Running total of bytes removed (or, with DRYRUN enabled, bytes that would be
# removed - `unlink()` is a no-op in that mode, but the total is still updated).
total_bytes = 0
for incoming_path in INCOMING_ROOT.iterdir():

    try:
        session = np_session.PipelineSession(incoming_path)
    except np_session.SessionError:
        # np_session rejected this entry as a session; leave it alone
        continue

    if not session.lims_path:
        continue

    # Location on LIMS where a copy of this incoming entry is looked for.
    existing = session.lims_path / incoming_path.name
    if not existing.exists():
        continue

    # Directories already cleared as a whole. Their descendants must be skipped:
    # they are either gone already or, in a dry run, would otherwise be counted
    # a second time and inflate `total_bytes`.
    cleared_dirs = set()

    # Snapshot the listing up front so the traversal is not affected by the
    # deletions performed inside the loop. Parents are yielded before their
    # children, so whole-directory matches are tried first.
    for incoming_file in tuple(incoming_path.rglob('*')):
        if not cleared_dirs.isdisjoint(incoming_file.parents):
            continue

        existing_file = existing / incoming_file.relative_to(incoming_path)
        if not existing_file.exists():
            continue

        # Measure the incoming entry once and reuse it for both the comparison
        # and the running total (a directory's size requires a full tree walk).
        incoming_size = size(incoming_file)
        if incoming_size != size(existing_file):
            continue

        total_bytes += incoming_size
        if incoming_file.is_dir():
            cleared_dirs.add(incoming_file)
        unlink(incoming_file)
        print(f"Cleared {total_bytes/1024**3:,.0f} GB\r", end='')

Delete sorted probe data that's already on LIMS (judging by file size and
last-modified time, in case multiple probe uploads have been made):

In [None]:
# Running total of bytes removed (or, with DRYRUN enabled, bytes that would be
# removed - `unlink()` is a no-op in that mode, but the total is still updated).
total_bytes = 0
for probe in 'ABCDEF':
    for probe_folder in INCOMING_ROOT.glob(f'*probe{probe}_sorted'):

        try:
            session = np_session.Session(probe_folder)
        except np_session.SessionError:
            continue

        # `probe` is already an upper-case letter, so no case conversion is needed.
        if f'ephys_raw_data_probe_{probe}_sorted' not in session.platform_json.files:
            # probe not inserted and will never be uploaded
            total_bytes += size(probe_folder)
            unlink(probe_folder)
            print(f"Cleared {total_bytes/1024**3:,.0f} GB\r", end='')
            continue

        if not session.lims_path:
            continue

        for incoming_path in probe_folder.rglob('*'):
            # Only individual files are compared; directories are left in place.
            if incoming_path.is_dir():
                continue

            # Every path under the LIMS folder that sits at the same relative
            # location inside a `*_probe<X>` directory is a candidate copy.
            existing = tuple(
                session.lims_path.rglob(
                    f'*/*_probe{probe}/{incoming_path.relative_to(probe_folder)}'
                )
            )
            if not existing:
                continue

            # Stat the incoming file once rather than up to three times per
            # candidate (size comparison, mtime comparison, running total).
            incoming_stat = incoming_path.stat()
            for lims_path in existing:
                if (
                    size(lims_path) == incoming_stat.st_size
                    and lims_path.stat().st_mtime == incoming_stat.st_mtime
                ):
                    total_bytes += incoming_stat.st_size
                    unlink(incoming_path)
                    print(f"Cleared {total_bytes/1024**3:,.0f} GB\r", end='')
                    # one matching copy is enough; stop checking candidates
                    break
                        