In [4]:
import os
import glob
import json
from collections import defaultdict


def cft_study_meta_base(fp_base):
    """Load every top-level ``*.json`` file found directly inside a directory.

    Each matched path is printed before it is parsed.

    Parameters
    ----------
    fp_base : str
        Directory to search. The search is not recursive.

    Returns
    -------
    dict
        Maps each JSON file's base name to its parsed content. Empty when
        nothing matches.

    Raises
    ------
    json.JSONDecodeError
        If a matched file does not contain valid JSON.
    """
    meta = {}
    # Escape the directory part so characters such as '[' or '*' in the path
    # are matched literally instead of being treated as glob wildcards.
    pattern = os.path.join(glob.escape(fp_base), '*.json')
    # glob returns matches in arbitrary order; sort for reproducible output.
    for _sdl in sorted(glob.glob(pattern)):
        print(_sdl)
        _meta_key = os.path.basename(_sdl)
        # Context manager guarantees the handle is closed, even on parse errors.
        with open(_sdl, 'r') as _f:
            meta[_meta_key] = json.load(_f)
    return meta


def print_dict_recursively(d, indent=0):
    """Print a (possibly nested) dictionary as an indented outline.

    A blank line is emitted before and after the entries of every dictionary
    level. Nested dictionaries are printed under their key with the indent
    level increased by two (i.e. four more spaces).

    Parameters
    ----------
    d : dict
        Dictionary to print.
    indent : int, optional
        Current indent level; each level is rendered as two spaces.
    """
    pad = ' ' * (2 * indent)
    print()
    for key, value in d.items():
        if not isinstance(value, dict):
            # Leaf value: key and value share one line.
            print(f"{pad}{key}: {value}")
            continue
        # Nested mapping: header line, then descend.
        print(f"{pad}{key}:")
        print_dict_recursively(value, indent + 2)
    print()

def cft_study_meta_recon(fp_base):
    """Load ``recon/study_detail.json`` from a study directory.

    Parameters
    ----------
    fp_base : str
        Study directory containing a ``recon`` sub-directory.

    Returns
    -------
    object
        The parsed JSON content of ``<fp_base>/recon/study_detail.json``.

    Raises
    ------
    FileNotFoundError
        If the file does not exist.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    fp_recon = os.path.join(fp_base, 'recon', 'study_detail.json')
    # Context manager closes the handle; the original left it open.
    with open(fp_recon, 'r') as f:
        meta_recon = json.load(f)
    return meta_recon


def print_cft_study_roi(meta_recon):
    """Print the bounding box, extents and area of every ROI in a study.

    Parameters
    ----------
    meta_recon : dict
        Must provide ``meta_recon['study_details']['rois']`` as a mapping
        whose values are dicts with a ``'name'`` entry and a four-element
        ``'bounding_box'`` entry, unpacked here as ``(x0, y0, x1, y1)``.
        The coordinates must be integers: the ``d`` format spec used for
        the extents raises ``ValueError`` for floats.
    """
    for _k, _roi in meta_recon['study_details']['rois'].items():
        x0, y0, x1, y1 = tuple(_roi['bounding_box'])
        dx = x1 - x0
        dy = y1 - y0
        area = dx * dy
        print()
        print("-----------------------------")
        print()
        print(f"  ROI: {_roi['name']}")
        print()
        print(" BBox: ")
        print()
        print(f" (x0, x1) = ({x0}, {x1})")
        print(f" (y0, y1) = ({y0}, {y1})")
        print()
        print(f"       dx = {dx:7d} pixel width")
        # dy is the vertical extent; the original mislabelled it "pixel width".
        print(f"       dy = {dy:7d} pixel height")
        print(f"     area = {area:7d} pixel")
        print()


def find_files_by_extension(root_dir):
    """Group every file under ``root_dir`` by its lower-cased extension.

    The directory tree is walked recursively. Files whose name has no
    extension are left out.

    Parameters
    ----------
    root_dir : str
        Directory at which the walk starts.

    Returns
    -------
    dict
        Maps each extension (lower case, without the dot) to the list of
        full paths carrying it. Keys are inserted in sorted order.
    """
    paths_by_ext = {}
    for folder, _subdirs, names in os.walk(root_dir):
        for name in names:
            # Lower-case so 'JPG' and 'jpg' land in the same bucket.
            suffix = os.path.splitext(name)[1].lower().strip('.')
            if not suffix:
                continue
            paths_by_ext.setdefault(suffix, []).append(os.path.join(folder, name))
    # Rebuild with alphabetically ordered keys.
    return {ext: paths for ext, paths in sorted(paths_by_ext.items(), key=lambda kv: kv[0])}


def summarize_directory(filetype_dict):
    """Print the number of extensions and the file count for each one.

    Parameters
    ----------
    filetype_dict : dict
        Maps an extension string to a sized collection of file paths.
    """
    n_ext = len(filetype_dict)
    print(f"Found {n_ext} extensions (filecount):")
    for ext in filetype_dict:
        n_files = len(filetype_dict[ext])
        # Extension is left-aligned in a 7-character column.
        print(f"  .{ext:7s} ({n_files})")

In [5]:
# Root directory of the study to inspect (hard-coded absolute path; edit this
# line to point the notebook at a different study).
fp_base = '/mnt/server/bmc-lab6/CFT/CFT Data from New Version/20240428_LF_AF647'

# Load the JSON file(s) sitting directly in the study root and print their
# nested contents as an indented outline.
meta = cft_study_meta_base(fp_base)
print_dict_recursively(meta)
print()
# Walk the whole study tree, group files by extension, and report the counts.
filetype_dict = find_files_by_extension(fp_base)
summarize_directory(filetype_dict)

/mnt/server/bmc-lab6/CFT/CFT Data from New Version/20240428_LF_AF647/study_detail_20240428_203933347.json

study_detail_20240428_203933347.json:

    study_details:

        study: 20240428_LF_AF647
        subject: Multiple Mouse Tumors
        email: na
        email_int: -1
        fov: FOV A
        protocol: AF647
        num_slices: 1332
        slice_thickness: 20
        temp_setpoint: -14
        card_rect: [0, 0, 4112, 3008]
        block_rect: [50, 100, 3700, 2900]

    protocol_detail (out):

        protocol:

            name: AF647
            path: E:\EmitImagingSoftwareV2\study_data\users\howard\protocols\AF647.json

        apps: [{'name': 'AF 647', 'laser': '640', 'led': '', 'filter': '680/13', 'exp_opt': 'HDR+', 'man_exp': 0}]




Found 5 extensions (filecount):
  .jp2     (99975)
  .jpg     (22662)
  .json    (2)
  .p       (2)
  .tiff    (17437)


In [6]:
# Extensions found under fp_base (lower-cased, without the leading dot).
filetype_dict.keys()

dict_keys(['jp2', 'jpg', 'json', 'p', 'tiff'])

In [15]:
# Peek at the first five .jp2 paths to see how the files are laid out on disk.
filetype_dict['jp2'][:5]

['/mnt/server/bmc-lab6/CFT/CFT Data from New Version/20240428_LF_AF647/recon/rois/d0M1/ex_640nm_em_680nm/1500/slice_0001_1500.jp2',
 '/mnt/server/bmc-lab6/CFT/CFT Data from New Version/20240428_LF_AF647/recon/rois/d0M1/ex_640nm_em_680nm/1500/slice_0002_1500.jp2',
 '/mnt/server/bmc-lab6/CFT/CFT Data from New Version/20240428_LF_AF647/recon/rois/d0M1/ex_640nm_em_680nm/1500/slice_0003_1500.jp2',
 '/mnt/server/bmc-lab6/CFT/CFT Data from New Version/20240428_LF_AF647/recon/rois/d0M1/ex_640nm_em_680nm/1500/slice_0004_1500.jp2',
 '/mnt/server/bmc-lab6/CFT/CFT Data from New Version/20240428_LF_AF647/recon/rois/d0M1/ex_640nm_em_680nm/1500/slice_0005_1500.jp2']

In [36]:
import pandas as pd

# Inspect the first extension bucket. The keys of filetype_dict are sorted,
# so for this study the first one is 'jp2'.
filetype = list(filetype_dict.keys())[0]
print(f"Filetype: {filetype}")
# Build the frame from the same bucket that was just reported, so the printed
# label and the data cannot disagree (the original hard-coded 'jp2' here).
df = pd.DataFrame(filetype_dict[filetype], columns=['filepath'])

# Path fragment to filter on. Other fragments that were tried:
# 'ex_640nm_em_680nm' and '1500'.
_substr = 'd0M1'

# Literal (non-regex) substring match, computed once and reused for both the
# filtered listing and the match/no-match counts.
_mask = df['filepath'].str.contains(_substr, regex=False)
print(df[_mask].reset_index(drop=True))
print(_mask.value_counts())

Filetype: jp2
                                               filepath
0     /mnt/server/bmc-lab6/CFT/CFT Data from New Ver...
1     /mnt/server/bmc-lab6/CFT/CFT Data from New Ver...
2     /mnt/server/bmc-lab6/CFT/CFT Data from New Ver...
3     /mnt/server/bmc-lab6/CFT/CFT Data from New Ver...
4     /mnt/server/bmc-lab6/CFT/CFT Data from New Ver...
...                                                 ...
6660  /mnt/server/bmc-lab6/CFT/CFT Data from New Ver...
6661  /mnt/server/bmc-lab6/CFT/CFT Data from New Ver...
6662  /mnt/server/bmc-lab6/CFT/CFT Data from New Ver...
6663  /mnt/server/bmc-lab6/CFT/CFT Data from New Ver...
6664  /mnt/server/bmc-lab6/CFT/CFT Data from New Ver...

[6665 rows x 1 columns]
False    93310
True      6665
Name: filepath, dtype: int64
