In [57]:
import os
import re
import numpy as np
import pandas as pd
import scipy as sp
import scipy.io as sio
import seaborn as sns
from tqdm import tqdm
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd
import warnings
# Silence every DeprecationWarning, from any module. The motivation was matplotlib commands
# that will be outdated soon, but the filter itself is not restricted to matplotlib.
warnings.filterwarnings("ignore", category=DeprecationWarning)
# Likewise silence every FutureWarning.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [58]:
def convert_to_code_path(windows_path):
    """
    Return ``windows_path`` with every backslash doubled.

    Args:
    - windows_path: String that may contain backslash separators.

    Returns:
    - A new string in which each single backslash has become two backslashes;
      all other characters are left untouched.
    """
    single_backslash = "\\"
    doubled_backslash = single_backslash * 2
    # Cutting at each backslash and re-joining with the doubled form doubles every occurrence.
    pieces = windows_path.split(single_backslash)
    return doubled_backslash.join(pieces)

In [59]:
import os
import re
import pandas as pd

def extract_numeric(value):
    """
    Extract the numeric part of a value as a float.

    Args:
    - value: A real number (Python or NumPy scalar) or a string that may contain a
      number, e.g. "2.0s".

    Returns:
    - float: the number itself for numeric input, or the first well-formed unsigned
      decimal found in a string ("1.2.3" yields 1.2).
    - None: if the input is neither a real number nor a string, or the string
      contains no digits.

    Note: a leading sign is not captured, so "-5" yields 5.0.
    """
    import numbers  # local import so the function does not rely on another cell's imports

    # numbers.Real also covers NumPy scalars such as np.int64, which a plain
    # isinstance(value, (int, float)) check rejects.
    if isinstance(value, numbers.Real):
        return float(value)
    if isinstance(value, str):
        # Match "12", "12.5" or ".5" only; a lone "." or a second decimal point is
        # never part of the match, so float() cannot raise on the captured text.
        match = re.search(r'\d+(?:\.\d+)?|\.\d+', value)
        if match:
            return float(match.group(0))
    return None  # No numeric value could be extracted

def process_subfolders(main_folder, div, sheet_path, sheet_name='ridges', frame_interval_range=None, resolution_range=None):
    """
    Count the subfolders of `main_folder` whose frame interval and resolution, looked up
    in an Excel sheet by file name, fall inside the requested ranges.

    A subfolder is paired with a sheet row when the row's 'file name' is a
    case-insensitive substring of the subfolder name. If several file names match,
    the longest one is used (ties go to the earliest row), so that e.g. 'timelapse1'
    does not claim a folder that belongs to 'timelapse10'.

    Args:
    - main_folder: Main directory path containing subfolders.
    - div: The 'div' value used to filter the rows of the Excel sheet.
    - sheet_path: Path to the Excel sheet containing imaging details. The sheet must
      provide the columns 'div', 'file name', 'frame interval' and 'resolution'.
    - sheet_name: The sheet name in the Excel file (default: 'ridges').
    - frame_interval_range: Sequence (min_interval, max_interval), inclusive, or None
      to skip the frame-interval check.
    - resolution_range: Sequence (min_resolution, max_resolution), inclusive, or None
      to skip the resolution check.

    Returns:
    - valid_subfolder_count: The number of subfolders that meet the validity conditions.
    - skipped_folders: A list of dicts, one per skipped subfolder. Out-of-range folders
      carry 'folder', 'frame_interval' and 'resolution'; unmatched folders carry
      'folder' and 'reason'.

    Side effects:
    - Prints one line per subfolder and a final summary.
    """

    def _within(bounds, number):
        # A missing range or an unreadable value (None) never disqualifies a folder.
        return bounds is None or number is None or bounds[0] <= number <= bounds[1]

    # Load the Excel sheet and keep only the rows recorded for the requested 'div'.
    df_sheet = pd.read_excel(sheet_path, sheet_name=sheet_name)
    df_sheet_filtered = df_sheet[df_sheet['div'] == div]

    # (row position, file name, lower-cased file name) for every usable row.
    # Blank Excel cells arrive as NaN (a float) and would crash on .lower();
    # empty names would be a substring of every folder name. Both are dropped.
    candidates = [
        (position, name, name.lower())
        for position, name in enumerate(df_sheet_filtered['file name'].tolist())
        if isinstance(name, str) and name.strip()
    ]

    valid_subfolder_count = 0
    skipped_folders = []

    # Loop through each subfolder in the main directory
    for subfolder in os.listdir(main_folder):
        subfolder_path = os.path.join(main_folder, subfolder)

        # Plain files are ignored entirely: neither counted nor reported.
        if not os.path.isdir(subfolder_path):
            continue

        folder_key = subfolder.lower()
        matches = [entry for entry in candidates if entry[2] in folder_key]

        # If no match was found, record it
        if not matches:
            print(f"No match found for subfolder '{subfolder}' in the Excel 'file name' column.")
            skipped_folders.append({
                'folder': subfolder,
                'reason': 'No matching filename found in sheet.'
            })
            continue

        # Prefer the most specific (longest) file name; max() keeps the earliest on ties.
        position, value, _ = max(matches, key=lambda entry: len(entry[2]))
        print(f"Match found: Subfolder '{subfolder}' contains '{value}' from Excel sheet.")

        # Positional lookup avoids chained indexing and is independent of the index labels.
        interval = df_sheet_filtered['frame interval'].iloc[position]
        integer_interval_value = extract_numeric(interval)

        resolution = df_sheet_filtered['resolution'].iloc[position]
        integer_resolution_value = extract_numeric(resolution)

        # Check validity based on provided ranges
        if _within(resolution_range, integer_resolution_value) and _within(frame_interval_range, integer_interval_value):
            valid_subfolder_count += 1
        else:
            skipped_folders.append({
                'folder': subfolder,
                'frame_interval': integer_interval_value,
                'resolution': integer_resolution_value
            })

    print(f"{valid_subfolder_count} subfolders passed the validity checks.")
    print(f"{len(skipped_folders)} subfolders were skipped due to invalid conditions or no matching filenames:")
    for skipped in skipped_folders:
        print(f"Folder: {skipped['folder']}, Reason: {skipped.get('reason', 'Invalid Frame Interval/Resolution')}, Frame Interval: {skipped.get('frame_interval', 'None')}, Resolution: {skipped.get('resolution', 'None')}")

    return valid_subfolder_count, skipped_folders


In [67]:
# Inclusive [min, max] bounds passed below as frame_interval_range and resolution_range.
# NOTE(review): units are not stated in this notebook — presumably they match the
# 'frame interval' and 'resolution' columns of the Excel sheet; confirm.
frame_interval = [1.8,2.2]
resolution = [2.6,2.9]

## DIV2
main_folder = r'E:\Spandan\2D_Neurons_Paper\Ridges\div2'   # Main folder whose subfolders are checked
sheet_path = r'E:\Spandan\Kate\NEURON MOVIES\tifNotes.xlsx'  # Path to the Excel file

# Count the subfolders whose sheet entries (rows with div == 2) fall inside both ranges.
# sheet_name is omitted, so the function's default sheet ('ridges') is read.
valid_subfolder_count, skipped_folders = process_subfolders(main_folder, div=2, sheet_path=sheet_path, frame_interval_range=frame_interval, resolution_range=resolution)




Match found: Subfolder 'ridges_19_01_21_div2cortex_B1_timelapse2' contains 'div2cortex_B1_timelapse2' from Excel sheet.
Match found: Subfolder 'ridges_19_01_21_div2cortex_B1_timelapse3' contains 'div2cortex_B1_timelapse3' from Excel sheet.
Match found: Subfolder 'ridges_19_01_21_div2cortex_B1_timelapse5' contains 'div2cortex_B1_timelapse5' from Excel sheet.
Match found: Subfolder 'ridges_19_01_21_div2cortex_B1_timelapse6' contains 'div2cortex_B1_timelapse6' from Excel sheet.
Match found: Subfolder 'ridges_19_02_11_C2_ridges_timelapse1' contains 'C2_ridges_timelapse1' from Excel sheet.
Match found: Subfolder 'ridges_19_02_11_C2_ridges_timelapse4a' contains 'C2_ridges_timelapse4a' from Excel sheet.
Match found: Subfolder 'ridges_19_02_11_C2_ridges_timelapse7' contains 'C2_ridges_timelapse7' from Excel sheet.
7 subfolders passed the validity checks.
0 subfolders were skipped due to invalid conditions or no matching filenames:
