This notebook reproduces the **fluorescence intensity quantification of N-cadherin** described in the manuscript.  

## Analysis Workflow

- **Profile Acquisition (in FIJI)** 
  - In Fiji (ImageJ), a line of fixed width (10 pixels) was drawn from the apical to the basal surface.
  - Fluorescence intensity (gray values) was extracted along this line using the “Plot Profile” function and exported as a CSV file.

- **Data organization**
  - Make sure the line profile does not extend beyond the apical or basal surface by applying the following filtering rules:
    - Find the maximum value in Gray_Value and keep that row and all rows after it, in order to remove noise in the lumen (given that NCAD is enriched at the apical surface).
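    - Find the first Gray_Value < 5 and remove all rows after it, in order to remove background noise outside of the bud (as implemented in Step 2, the first below-threshold row itself is kept as the last point of the profile).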

- **Normalization**
  - Line length rescaled to a unit interval [0, 1]:
    - 0 → apical surface  
    - 1 → basal surface  

## Outputs
- Raw gray value profiles per droplet/tissue region  
- Normalized intensity profiles (apical → basal)  
- Group-level averaged profiles for comparison across conditions  

In [None]:
# imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import scipy.stats as stats
import itertools
from statsmodels.stats.multitest import multipletests
import statsmodels.stats.multitest as multitest

### Step1: set the input and output paths

In [None]:
# Raw Fiji exports. Expected layout: source_folder >> subfolder {condition} >> csv
source_folder = 'input-folder-name'
# Step 2 output. Layout written: destination_folder >> subfolder {condition} >> edited csv
destination_folder = 'output-folder-name'
# The Step 3 plotting cells read the edited CSVs through `edited_base_folder`.
# Point it at the Step 2 output so those cells do not fail with a NameError;
# override it only if the edited CSVs live somewhere else.
edited_base_folder = destination_folder

### Step2: data access and processing

In [None]:
def read_csv_with_fallback(file_path):
    """Load a profile CSV, guessing its encoding and delimiter.

    Every (encoding, delimiter) pair is tried in turn, encodings varying
    slowest. The first parse that yields both a ``Gray_Value`` and a
    ``Distance_(microns)`` column is returned.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If no combination produces the two required columns
            (read errors of any kind are swallowed and count as a miss).
    """
    attempts = itertools.product(
        ('utf-8', 'utf-16', 'ISO-8859-1'),
        (',', '\t', ';'),
    )

    for encoding, separator in attempts:
        try:
            table = pd.read_csv(file_path, encoding=encoding, delimiter=separator)
        except Exception:
            # Wrong guess (or unreadable file): move on to the next combination.
            continue

        has_intensity = 'Gray_Value' in table.columns
        has_distance = 'Distance_(microns)' in table.columns
        if has_intensity and has_distance:
            return table

    raise ValueError(f"Could not parse {file_path} with common encodings and delimiters.")


def _trim_and_normalise_profile(df, background_threshold=5):
    """Trim one intensity profile and add the distance columns.

    Args:
        df: Profile table with ``Gray_Value`` and ``Distance_(microns)`` columns.
        background_threshold: The profile is cut after the first row whose
            ``Gray_Value`` is below this value.

    Returns:
        A new DataFrame starting at the brightest row, ending at the first
        below-threshold row (that row is kept), with two extra columns:
        ``edited_distance`` (distance measured from the first kept row) and
        ``normalised_distance`` (``edited_distance`` rescaled to 0-1).
    """
    # idxmax() returns an index label while iloc slices by position;
    # a fresh RangeIndex guarantees the two coincide.
    df = df.reset_index(drop=True)

    # Keep from the maximum Gray_Value onward.
    max_idx = df['Gray_Value'].idxmax()
    df = df.iloc[max_idx:].reset_index(drop=True)

    # Cut after the first below-threshold row; "+ 1" keeps that row itself.
    below_threshold = df[df['Gray_Value'] < background_threshold]
    if not below_threshold.empty:
        end_idx = below_threshold.index[0]
        df = df.iloc[:end_idx + 1].reset_index(drop=True)

    # Distance measured from the first kept row.
    start_dist = df.loc[0, 'Distance_(microns)']
    df['edited_distance'] = df['Distance_(microns)'] - start_dist

    # Rescale to the unit interval.
    max_edit_dist = df['edited_distance'].max()
    if max_edit_dist > 0:
        df['normalised_distance'] = df['edited_distance'] / max_edit_dist
    else:
        df['normalised_distance'] = 0  # handle constant distances

    return df


# Walk every CSV below source_folder, trim/normalise it and write the result
# to the same relative location below destination_folder.
processed_count = 0
failed_paths = []

for root, dirs, files in os.walk(source_folder):
    for filename in files:
        if not filename.endswith('.csv'):
            continue
        source_path = os.path.join(root, filename)

        try:
            df = _trim_and_normalise_profile(read_csv_with_fallback(source_path))

            # Save to mirrored destination folder
            relative_path = os.path.relpath(root, source_folder)
            dest_subfolder = os.path.join(destination_folder, relative_path)
            os.makedirs(dest_subfolder, exist_ok=True)
            dest_path = os.path.join(dest_subfolder, filename)
            df.to_csv(dest_path, index=False)

        except Exception as e:
            # Best effort: report the file and carry on with the remaining ones.
            print(f"❌ Failed to process {source_path}: {e}")
            failed_paths.append(source_path)
        else:
            processed_count += 1

# Report what actually happened instead of always claiming success.
if failed_paths:
    print(f"⚠️ Processed {processed_count} file(s); {len(failed_paths)} file(s) failed (see messages above).")
elif processed_count == 0:
    # os.walk yields nothing for a missing folder, so this also covers a wrong path.
    print(f"⚠️ No CSV files found under {source_folder}.")
else:
    print("✅ All files processed and saved with added distance normalization.")

### Step3: visualise including individual line plot, mean & std

In [None]:
# Folder that receives the PDF figures saved by the Step 3 plotting cells.
figures_folder = 'figure_output_folder_name'
# Create it up front (no error if it already exists) so plt.savefig can write into it.
os.makedirs(figures_folder, exist_ok=True)

In [None]:
# For every folder below edited_base_folder that contains edited CSVs, save
# two PDF figures into figures_folder: the individual profiles and their
# mean ± std on a common normalised-distance grid.
for root, dirs, files in os.walk(edited_base_folder):
    csv_files = [f for f in files if f.endswith('.csv')]
    if not csv_files:
        continue

    data_list = []
    labels = []

    for f in csv_files:
        path = os.path.join(root, f)
        try:
            df = pd.read_csv(path)
            # Only files carrying the normalised columns are plotted.
            if 'normalised_distance' in df.columns and 'Gray_Value' in df.columns:
                data_list.append(df[['normalised_distance', 'Gray_Value']])
                labels.append(os.path.splitext(f)[0])
        except Exception as e:
            print(f"⚠️ Failed to read {path}: {e}")

    if not data_list:
        continue

    # Use the last part of the subfolder path as the label
    subfolder_name = os.path.basename(root)


    ### === Plot 1: individual line plot === ###
    plt.figure(figsize=(6, 4))
    for df, label in zip(data_list, labels):
        plt.scatter(df['normalised_distance'], df['Gray_Value'], label=label, alpha=0.6, s=5)
        plt.plot(df['normalised_distance'], df['Gray_Value'], alpha=0.8, linewidth = 0.8 )
    plt.title(f'Scatter Plot - {subfolder_name}')
    plt.xlabel('Normalised Distance')
    plt.ylabel('Gray Value')
    plt.ylim(0,1200)
    plt.grid(True, alpha = 0.5)
    plt.tight_layout()
    scatter_path = os.path.join(figures_folder, f'{subfolder_name}_scatter_plot.pdf')
    plt.savefig(scatter_path)
    # NOTE(review): the figure is left open, presumably so it renders inline
    # in the notebook; add plt.close() here when running as a script.


    ### === Plot 2: Mean ± STD per condition per plot === ###
    # Resample every profile onto the same 100-point grid so they can be averaged.
    common_x = np.linspace(0, 1, 100)
    interpolated_y = []

    for df in data_list:
        interp = np.interp(common_x, df['normalised_distance'], df['Gray_Value'])
        interpolated_y.append(interp)

    interpolated_y = np.array(interpolated_y)
    mean_y = np.mean(interpolated_y, axis=0)
    std_y = np.std(interpolated_y, axis=0)  # np.std default: population std (ddof=0)

    plt.figure(figsize=(10, 6))
    plt.plot(common_x, mean_y, color='blue', label='Mean')
    plt.fill_between(common_x, mean_y - std_y, mean_y + std_y, color='blue', alpha=0.3, label='±1 Std Dev')
    plt.title(f'Mean ± STD - {subfolder_name}')
    plt.xlabel('Normalised Distance')
    plt.ylabel('Gray Value')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    mean_std_path = os.path.join(figures_folder, f'{subfolder_name}_mean_std_plot.pdf')
    plt.savefig(mean_std_path)
    # NOTE(review): left open for inline display, as for Plot 1.

    # Report the file names that were actually written (.pdf, not .png).
    print(f"✅ Saved: {subfolder_name}_scatter_plot.pdf & {subfolder_name}_mean_std_plot.pdf")

In [None]:
### === (Alternative to Plot 2) Plot 3: Mean ± STD from all selected conditions into 1 plot ===
# Each selected subfolder of edited_base_folder contributes one mean ± std
# curve to a single figure saved into figures_folder.
selected_subfolders = ['condition1', 'condition2', 'condition3']  # define selected subfolders

summary_data = []
summary_labels = []

for subfolder in selected_subfolders:
    root = os.path.join(edited_base_folder, subfolder)
    if not os.path.isdir(root):
        print(f"⚠️ Subfolder not found: {root}")
        continue

    files = os.listdir(root)
    csv_files = [f for f in files if f.endswith('.csv')]
    if not csv_files:
        continue

    data_list = []

    for f in csv_files:
        path = os.path.join(root, f)
        try:
            df = pd.read_csv(path)
            # Only files carrying the normalised columns are used.
            if 'normalised_distance' in df.columns and 'Gray_Value' in df.columns:
                data_list.append(df[['normalised_distance', 'Gray_Value']])
        except Exception as e:
            print(f"⚠️ Failed to read {path}: {e}")

    if not data_list:
        continue

    # Resample every profile onto the same 100-point grid so they can be averaged.
    common_x = np.linspace(0, 1, 100)
    interpolated_y = []
    for df in data_list:
        interp = np.interp(common_x, df['normalised_distance'], df['Gray_Value'])
        interpolated_y.append(interp)

    interpolated_y = np.array(interpolated_y)
    mean_y = np.mean(interpolated_y, axis=0)
    std_y = np.std(interpolated_y, axis=0)  # np.std default: population std (ddof=0)

    summary_data.append((common_x, mean_y, std_y))
    summary_labels.append(subfolder)

# === PLOTTING ===
if summary_data:
    plt.figure(figsize=(12, 7))
    for (x, mean_y, std_y), label in zip(summary_data, summary_labels):
        plt.plot(x, mean_y, label=f'{label} Mean')
        plt.fill_between(x, mean_y - std_y, mean_y + std_y, alpha=0.2)

    plt.title('Combined Mean ± STD from Selected Subfolders')
    plt.xlabel('Normalised Distance')
    plt.ylabel('Gray Value')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    combined_path = os.path.join(figures_folder, 'combined_mean_std_plot.pdf')
    plt.savefig(combined_path)
    # NOTE(review): the figure is left open, presumably so it renders inline
    # in the notebook; add plt.close() here when running as a script.

    # Report the path that was actually written.
    print(f"✅ Saved: {combined_path}")
else:
    print("⚠️ No valid data found for selected subfolders.")
