# Notebook Setup

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from scipy.stats import shapiro
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import multipletests
from scipy.stats import levene
from matplotlib import ticker

# Make IPython echo the value of every top-level expression in a cell, not
# only the last one. This is why later cells assign unwanted return values
# to `_`.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Data Reading

In [2]:
# Column labels applied to the raw export; the first four lines of the file are skipped.
columns_names = ["ROIs", "Region", "Brain", "Sample", "Group", "Area", "Mean"]

# NOTE(review): the path has a space before ".csv" - presumably this matches the file on disk; confirm.
filename = "box_data/Results_All_A_B .csv"

# Spellings in the export mapped to the upper-case codes used by the later cells.
region_aliases = {"Ctx": "CTX", "Thal": "TH", "Hip": "HIP"}

df = pd.read_csv(filename, sep=";", skiprows=4, names=columns_names)
for old_label, new_label in region_aliases.items():
    df = df.replace(old_label, new_label)

#df.head()

In [3]:
class sds_line:
    """One measured row of the intensity table.

    Stores the identifying fields of the row (region, brain, sample, group)
    together with the measured area and mean. Two derived values,
    ``cIntensity`` and ``nIntensity``, start at 0.0 and are filled in by
    :meth:`compute_cIntensity` and :meth:`compute_nIntensity`.
    """

    def __init__(self, region:str, brain:int, sample:str, group:str, area:int, mean:float):
        # Identification and raw measurements, stored exactly as given.
        self.region:str = region
        self.brain:int = brain
        self.sample:str = sample
        self.group:str = group
        self.area:int = area
        self.mean:float = mean

        # Derived quantities; populated by the compute_* methods.
        self.cIntensity: float = 0.0
        self.nIntensity: float = 0.0

    def __repr__(self):
        """Return a multi-line listing of every attribute."""
        fields = (
            ("region", self.region),
            ("brain", self.brain),
            ("sample", self.sample),
            ("group", self.group),
            ("area", self.area),
            ("mean", self.mean),
            ("corrected intensity", self.cIntensity),
            ("normalized intensity", self.nIntensity),
        )
        listing = "".join(f"- {label}: {value}\n" for label, value in fields)

        # The representation ends with an extra blank line.
        return "Line Class:\n" + listing + "\n"

    def compute_cIntensity(self, background_mean:float):
        """Set ``cIntensity`` to ``|mean - background_mean| * area``."""
        offset_from_background = np.abs(self.mean - background_mean)
        self.cIntensity = offset_from_background * self.area

    def compute_nIntensity(self, total_line_cIntensity:float):
        """Set ``nIntensity`` to ``cIntensity / total_line_cIntensity``.

        ``compute_cIntensity`` must have been called first; otherwise the
        numerator is still the initial 0.0.
        """
        self.nIntensity = self.cIntensity / total_line_cIntensity

In [4]:
# Turn every raw row into an sds_line object and, in parallel, index its
# measurements by (Region, Brain, Sample, Group) for keyed lookups later on.
line_list: list[sds_line] = []
line_dict = {}

for _, record in df.iterrows():

    region, brain = record['Region'], record['Brain']
    sample, group = record['Sample'], record['Group']
    area, mean = record['Area'], record['Mean']

    line_list.append(
        sds_line(region=region, brain=brain, sample=sample, group=group, area=area, mean=mean)
    )

    # Rows that share the same four-part key overwrite each other here.
    line_dict[(region, brain, sample, group)] = {"Area": area, "Mean": mean}

## Corrected Intensity Computation

In [5]:
# Pass 1 - background correction.
# For every row, look up the "Background" row with the same region, brain and
# group, compute the corrected intensity from its mean, and store the result
# in line_dict as well. A missing background entry raises KeyError.
for line in line_list:

    background_mean = line_dict[(line.region, line.brain, "Background", line.group)]["Mean"]
    
    line.compute_cIntensity(background_mean=background_mean)

    line_dict[(line.region, line.brain, line.sample, line.group)]["Corrected Intensity"] = line.cIntensity

# Pass 2 - normalisation.
# Each row is divided by the corrected intensity of a "Total Line" row.
for i, line in enumerate(line_list):

    brain_key = line.brain

    # No "Total Line" is stored under this row's own brain: fall back to the
    # nearest preceding "Total Line" row in list order and borrow its brain value.
    if (line.region, brain_key, "Total Line", line.group) not in line_dict.keys():

        j = i-1
        line_old = line_list[j]


        # NOTE(review): the backward scan has no lower bound. For i == 0, or once
        # j drops below 0, negative indices wrap to the END of line_list, so a
        # later "Total Line" can be picked up; if the list contains none at all
        # the loop ends in IndexError. The scan also does not check that the row
        # it finds has the same region/group, so the lookup below can still raise
        # KeyError. Presumably the input file is ordered so that neither happens
        # - confirm.
        while line_old.sample != "Total Line":
            j -= 1
            line_old = line_list[j]

        brain_key = line_old.brain

        # Debug aid: report which total line was substituted.
        #print(f"For line {(line.region, line.brain, line.sample, line.group)} using the total line { (line.region, brain_key, 'Total Line', line.group)}")

    total_line_cIntensity = line_dict[(line.region, brain_key, "Total Line", line.group)]["Corrected Intensity"]

    line.compute_nIntensity(total_line_cIntensity=total_line_cIntensity)

# Plots

## Boxplot 1: Comparison A with B all data points (legend:regions)

In [6]:
# Keep only the band measurements (rows whose sample name contains "Band").
lines_band = [line for line in line_list if "Band" in line.sample]

# Build the plotting table in a single step instead of filling a pre-allocated
# frame row by row: the row-wise fill left every column with object dtype,
# which is why later cells have to call .astype(float) before any statistics.
df_boxplot = pd.DataFrame(
    {
        'Region': [line.region for line in lines_band],
        'Group': [line.group for line in lines_band],
        'Relative Intensity': [line.nIntensity for line in lines_band],
    },
    columns=['Region', 'Group', 'Relative Intensity'],
)

# Keep the measurement column numeric even when lines_band is empty.
df_boxplot['Relative Intensity'] = df_boxplot['Relative Intensity'].astype(float)

In [7]:
# Figure: relative band intensity per group, individual points coloured by region.
fig, ax = plt.subplots(1, 1, figsize=(7, 5))

custom_colors = sns.color_palette("Set2")

# The tick labels further down are hard-coded as "A ...", "B ...". Pin the
# category order so the labels cannot end up swapped when group B happens to
# appear first in the data.
group_order = ["A", "B"]

# Unfilled boxes (no fliers) so the overlaid points stay visible.
_ = sns.boxplot(x="Group", y="Relative Intensity", data=df_boxplot, order=group_order,
                showcaps=True, boxprops={'facecolor': 'None'}, fliersize=0, ax=ax)

# Overlay individual data points, colored by brain region.
strip = sns.stripplot(x="Group", y="Relative Intensity", data=df_boxplot, order=group_order,
                      hue="Region", palette=custom_colors, dodge=True, jitter=True, marker="o",
                      alpha=0.7, edgecolor="gray", linewidth=0.8, ax=ax)

# Axis labels (the figure deliberately carries no title).
_ = ax.set_ylabel("Relative Intensity (a.u.)")
_ = ax.set_xlabel("Groups")
_ = ax.set_xticks(ax.get_xticks(), labels=["A (11 a.m.)", "B (11 p.m.)"])

# Logarithmic y axis.
_ = ax.set_yscale('log')

# Legend outside the axes, limited to the first five entries.
handles, labels = ax.get_legend_handles_labels()
_ = ax.legend(handles[0:5], labels[0:5], title="Region", bbox_to_anchor=(1.05, 1), loc='upper left', handletextpad=0.05)

# Remove all spines (box/frame).
for spine in ax.spines.values():
    spine.set_visible(False)

ax.set_facecolor('white')

# Light horizontal grid for major and minor y ticks.
ax.grid(True, axis="y", which='major', color='lightgrey', linestyle='-', linewidth=0.5)
ax.grid(True, axis="y", which='minor', color='lightgrey', linestyle='--', linewidth=0.25)

# No minor ticks on the categorical x axis.
ax.xaxis.set_minor_locator(ticker.NullLocator())

# Zero-width tick marks: keep the labels, hide the marks.
ax.xaxis.set_tick_params(width=0, which="both")
ax.yaxis.set_tick_params(width=0, which="both")

# Final marker outline: thin black edge (replaces the gray edge set above).
for collection in strip.collections:
    collection.set_edgecolor("black")
    collection.set_linewidth(0.5)

_ = fig.tight_layout()
#_ = fig.savefig("results_figures/Comparison_Regions_A_and_B.png")
#_ = plt.show()
plt.close(fig)

  _ = strip = sns.stripplot(x="Group", y="Relative Intensity", data=df_boxplot, hue="Region", palette=custom_colors, dodge=True,
  _ = strip = sns.stripplot(x="Group", y="Relative Intensity", data=df_boxplot, hue="Region", palette=custom_colors, dodge=True,


### Statistics: Comparison A with B all data points (legend:regions)

In [8]:
# Band measurements only (rows whose sample name contains "Band").
lines_band = [line for line in line_list if "Band" in line.sample]

# Build the per-band table in one step; filling a pre-allocated frame row by
# row left every column with object dtype.
df_boxplot_2 = pd.DataFrame(
    {
        'Band': [line.sample for line in lines_band],
        'Group': [line.group for line in lines_band],
        'Relative Intensity': [line.nIntensity for line in lines_band],
    },
    columns=['Band', 'Group', 'Relative Intensity'],
)

# Keep the measurement column numeric even when lines_band is empty.
df_boxplot_2['Relative Intensity'] = df_boxplot_2['Relative Intensity'].astype(float)

# Flat arrays of all band intensities per group, used by the overall A-vs-B tests.
plot_intensities_A = np.array( [line.nIntensity for line in lines_band if line.group=="A"] )
plot_intensities_B = np.array( [line.nIntensity for line in lines_band if line.group=="B"] )

In [9]:
# Overall comparison of group A and group B using all band intensities.

# Shapiro-Wilk test for normality, one test per group.
statA, pA = shapiro(plot_intensities_A)
statB, pB = shapiro(plot_intensities_B)

# "N" = no evidence against normality at the 5 % level, "nN" = not normal.
normalA = "N" if pA > 0.05 else "nN"
normalB = "N" if pB > 0.05 else "nN"

# Sample variance (ddof=1) of each group.
varA = np.var(plot_intensities_A, ddof=1)
varB = np.var(plot_intensities_B, ddof=1)

# Levene's test for equal variances between groups A and B.
stat_levene, p_levene = levene(plot_intensities_A, plot_intensities_B)
variance_homogeneity = "Equal" if p_levene > 0.05 else "Unequal"

# Two-sided Mann-Whitney U test (does not assume normality).
statMW, pMW = mannwhitneyu(plot_intensities_A, plot_intensities_B, alternative='two-sided')
significant = "Significant" if pMW < 0.05 else "Not Significant"

# Per-group descriptive table.
results_df_A_B = pd.DataFrame({
    "Group": ["A", "B"],
    "SW_W": [statA, statB],
    "SW_p": [pA, pB],
    "Normality": [normalA, normalB],
    "Variance": [varA, varB]
})

# Between-group table. The p-value column is named "MW_p" to match "MW_U"
# and the per-band / per-region tables (it was previously spelled "MWh_p").
results_df_Group_Comparison = pd.DataFrame({
    "MW_U": [statMW],
    "MW_p": [pMW],
    "Significance": [significant],
    "Levene_F": [stat_levene],
    "Levene_p": [p_levene],
    "Variance_Equality": [variance_homogeneity]
})

# Create the output folder first so that a fresh checkout does not fail on to_csv.
output_dir = Path("figures")
output_dir.mkdir(parents=True, exist_ok=True)

results_df_A_B.to_csv(output_dir / "Statistical_Comparison_Groups_A_and_B.csv", index=False)
results_df_Group_Comparison.to_csv(output_dir / "MW_Group_Comparison.csv", index=False)


## Boxplot 2: Comparison A with B all data points (legend:bands)

In [10]:
# Figure: relative band intensity per group, individual points coloured by band.
custom_colors = sns.color_palette("Set2")

# The tick labels are hard-coded as "A ...", "B ...", so pin the category
# order instead of relying on the order in which the groups appear in the data.
# (This cell no longer re-assigns plot_intensities_A / plot_intensities_B:
# the values were unused here and shadowed the arrays the statistics cell reads.)
group_order = ["A", "B"]

fig, ax = plt.subplots(1, 1, figsize=(7, 5))

# Unfilled boxes (no fliers) so the overlaid points stay visible.
_ = sns.boxplot(x="Group", y="Relative Intensity", data=df_boxplot_2, order=group_order,
                showcaps=True, boxprops={'facecolor': 'None'}, ax=ax, fliersize=0)

# Individual measurements, one colour per band.
_ = sns.stripplot(x="Group", y="Relative Intensity", data=df_boxplot_2, order=group_order,
                  palette=custom_colors, hue="Band", dodge=True, jitter=True, marker="o",
                  alpha=0.7, edgecolor="gray", linewidth=0.8, ax=ax)

# Axis labels (the figure deliberately carries no title).
_ = ax.set_ylabel("Relative Intensity (a.u.)")
_ = ax.set_xlabel("Groups")
_ = ax.set_xticks(ticks=ax.get_xticks(), labels=["A (11 a.m.)", "B (11 p.m.)"])

# Zero-width tick marks: keep the labels, hide the marks.
ax.xaxis.set_tick_params(width=0, which="both")
ax.yaxis.set_tick_params(width=0, which="both")

# Remove box/frame.
for spine in ax.spines.values():
    spine.set_visible(False)

# Light horizontal grid for major and minor y ticks.
_ = ax.grid(True, axis="y", which='major', color='lightgrey', linestyle='-', linewidth=0.5)
_ = ax.grid(True, axis="y", which='minor', color='lightgrey', linestyle='--', linewidth=0.25)

# Logarithmic y axis.
_ = ax.set_yscale('log')

# Legend outside the axes.
_ = ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', handletextpad=0.05)

fig.tight_layout()
#fig.savefig("results_figures/Comparison_Bands_Groups_A_and_B.png")
#plt.show()
plt.close(fig)


  _ = sns.stripplot(x="Group", y="Relative Intensity", data=df_boxplot_2, palette=custom_colors, hue="Band", dodge=True,
  _ = sns.stripplot(x="Group", y="Relative Intensity", data=df_boxplot_2, palette=custom_colors, hue="Band", dodge=True,


### Statistics: Comparison A with B all data points (legend:bands)

In [11]:
# Per-band comparison of group A against group B.

def _normality(values):
    """Shapiro-Wilk statistic, p-value and N/nN label; needs more than 2 values."""
    if len(values) <= 2:
        return np.nan, np.nan, "Insufficient Data"
    w_stat, p_val = shapiro(values)
    return w_stat, p_val, ("N" if p_val > 0.05 else "nN")


def _sample_variance(values):
    """Sample variance (ddof=1), or NaN when fewer than 2 values are available."""
    return np.var(values, ddof=1) if len(values) > 1 else np.nan


results = []

for band in df_boxplot_2['Band'].unique():
    band_rows = df_boxplot_2.loc[df_boxplot_2['Band'] == band]

    # Numeric intensities per group, NaNs removed.
    values_a = band_rows.loc[band_rows['Group'] == 'A', 'Relative Intensity'].astype(float).dropna().values
    values_b = band_rows.loc[band_rows['Group'] == 'B', 'Relative Intensity'].astype(float).dropna().values

    w_a, p_a, label_a = _normality(values_a)
    w_b, p_b, label_b = _normality(values_b)

    # Two-sided Mann-Whitney U test; skipped when either group is empty.
    if len(values_a) == 0 or len(values_b) == 0:
        u_stat, u_p, verdict = np.nan, np.nan, "Insufficient Data"
    else:
        u_stat, u_p = mannwhitneyu(values_a, values_b, alternative='two-sided')
        verdict = "Significant" if u_p < 0.05 else "Not Significant"

    results.append({
        'Band': band,
        'SW_W_A': w_a,
        'SW_p_A': p_a,
        'Normality_A': label_a,
        'Variance_A': _sample_variance(values_a),
        'SW_W_B': w_b,
        'SW_p_B': p_b,
        'Normality_B': label_b,
        'Variance_B': _sample_variance(values_b),
        'MW_U': u_stat,
        'MW_p': u_p,
        'Significance': verdict,
    })

# One row per band, written to CSV.
results_df_band = pd.DataFrame(results)
results_df_band.to_csv("figures/Statistical_Comparison_Per_Band.csv", index=False)


## Boxplot 3: Comparison Regions (legend:groups)

In [12]:
# Figure: relative intensity per brain region, individual points coloured by group.
fig, ax = plt.subplots(1, 1, figsize=(7, 5))

# Two groups -> two colours.
custom_colors = sns.color_palette("Set1", n_colors=2)

# The legend entries are relabelled by position below, so pin the hue order:
# otherwise the "A" / "B" captions follow whichever group appears first in the data.
group_order = ["A", "B"]
group_labels = ["A (11 a.m.)", "B (11 p.m.)"]

# Unfilled boxes (no fliers) so the points remain visible; drawn on `ax` explicitly.
_ = sns.boxplot(x="Region", y="Relative Intensity", data=df_boxplot, showcaps=True,
                boxprops={'facecolor': 'None'}, fliersize=0, ax=ax)

# Individual measurements coloured by group.
_ = sns.stripplot(data=df_boxplot, x="Region", y="Relative Intensity", hue="Group", hue_order=group_order,
                  palette=custom_colors, jitter=True, edgecolor="gray", marker='o', alpha=0.6, linewidth=0.8, ax=ax)

# Axis labels (the figure deliberately carries no title).
_ = ax.set_ylabel("Relative Intensity (a.u.)")
_ = ax.set_xlabel("Regions")

# Remove box/frame.
for spine in ax.spines.values():
    spine.set_visible(False)

# Subtle grid on the y axis only.
ax.grid(True, axis="y", which='major', color='lightgrey', linestyle='-', linewidth=0.5)
ax.grid(True, axis="y", which='minor', color='lightgrey', linestyle='--', linewidth=0.25)

# Logarithmic y axis.
ax.set_yscale('log')

# Legend outside the plot: the first two handles (one per group) with readable captions.
handles, _default_labels = ax.get_legend_handles_labels()
_ = ax.legend(handles[0:2], group_labels, title="Group", bbox_to_anchor=(1.05, 1), loc='upper left', handletextpad=0.05)

# Zero-width tick marks: keep the labels, hide the marks.
ax.xaxis.set_tick_params(width=0, which="both")
ax.yaxis.set_tick_params(width=0, which="both")

fig.tight_layout()
#fig.savefig("results_figures/Comparison_Regions (x-axis)_A_and_B (data points).png")
#plt.show()
plt.close(fig)


  _ = sns.stripplot(data=df_boxplot, x="Region", y="Relative Intensity", hue="Group",
  _ = sns.stripplot(data=df_boxplot, x="Region", y="Relative Intensity", hue="Group",


In [13]:
# Figure: one panel per group (shared x axis), relative intensity by region.
fig, axs = plt.subplots(2, 1, figsize=(10, 7), sharex=True)
group_list = ["A","B"]
title_list=["A (11 a.m.)", "B (11 p.m.)"]

# One colour per group.
custom_colors = sns.color_palette("Set1",n_colors=2)

for i, (ax, group, title) in enumerate(zip(axs, group_list, title_list)):

    df_boxplt_group = df_boxplot[df_boxplot["Group"]==group]

    # Unfilled boxes (no fliers) so the points remain visible.
    _ = sns.boxplot(x="Region", y="Relative Intensity", data=df_boxplt_group, showcaps=True,
                    boxprops={'facecolor': 'None'}, fliersize=0, ax=ax, legend=False)

    # Each panel holds a single hue level, so pass exactly one colour.
    # (`custom_colors[i:]` handed two colours to the first panel.)
    _ = sns.stripplot(data=df_boxplt_group, x="Region", y="Relative Intensity", hue="Group",
                      palette=[custom_colors[i]], jitter=True, edgecolor="gray", marker='o',
                      alpha=0.6, linewidth=0.8, ax=ax, legend=False)

    # The y label is drawn once for the whole figure below.
    _ = ax.set_ylabel("")
    _ = ax.set_xlabel("Regions", fontsize=13)

    # Remove box/frame.
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Subtle grid on the y axis only.
    ax.grid(True, axis="y", which='major', color='lightgrey', linestyle='-', linewidth=0.5)
    ax.grid(True, axis="y", which='minor', color='lightgrey', linestyle='--', linewidth=0.25)

    # Logarithmic y axis.
    ax.set_yscale('log')

    # Panel title names the group.
    _ = ax.set_title(title, fontsize=13)

    # Zero-width tick marks: keep the labels, hide the marks.
    ax.xaxis.set_tick_params(width=0, which="both")
    ax.yaxis.set_tick_params(width=0, which="both")

# Shared y-axis caption, centred on the left edge of the figure.
_ = fig.text(0.025, 0.5, "Relative Intensity (a.u.)", va='center', rotation='vertical', fontsize=13)

# Create the output folder first so that a fresh checkout does not fail on savefig.
output_path = Path("results_figures") / "Comparison_Regions_Split_yscale.png"
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(output_path)
plt.close(fig)

  _ = sns.stripplot(data=df_boxplt_group, x="Region", y="Relative Intensity", hue="Group",
  _ = sns.stripplot(data=df_boxplt_group, x="Region", y="Relative Intensity", hue="Group",
  _ = sns.stripplot(data=df_boxplt_group, x="Region", y="Relative Intensity", hue="Group",


### Statistics: Comparison Regions (legend:groups)

In [14]:
# Per-region comparison of group A against group B with Bonferroni correction.
region_list = ["CTX", "TH", "HIP", "CB", "OB"]
results = []

# Bonferroni correction: one Mann-Whitney test per listed region.
alpha = 0.05
n_tests = len(region_list)
alpha_corrected = alpha / n_tests  # equivalent per-test threshold, kept for reference

for region in region_list:
    data_region = df_boxplot[df_boxplot['Region'] == region]

    # Numeric intensities per group, NaNs removed.
    intensities_A = data_region[data_region['Group'] == 'A']['Relative Intensity'].astype(float).dropna().values
    intensities_B = data_region[data_region['Group'] == 'B']['Relative Intensity'].astype(float).dropna().values

    # Shapiro-Wilk test for normality (needs at least 3 data points).
    if len(intensities_A) > 2:
        statA, pA = shapiro(intensities_A)
        normalA = "N" if pA > 0.05 else "nN"
    else:
        statA, pA, normalA = np.nan, np.nan, "Insufficient Data"

    if len(intensities_B) > 2:
        statB, pB = shapiro(intensities_B)
        normalB = "N" if pB > 0.05 else "nN"
    else:
        statB, pB, normalB = np.nan, np.nan, "Insufficient Data"

    # Sample variance (ddof=1); undefined for fewer than 2 values.
    varA = np.var(intensities_A, ddof=1) if len(intensities_A) > 1 else np.nan
    varB = np.var(intensities_B, ddof=1) if len(intensities_B) > 1 else np.nan

    # Two-sided Mann-Whitney U test; "Significance" refers to the UNCORRECTED p-value.
    if len(intensities_A) > 0 and len(intensities_B) > 0:
        statMW, pMW = mannwhitneyu(intensities_A, intensities_B, alternative='two-sided')
        significant = "Significant" if pMW < 0.05 else "Not Significant"
    else:
        statMW, pMW, significant = np.nan, np.nan, "Insufficient Data"

    results.append({'Region': region, 'SW_W_A': statA, 'SW_p_A': pA, 'Normality_A': normalA, 'Variance_A': varA,
        'SW_W_B': statB, 'SW_p_B': pB, 'Normality_B': normalB, 'Variance_B': varB, 'MW_U': statMW, 'MW_p': pMW, 'Significance': significant})

results_df = pd.DataFrame(results)

# Bonferroni-adjusted p-values, capped at 1.
results_df['MW_p_Bonferroni'] = (results_df['MW_p'] * n_tests).clip(upper=1.0)


def _corrected_label(p_adjusted):
    """Label an adjusted p-value; a missing value means the test was not run."""
    if pd.isna(p_adjusted):
        return "Insufficient Data"
    return "Significant" if p_adjusted < alpha else "Not Significant"


# Single assignment of the corrected verdict. The two boolean assignments that
# used to precede it were overwritten immediately and have been dropped.
results_df['Corrected Significance'] = results_df['MW_p_Bonferroni'].apply(_corrected_label)

# Create the output folder first so that a fresh checkout does not fail on to_csv.
output_dir = Path("figures")
output_dir.mkdir(parents=True, exist_ok=True)
results_df.to_csv(output_dir / "Statistical_Comparison_Per_Region.csv", index=False)

#results_df


# Isoform Ratio

## Calculations for Ratio

In [15]:
# Restrict the band table to the two bands that enter the ratio.
is_ratio_band = df_boxplot_2['Band'].isin(['Band 1', 'Band 2'])
df_ratio_total = df_boxplot_2[is_ratio_band]

# Mean relative intensity per group (rows) and band (columns).
mean_by_group_band = df_ratio_total.groupby(['Group', 'Band'])['Relative Intensity'].mean()
pivot_total = mean_by_group_band.unstack()

# Ratio of the group means: Band 1 divided by Band 2.
pivot_total['AQP4 Isoform Ratio (1/2)'] = pivot_total['Band 1'].div(pivot_total['Band 2'])
#print(pivot_total[['AQP4 Isoform Ratio (1/2)']])

# Interpretation:
#   ratio > 1 -> Met1 (Band 1) is more abundant than Met23 (Band 2)
#   ratio < 1 -> Met23 (Band 2) is more abundant than Met1 (Band 1)


In [16]:
# Extended isoform ratios per region and group, computed from mean band intensities.
region_list = ["CTX", "TH", "HIP", "CB", "OB"]
bands_of_interest = ['Band 1', 'Band 2', 'Band 3']
ratio_columns = ['Original Ratio (1 / 2)', 'Ratio (1+3 / 2)', 'Ratio (1 / 2+3)']
results = []

for region in region_list:
    region_lines = [
        line for line in line_list
        if line.region == region and line.sample in bands_of_interest
    ]

    # Nothing measured for this region.
    if len(region_lines) == 0:
        continue

    df_region = pd.DataFrame({
        "Group": [line.group for line in region_lines],
        "Band": [line.sample for line in region_lines],
        "Relative Intensity": [line.nIntensity for line in region_lines]
    })

    # Keep strictly positive intensities only, then drop incomplete rows.
    has_signal = df_region["Relative Intensity"] > 0
    df_region = df_region[has_signal].dropna()

    # Mean intensity per group (rows) and band (columns); absent cells become 0.
    pivot = df_region.groupby(['Group', 'Band'])['Relative Intensity'].mean().unstack()
    pivot = pivot.fillna(0)

    # Look up the band columns once. A band column that is missing entirely
    # falls back to a scalar: 0 in sums and numerators, 1 as a lone denominator.
    # NOTE(review): a band present for one group only is zero-filled above, so a
    # zero denominator (inf / NaN ratio) is possible - confirm this is intended.
    band1 = pivot.get('Band 1', 0)
    band3 = pivot.get('Band 3', 0)
    band2_as_denominator = pivot.get('Band 2', 1)
    band2_as_term = pivot.get('Band 2', 0)

    pivot['Original Ratio (1 / 2)'] = band1 / band2_as_denominator
    pivot['Ratio (1+3 / 2)'] = (band1 + band3) / band2_as_denominator
    pivot['Ratio (1 / 2+3)'] = band1 / (band2_as_term + band3)

    #print(f"\nRegion: {region}")
    #print(pivot[['Original Ratio (1 / 2)', 'Ratio (1+3 / 2)', 'Ratio (1 / 2+3)']])

    # One output record per group.
    for group, ratios in pivot[ratio_columns].iterrows():
        results.append({'Region': region, 'Group': group, **ratios.to_dict()})

# Final DataFrame and export
results_df = pd.DataFrame(results)
results_df.to_csv("figures/AQP4_Isoform_Ratio_Expanded.csv", index=False)


## Image Ratio

In [17]:
# Long table of normalised intensities for every parsed line.
# It gets its own name: re-using `df` here replaced the raw input table, so the
# cell that builds line_list from `df` could not be re-run afterwards.
df_intensity = pd.DataFrame({
    "Region": [line.region for line in line_list],
    "Brain": [line.brain for line in line_list],
    "Sample": [line.sample for line in line_list],
    "Group": [line.group for line in line_list],
    "Relative Intensity": [line.nIntensity for line in line_list]
})

# Filter for bands of interest
df_bands = df_intensity[df_intensity['Sample'].isin(['Band 1', 'Band 2'])]

# One row per (Region, Brain, Group) with one column per band.
# pivot_table averages duplicate entries (default aggfunc); rows missing
# either band are dropped.
df_pivot = df_bands.pivot_table(
    index=['Region', 'Brain', 'Group'],
    columns='Sample',
    values='Relative Intensity'
).dropna()

# Ratio Band 1 / Band 2
df_pivot['Ratio'] = df_pivot['Band 1'] / df_pivot['Band 2']



In [18]:

# Figure: Band 1 / Band 2 ratio per region, split by group.

# Move the (Region, Brain, Group) index levels back into columns for seaborn.
df_plot = df_pivot.reset_index()

# Two groups -> two colours.
custom_colors = sns.color_palette("Set1", n_colors=2)

# The legend is relabelled by position below, so pin the hue order instead of
# relying on the order in which the groups appear in the data.
group_order = ["A", "B"]
group_labels = ["A (11 a.m.)", "B (11 p.m.)"]

fig, ax = plt.subplots(1, 1, figsize=(7, 5))

# Boxplot: filled boxes with custom colors, black edges, no fliers
_ = sns.boxplot(data=df_plot, x="Region", y="Ratio", hue="Group", hue_order=group_order, palette=custom_colors,
    showfliers=False, boxprops={'edgecolor': 'black',"alpha":0.6},showcaps=True, ax=ax)

# Stripplot: black data points, dodged per group. `color=` together with
# `hue=` is deprecated in seaborn; `palette='dark:black'` is the replacement
# that seaborn's own warning names as having the same effect.
_ = sns.stripplot(data=df_plot, x="Region", y="Ratio", hue="Group", hue_order=group_order, palette='dark:black',
    dodge=True, jitter=True, alpha=0.8, marker='o', linewidth=0.5, ax=ax, size=5)

# Both plots add legend entries; keep the first two handles only.
handles, _default_labels = ax.get_legend_handles_labels()
_ = ax.legend(handles[:2], group_labels, title="Group", bbox_to_anchor=(1.05, 1), loc='upper left', handletextpad=0.5)

# Remove box/frame
for spine in ax.spines.values():
    spine.set_visible(False)

# Light grid
ax.grid(True, axis="y", which='major', color='lightgrey', linestyle='-', linewidth=0.5)
ax.grid(True, axis="y", which='minor', color='lightgrey', linestyle='--', linewidth=0.25)

# Reference line at ratio = 1, drawn half a unit beyond the current x limits on each side.
_ = ax.hlines(y=1,xmin=ax.get_xlim()[0]-0.5,xmax=ax.get_xlim()[1]+0.5, colors="k", lw=2, linestyles="-.")

# Set labels
_ = ax.set_ylabel("Isoform Ratio (a.u.)")
_ = ax.set_xlabel("Regions")

# Despine all four sides (top/right by default, plus left and bottom).
sns.despine(ax=ax, left=True, bottom=True)

# Zero-width tick marks: keep the labels, hide the marks.
ax.xaxis.set_tick_params(width=0, which="both")
ax.yaxis.set_tick_params(width=0, which="both")

fig.tight_layout()

# Create the output folder first so that a fresh checkout does not fail on savefig.
output_path = Path("results_figures") / "AQP4_Isoform_Ratio_Boxplot.png"
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(output_path)
plt.close(fig)

  _ = sns.boxplot(data=df_plot, x="Region", y="Ratio", hue="Group", palette=custom_colors,

Setting a gradient palette using color= is deprecated and will be removed in v0.14.0. Set `palette='dark:black'` for the same effect.

  _ = sns.stripplot(data=df_plot, x="Region", y="Ratio", hue="Group", color='black', dodge=True, jitter=True,


## Statistics: Isoform Ratio

In [19]:
# Per-region Mann-Whitney comparison (A vs. B) of the per-brain Band 1 / Band 2 ratio.
region_list = ["CTX", "TH", "HIP", "CB", "OB"]
results = []

for region in region_list:
    # Extract lines for Band 1 and Band 2 only
    region_lines = [line for line in line_list if line.region == region and line.sample in ['Band 1', 'Band 2']]

    if not region_lines:
        continue

    df_region = pd.DataFrame({
        "Group": [line.group for line in region_lines],
        "Band": [line.sample for line in region_lines],
        "Brain": [line.brain for line in region_lines],
        "Relative Intensity": [line.nIntensity for line in region_lines]
    })

    # One row per (Brain, Group) with a column per band. A loop-local name is
    # used on purpose: assigning to `df_pivot` here overwrote the table that
    # the ratio figure reads, so that figure could not be re-drawn afterwards.
    ratio_by_brain = df_region.pivot_table(index=["Brain", "Group"], columns="Band", values="Relative Intensity")

    # Drop rows with missing data or non-positive values
    ratio_by_brain = ratio_by_brain.dropna()
    ratio_by_brain = ratio_by_brain[(ratio_by_brain["Band 1"] > 0) & (ratio_by_brain["Band 2"] > 0)]

    # Calculate the ratio per brain
    ratio_by_brain["Ratio (1/2)"] = ratio_by_brain["Band 1"] / ratio_by_brain["Band 2"]

    # Split by experimental group
    group_level = ratio_by_brain.index.get_level_values("Group")
    groupA = ratio_by_brain[group_level == "A"]["Ratio (1/2)"]
    groupB = ratio_by_brain[group_level == "B"]["Ratio (1/2)"]

    # Two-sided Mann-Whitney U test; requires at least 3 ratios per group.
    if len(groupA) >= 3 and len(groupB) >= 3:
        stat, p = mannwhitneyu(groupA, groupB, alternative='two-sided')
        verdict = "Yes" if p < 0.05 else "No"
    else:
        # The test was not run: say so instead of reporting "No".
        stat, p = float('nan'), float('nan')
        verdict = "Insufficient Data"

    results.append({"Region": region, "Group A Mean": groupA.mean(), "Group B Mean": groupB.mean(), "U Statistic": stat,
        "p-value": p, "Significant": verdict})

# Create final result DataFrame
results_df = pd.DataFrame(results)

# Create the output folder first so that a fresh checkout does not fail on to_csv.
output_dir = Path("figures")
output_dir.mkdir(parents=True, exist_ok=True)
results_df.to_csv(output_dir / "Statistical_Ratio_Comparison.csv", index=False)
#results_df
