In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- 1. Load the data ---
# The table is first read as comma-separated. If that yields a single column,
# the file is re-read as tab-separated.
csv_path = 'In vivo biodistribution.csv'
try:
    df_raw = pd.read_csv(csv_path)
    if len(df_raw.columns) == 1:
        df_raw = pd.read_csv(csv_path, delimiter='\t')
except FileNotFoundError:
    print("Error: 'In vivo biodistribution.csv' not found. Please make sure the CSV file is in the same directory.")
    # Raise SystemExit directly instead of calling the interactive-only
    # `exit()` helper: it works without the `site` module, does not shut the
    # kernel down when run in a notebook, and reports a non-zero status.
    raise SystemExit(1)

# --- 2. Prepare and Aggregate Data for Plotting (Calculate Mean and SEM) ---

# Split each 'Organ' label into organ name and time point on the LAST
# underscore only, so an organ name that itself contains an underscore still
# produces exactly two parts.
df_raw[['Organ_Name', 'Time_Point']] = df_raw['Organ'].str.rsplit('_', n=1, expand=True)
unique_organ_names = df_raw['Organ_Name'].unique()
time_points = ['2h', '24h', '1w']

# Replicate columns for each metal; these do not depend on the organ/time
# pair, so they are determined once instead of on every loop iteration.
fe_conc_cols = [col for col in df_raw.columns if 'Fe concentration' in col]
au_conc_cols = [col for col in df_raw.columns if 'Au concentration' in col]

# Collect one record (Mean and SEM for Fe and Au) per Organ-Time pair
df_agg_list = []

for organ_name in unique_organ_names:
    for time_point in time_points:
        organ_time_label = f'{organ_name}_{time_point}'
        subset_df = df_raw[df_raw['Organ'] == organ_time_label]

        # Organ/time combinations that are absent from the table are skipped
        if subset_df.empty:
            continue

        # Mean and SEM are taken across the replicate columns of a row.
        # NOTE: only the first matching row is used (`.values[0]`); this
        # assumes one row per organ/time label - verify against the CSV.
        fe_mean = subset_df[fe_conc_cols].mean(axis=1).values[0]
        fe_sem = subset_df[fe_conc_cols].sem(axis=1).values[0]

        au_mean = subset_df[au_conc_cols].mean(axis=1).values[0]
        au_sem = subset_df[au_conc_cols].sem(axis=1).values[0]

        df_agg_list.append({
            'Organ_Name': organ_name,
            'Time_Point': time_point,
            'Fe_Mean': fe_mean,
            'Fe_SEM': fe_sem,
            'Au_Mean': au_mean,
            'Au_SEM': au_sem,
        })

df_agg = pd.DataFrame(df_agg_list)

# --- 3. Plotting Functions ---

def plot_biodistribution_grouped(data_frame_agg, element_mean_col, element_sem_col, output_filename):
    """
    Plots the biodistribution for a given element as a grouped bar chart.

    One group of bars is drawn per organ, with one bar per time point
    ('2h', '24h', '1w') in a fixed color and an error bar of the matching
    color. The figure is saved to `output_filename` (600 dpi, TIFF) and shown.

    Parameters:
        data_frame_agg: DataFrame with 'Organ_Name' and 'Time_Point' columns
            plus the two value columns named by the next two arguments.
        element_mean_col: Name of the column used as bar height. If the name
            contains 'Fe' or 'Au', the y-axis label names that element.
        element_sem_col: Name of the column used as error-bar length.
        output_filename: Path handed to `plt.savefig`.
    """
    fig, ax = plt.subplots(figsize=(10, 8))

    unique_organ_names = data_frame_agg['Organ_Name'].unique()
    time_points = ['2h', '24h', '1w']

    # Custom colors for time points
    colors = {
        '2h': 'red',
        '24h': 'm',
        '1w': 'lime'
    }

    # Custom labels for legend
    legend_label_mapping = {
        '2h': '2 h',
        '24h': '24 h',
        '1w': '1 week'
    }

    # Bar width and spacing for grouping
    bar_width = 0.25
    spacing_within_group = 0.05
    group_spacing = 0.7
    group_total_width = (bar_width * len(time_points)) + (spacing_within_group * (len(time_points) - 1))
    # Center x-position of each organ's group of bars
    x_organ_groups = np.arange(len(unique_organ_names)) * (group_total_width + group_spacing)

    # Legend label -> first bar drawn for that time point (insertion-ordered,
    # so the legend lists time points in the order they are first plotted)
    legend_entries = {}

    for group_center, organ_name in zip(x_organ_groups, unique_organ_names):
        organ_data = data_frame_agg[data_frame_agg['Organ_Name'] == organ_name]
        # x-position of the center of the first bar in this group
        group_start_x = group_center - group_total_width / 2 + bar_width / 2

        for j, time_point in enumerate(time_points):
            time_data = organ_data[organ_data['Time_Point'] == time_point]
            if time_data.empty:
                # Missing time point: leave its slot in the group empty
                continue

            x_pos = group_start_x + (j * (bar_width + spacing_within_group))
            mean_val = time_data[element_mean_col].values[0]
            sem_val = time_data[element_sem_col].values[0]

            bar = ax.bar(x_pos, mean_val, bar_width,
                         yerr=sem_val, capsize=4,
                         color=colors[time_point], linewidth=0.8,
                         error_kw={'ecolor': colors[time_point], 'elinewidth': 1.5})

            legend_entries.setdefault(legend_label_mapping[time_point], bar[0])

    # --- Customize Plot ---
    y_label_text = 'Concentration (µg/g)'
    if 'Fe' in element_mean_col:
        y_label_text = 'Fe concentration/tissue (µg/g)'
    elif 'Au' in element_mean_col:
        y_label_text = 'Au concentration/tissue (µg/g)'
    ax.set_ylabel(y_label_text, fontweight='bold', fontsize=20, fontfamily='Arial')

    ax.set_xticks(x_organ_groups)
    ax.set_xticklabels(unique_organ_names, rotation=45, ha='right', fontsize=18, fontfamily='Arial', fontweight='bold')

    ax.tick_params(axis='y', which='both', length=10, width=2.5, labelsize=18)
    ax.tick_params(axis='x', which='both', length=8, width=2, labelsize=18)

    # Upper y-limit: tallest bar plus its error bar, with 10% headroom.
    # A missing (NaN) SEM is treated as zero so that bar still counts toward
    # the maximum instead of dropping out of it and being clipped.
    bar_tops = data_frame_agg[element_mean_col] + data_frame_agg[element_sem_col].fillna(0)
    max_val = bar_tops.max()
    if np.isfinite(max_val) and max_val > 0:
        ax.set_ylim(bottom=0, top=max_val * 1.1)
    else:
        # No usable maximum (no data, all NaN, or nothing above zero):
        # pin the bottom only and let matplotlib choose the top.
        ax.set_ylim(bottom=0)

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(3)
    ax.spines['bottom'].set_linewidth(3)

    ax.legend(list(legend_entries.values()), list(legend_entries.keys()),
              loc='upper right', frameon=False, fontsize=18)

    plt.tight_layout()
    plt.savefig(output_filename, dpi=600, format='tif')
    plt.show()

# --- 4. Call Plotting Functions ---

# One figure per element: (mean column, SEM column, output TIFF file)
for mean_col, sem_col, tif_name in (
    ('Fe_Mean', 'Fe_SEM', 'In_vivo_biodistribution_Fe.tif'),
    ('Au_Mean', 'Au_SEM', 'In_vivo_biodistribution_Au.tif'),
):
    plot_biodistribution_grouped(df_agg, mean_col, sem_col, tif_name)


In [None]:
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# --- 5. Statistical Analysis: One-way ANOVA + Tukey HSD for 2h Time Point ---

# Keep only the rows of the raw table whose time point is '2h'
is_2h_row = df_raw['Time_Point'] == '2h'
df_2h = df_raw.loc[is_2h_row]

# Names of the replicate concentration columns for each metal
fe_cols = [name for name in df_2h.columns if 'Fe concentration' in name]
au_cols = [name for name in df_2h.columns if 'Au concentration' in name]

# Prepare long-format data for ANOVA and Tukey test
def prepare_long_data(df, conc_cols, metal_label):
    """
    Converts wide-format dataframe into long format for ANOVA/Tukey test.

    Each row of `df` contributes one output row per column in `conc_cols`,
    in row-then-column order.

    Parameters:
        df: DataFrame with an 'Organ_Name' column and the columns listed in
            `conc_cols`.
        conc_cols: Names of the columns holding the replicate values.
        metal_label: Value written to the 'Metal' column of every output row.

    Returns:
        DataFrame with columns 'Organ', 'Value' and 'Metal'. The columns are
        present even when there are no rows, so callers can group by 'Organ'
        without special-casing empty input.
    """
    conc_cols = list(conc_cols)

    # Build all records first and construct the frame once; concatenating a
    # one-row frame per value re-copies the accumulated data on every step.
    records = [
        {'Organ': organ, 'Value': value, 'Metal': metal_label}
        for organ, replicate_values in zip(df['Organ_Name'], df[conc_cols].to_numpy())
        for value in replicate_values
    ]

    return pd.DataFrame(records, columns=['Organ', 'Value', 'Metal'])

# Build the long-format table for each metal from the 2h subset
fe_long, au_long = (
    prepare_long_data(df_2h, metal_cols, metal_name)
    for metal_cols, metal_name in ((fe_cols, 'Fe'), (au_cols, 'Au'))
)

# Function to perform One-way ANOVA and Tukey HSD
def run_anova_tukey(data_long, metal):
    """
    Runs one-way ANOVA and Tukey HSD test for the specified metal.

    Values are grouped by 'Organ'. The Tukey HSD post-hoc table is printed
    only when the ANOVA p-value is below 0.05. All results are printed;
    nothing is returned.

    Parameters:
        data_long: Long-format DataFrame with 'Organ' and 'Value' columns.
        metal: Label used in the printed headings and messages.
    """
    print(f"\n--- {metal} One-way ANOVA ---")

    # Drop missing replicates first: a single NaN would otherwise turn the
    # ANOVA result into NaN, and `nan < 0.05` is False, which would be
    # misreported below as "no significant differences".
    if data_long.empty:
        clean_data = data_long
        groups = []
    else:
        clean_data = data_long.dropna(subset=['Value'])
        groups = [group['Value'].values for _, group in clean_data.groupby('Organ')]

    # f_oneway needs at least two samples to compare
    if len(groups) < 2:
        print(f"Cannot run ANOVA for {metal}: at least two organs with data are required.")
        return

    anova_result = f_oneway(*groups)
    print(f"ANOVA F = {anova_result.statistic:.3f}, p = {anova_result.pvalue:.4f}")

    # The test can still be undefined (for example one value per group)
    if pd.isna(anova_result.pvalue):
        print(f"ANOVA result for {metal} is undefined; check replicate counts and variance.")
        return

    if anova_result.pvalue < 0.05:
        print(f"\n--- {metal} Tukey HSD Post-hoc ---")
        tukey = pairwise_tukeyhsd(endog=clean_data['Value'],
                                  groups=clean_data['Organ'],
                                  alpha=0.05)
        print(tukey.summary())
    else:
        print(f"No significant differences found among organs for {metal} (p ≥ 0.05).")

# Run the ANOVA / Tukey analysis once per metal, Fe first and then Au
for long_table, metal_name in ((fe_long, 'Fe'), (au_long, 'Au')):
    run_anova_tukey(long_table, metal_name)
