In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
import csv
import numpy as np
from matplotlib.ticker import PercentFormatter

In [None]:
# --- Settings ---
# Directory that holds the input CSV files. Written as a raw string so the
# backslash is taken literally: "\B" is not a valid escape sequence and a
# plain string literal triggers a SyntaxWarning on recent Python versions.
data_dir = r"D:\Bacterial shell Fig3_erosion"
# File-name prefixes of the series to process (one entry per experiment).
series_names = ['XB794_mouse1', 'XB794_mouse2', '803_mouse2']
# Day indices to process: 0, 1, 2, 3, 4.
days = list(range(5))
# Cut-off value; the analysis reports the share of values above it.
threshold = 84.53

In [None]:
# --- Result store ---
# One independent, initially empty list per series; the processing step adds
# one entry per day to each list.
proportions = dict((name, []) for name in series_names)

In [None]:
# --- Process files ---
# For every (series, day) pair, load the matching CSV and record the fraction
# of values in its 8th column that exceed `threshold`. A missing, unreadable
# or empty file is recorded as None so every series keeps one entry per day.
for series in series_names:
    # Collect into a fresh list and assign it once the series is done, so that
    # re-running this cell replaces earlier results instead of appending to them.
    series_proportions = []

    for day in days:
        pattern = os.path.join(data_dir, f"{series}_Day{day}*.csv")
        # glob returns matches in arbitrary order; sort so the file that gets
        # picked is the same on every run.
        matching_files = sorted(glob.glob(pattern))

        if not matching_files:
            print(f"No file found for {series} Day {day}")
            series_proportions.append(None)
            continue

        # Use the first matching file
        filename = matching_files[0]
        if len(matching_files) > 1:
            # The trailing wildcard can match more than one file (for example
            # "Day1*" also matches "Day10..."), so make the choice visible.
            print(f"Multiple files match {pattern}; using {filename}")

        try:
            # Reading happens inside the try block so an unreadable file is
            # reported and skipped like any other per-file failure.
            df = pd.read_csv(filename)

            # Use the 8th column (index 7).
            # NOTE(review): the first non-empty entry is skipped, presumably a
            # sub-header/units row below the header — confirm against the files.
            h_values = df.iloc[:, 7].dropna().iloc[1:].astype(float)

            if h_values.empty:
                # Avoid a silent NaN from dividing by zero values.
                print(f"No usable values in {filename}")
                series_proportions.append(None)
                continue

            proportion = (h_values > threshold).sum() / len(h_values)
            series_proportions.append(proportion)
        except Exception as e:
            print(f"Error reading {filename}: {e}")
            series_proportions.append(None)

    proportions[series] = series_proportions

In [None]:
# --- Plot per-experiment proportions and the daily mean ---
# Mapping original series names to simplified names
rename_map = {
    "XB794_mouse1": "Exp1",
    "XB794_mouse2": "Exp2",
    "803_mouse2": "Exp3"
}

# Apply the rename; a series without an entry in rename_map keeps its own name
# instead of raising KeyError.
renamed_proportions = {rename_map.get(k, k): v for k, v in proportions.items()}
renamed_series_names = list(renamed_proportions.keys())

# Plotting
fig, ax = plt.subplots(figsize=(10, 6))

# Marker styles for each series
marker_styles = ['o', '^', 's']  # circle, triangle, square

# Scatter plot for each experiment with a distinct marker
for i, (series, values) in enumerate(renamed_proportions.items()):
    # Days without data are stored as None; convert them to NaN so the point
    # is simply left out of the scatter plot.
    y_values = [np.nan if v is None else float(v) for v in values]
    ax.scatter(days, y_values, label=series, color='black',
               # Cycle through the markers if there are more series than styles.
               marker=marker_styles[i % len(marker_styles)], s=120)

# Plot the daily mean as a short horizontal line
average_labelled = False
for i, day in enumerate(days):
    daily_values = [
        v for v in (renamed_proportions[name][i] for name in renamed_series_names)
        if v is not None and not np.isnan(v)
    ]
    if daily_values:
        avg = sum(daily_values) / len(daily_values)
        # Attach the legend label to the first line actually drawn, so the
        # entry is present even when the first day has no data.
        ax.hlines(y=avg, xmin=day - 0.3, xmax=day + 0.3,
                  colors='blue', linestyles='solid', linewidth=1.5,
                  label=None if average_labelled else 'Average')
        average_labelled = True

# Formatting with custom font sizes
ax.set_xlabel('Day', fontsize=28)
ax.set_ylabel(f'Proportion > {threshold}', fontsize=28)
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1))  # Convert to %
# Take the cut-off from `threshold` so the title and y-label cannot disagree.
ax.set_title(f'Proportion of Intensity > {threshold} over Days', fontsize=30, pad=20)
ax.set_xticks(days)
ax.tick_params(axis='both', labelsize=24)
ax.grid(True)
ax.legend(fontsize=24)
fig.tight_layout()
# NOTE(review): this path repeats the directory stored in `data_dir`; consider
# deriving it from that setting.
fig.savefig("D:/Bacterial shell Fig3_erosion/proportion_plot.svg", bbox_inches='tight')
plt.show()

In [None]:
# --- Export per-day mean and sample standard deviation across experiments ---
# Prepare header and data. Days with no usable value in any series are left out.
output_data = [("Day", "Average", "Standard Deviation")]
for i, day in enumerate(days):
    # Keep only real measurements: None marks a missing day, and NaN would
    # otherwise turn the mean and SD for the whole day into NaN.
    daily_values = [
        v for v in (renamed_proportions[name][i] for name in renamed_series_names)
        if v is not None and not np.isnan(v)
    ]
    if daily_values:
        avg = sum(daily_values) / len(daily_values)
        if len(daily_values) > 1:
            std = np.std(daily_values, ddof=1)  # Sample standard deviation
        else:
            # The sample SD is undefined for a single observation; record NaN
            # directly rather than triggering NumPy's degrees-of-freedom warning.
            std = float("nan")
        output_data.append((day, avg, std))

# Save to CSV
# NOTE(review): this path repeats the directory stored in `data_dir`; consider
# deriving it from that setting.
output_csv_path = "D:/Bacterial shell Fig3_erosion/average_sd_per_day.csv"
with open(output_csv_path, mode='w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(output_data)

print(f"Exported average and SD per day to {output_csv_path}")

Exported average and SD per day to D:/Bacterial shell Fig3_erosion/average_sd_per_day.csv
