In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt

# Set plot style
# Prefer the seaborn-derived whitegrid style and fall back to 'ggplot' when the
# installed matplotlib does not list it. Availability is checked explicitly
# instead of catching every exception, so unrelated errors are not swallowed.
PREFERRED_STYLE = 'seaborn-v0_8-whitegrid'
FALLBACK_STYLE = 'ggplot'
plt.style.use(PREFERRED_STYLE if PREFERRED_STYLE in plt.style.available else FALLBACK_STYLE)

# Default figure size (inches) for figures that do not pass their own figsize
plt.rcParams['figure.figsize'] = (10, 6)

# Biofilm Viability (Bar Plot)

In [None]:
# Load Barplot Data
# Read the bar-plot table from the working directory and preview its first rows.
df_bar = pd.read_csv(filepath_or_buffer='biofilm_barplot_data.csv')
print("Barplot Data:")
display(df_bar.head(n=5))

In [None]:
# Visualization: Stacked Bar Plot for Intact/Damaged
# One figure is drawn for each Condition-Cultivation pair that has rows in df_bar.

# Kept as notebook-level names because other cells may iterate over them.
conditions = df_bar['condition'].unique()
cultivations = df_bar['cultivation'].unique()

for cond in conditions:
    for cult in cultivations:
        pair_mask = (df_bar['condition'] == cond) & (df_bar['cultivation'] == cult)
        # Sort by day so the bars appear in chronological order even when the
        # CSV rows are not ordered (assumes 'day' is numeric -- TODO confirm).
        subset = df_bar[pair_mask].sort_values('day')
        if subset.empty:
            continue

        fig, ax = plt.subplots(figsize=(8, 5))

        # Days are converted to strings so the x-axis is categorical
        # (evenly spaced bars, one tick per sampled day).
        days = subset['day'].astype(str)
        intact = subset['intact']
        damaged = subset['damaged']
        error = subset['error']

        # Intact (Live) in Green and Damaged (Dead) in Red.
        # The error bars are attached to the intact segment; the damaged
        # segment is stacked on top of it via `bottom=intact`.
        ax.bar(days, intact, label='Intact', color='#2ecc71', yerr=error, capsize=5)
        ax.bar(days, damaged, bottom=intact, label='Damaged', color='#e74c3c')

        ax.set_title(f'Biofilm Viability: {cond} - {cult}')
        ax.set_xlabel('Day')
        ax.set_ylabel('Percentage (%)')
        ax.legend(loc='upper right', bbox_to_anchor=(1.15, 1))
        ax.set_ylim(0, 110)
        ax.grid(True, axis='y', linestyle='--', alpha=0.7)

        plt.tight_layout()
        plt.show()
        # Release the figure explicitly so figures do not accumulate across
        # iterations on backends that do not close them after show().
        plt.close(fig)

# Biofilm Statistics (Box Plot)

In [None]:
# Load Boxplot Data
# Read the box-plot summary table from the working directory and preview its first rows.
df_box = pd.read_csv(filepath_or_buffer='biofilm_boxplot_data.csv')
print("Boxplot Data:")
display(df_box.head(n=5))

In [None]:
# Visualization: Box Plot from Summary Statistics
# Using matplotlib.axes.Axes.bxp

def parse_fliers(fliers_str):
    """Convert a comma-separated outlier cell into a list of floats.

    Parameters
    ----------
    fliers_str : str, number, or missing value
        Raw cell value, e.g. ``"1.5, 2.0"``. Non-string values are converted
        with ``str()`` before splitting.

    Returns
    -------
    list of float
        Parsed values in their original order. Blank tokens are skipped.
        An empty list is returned for missing values (``pd.isna``), for the
        empty string, and when any non-blank token is not a valid float.
    """
    if pd.isna(fliers_str) or fliers_str == '':
        return []
    tokens = (tok for tok in str(fliers_str).split(',') if tok.strip())
    try:
        return [float(tok) for tok in tokens]
    except ValueError:
        # Only malformed numbers are tolerated; anything else
        # (e.g. KeyboardInterrupt) is allowed to propagate.
        return []

# Iterate over the pairs present in df_box itself, so a pair is never skipped
# merely because it is absent from a different dataset.
box_conditions = df_box['condition'].unique()
box_cultivations = df_box['cultivation'].unique()

for cond in box_conditions:
    for cult in box_cultivations:
        subset = df_box[(df_box['condition'] == cond) & (df_box['cultivation'] == cult)]
        if subset.empty:
            continue

        # Build one summary-statistics dict per row, in the key layout that
        # Axes.bxp reads. Rows are sorted by day so boxes appear chronologically.
        stats = [
            {
                'label': str(int(row['day'])),
                'med': row['median'],
                'q1': row['q1'],
                'q3': row['q3'],
                'whislo': row['whisker_low'],
                'whishi': row['whisker_high'],
                # A missing 'fliers' column is treated as "no outliers"
                'fliers': parse_fliers(row.get('fliers', '')),
            }
            for _, row in subset.sort_values('day').iterrows()
        ]

        fig, ax = plt.subplots(figsize=(8, 5))

        # Draw the boxes directly from the precomputed statistics
        ax.bxp(stats, showfliers=True, patch_artist=True,
               boxprops=dict(facecolor='lightblue', color='blue'),
               medianprops=dict(color='red', linewidth=2))

        ax.set_title(f'Biofilm Structure Stats: {cond} - {cult}')
        ax.set_xlabel('Day')
        ax.set_ylabel('Value')
        ax.grid(True, axis='y', linestyle='--', alpha=0.7)

        plt.tight_layout()
        plt.show()
        # Release the figure explicitly so figures do not accumulate across
        # iterations on backends that do not close them after show().
        plt.close(fig)

# Species Distribution (Line Plot)

Analysis of species distribution dynamics over time.

**Key Observation (Dysbiotic HOBIC):**
- **Orange (*V. parvula*)** dominates initially (>90% on Days 1-3).
- **Red (*P. gingivalis*)** shows a distinctive "delayed surge": it remains near 0% until Day 10, then rapidly increases to ~18% by Day 21. This contrasts with the Static culture, where Red is present (~10-30%) throughout.

In [None]:
# Load Species Distribution Data
# Read the species-distribution table from the working directory and preview its first rows.
df_species = pd.read_csv(filepath_or_buffer='species_distribution_data.csv')
print("Species Distribution Data:")
display(df_species.head(n=5))

In [None]:
# Visualization: Species Distribution over Time

# Line colour per species label. The labels in the data are colour names;
# the trailing comments give the organism each label stands for.
color_map = {
    'Blue': '#3498db',    # S. oralis
    'Green': '#2ecc71',   # A. naeslundii
    'Yellow': '#f1c40f',  # V. dispar (Commensal)
    'Orange': '#e67e22',  # V. parvula (Dysbiotic)
    'Purple': '#9b59b6',  # F. nucleatum
    'Red': '#e74c3c'      # P. gingivalis
}

# Iterate over the pairs present in df_species itself, so a pair is never
# skipped merely because it is absent from a different dataset.
species_conditions = df_species['condition'].unique()
species_cultivations = df_species['cultivation'].unique()

for cond in species_conditions:
    for cult in species_cultivations:
        subset = df_species[(df_species['condition'] == cond) & (df_species['cultivation'] == cult)]
        if subset.empty:
            continue

        fig, ax = plt.subplots(figsize=(10, 6))

        # One line per species, in order of first appearance within this pair
        for species in subset['species'].unique():
            sp_data = subset[subset['species'] == species].sort_values('day')

            # Species labels missing from color_map are drawn in gray
            ax.plot(sp_data['day'], sp_data['median'], marker='o', label=species,
                    color=color_map.get(species, 'gray'), linewidth=2)

        ax.set_title(f'Species Distribution Dynamics: {cond} - {cult}')
        ax.set_xlabel('Day')
        ax.set_ylabel('Percentage (%)')
        # Small margin around 0-100 so markers at the extremes are not clipped
        ax.set_ylim(-5, 105)
        ax.legend(title='Species', loc='upper right', bbox_to_anchor=(1.15, 1))
        ax.grid(True, linestyle='--', alpha=0.7)

        # Highlight specific observations
        if cond == 'Dysbiotic' and cult == 'HOBIC':
            # Point an arrow at the Day 21 value of 'Red'; nothing is drawn
            # when that species or that day is absent from the data.
            red_day21 = subset[(subset['species'] == 'Red') & (subset['day'] == 21)]
            if not red_day21.empty:
                ax.annotate('Delayed Surge',
                            xy=(21, red_day21['median'].iloc[0]),
                            xytext=(15, 40),
                            arrowprops=dict(facecolor='black', shrink=0.05))

        plt.tight_layout()
        plt.show()
        # Release the figure explicitly so figures do not accumulate across
        # iterations on backends that do not close them after show().
        plt.close(fig)

# Estimated Absolute Volume by Species

Calculated as: `Total Volume (Boxplot Median) * Species Ratio (Distribution Median) / 100`.
This is the estimated absolute biomass of each species; because it is the product of two medians, treat it as an approximation rather than a measured volume.

In [None]:
# Calculate Estimated Absolute Volume
# Formula: Total Volume (from Boxplot) * Species Ratio (from Distribution) / 100

# Both source tables call their value column 'median', so each is renamed
# before merging:
#   df_box 'median'     -> total_volume   (Total Volume)
#   df_species 'median' -> species_ratio  (Species Ratio, %)
MERGE_KEYS = ['condition', 'cultivation', 'day']

df_vol = (
    df_box[MERGE_KEYS + ['median']]
    .rename(columns={'median': 'total_volume'})
)
df_ratio = (
    df_species[MERGE_KEYS + ['species', 'median']]
    .rename(columns={'median': 'species_ratio'})
)

# Left merge keeps every species row; rows without a matching volume get NaN.
# validate='many_to_one' raises pandas.errors.MergeError if df_vol has more
# than one row per (condition, cultivation, day), instead of silently
# duplicating species rows.
df_calc = pd.merge(df_ratio, df_vol, on=MERGE_KEYS, how='left', validate='many_to_one')

# Calculate Absolute Volume: the ratio is a percentage, hence the division by 100
df_calc['absolute_volume'] = df_calc['total_volume'] * (df_calc['species_ratio'] / 100)

display(df_calc.head())

In [None]:
# Visualization: Stacked Area Plot for Absolute Volume

# Iterate over the pairs present in df_calc itself, so a pair is never
# skipped merely because it is absent from a different dataset.
calc_conditions = df_calc['condition'].unique()
calc_cultivations = df_calc['cultivation'].unique()

for cond in calc_conditions:
    for cult in calc_cultivations:
        subset = df_calc[(df_calc['condition'] == cond) & (df_calc['cultivation'] == cult)]
        if subset.empty:
            continue

        fig, ax = plt.subplots(figsize=(10, 6))

        # Reshape to one row per day and one column per species.
        # Missing day/species combinations (and NaN volumes) are drawn as 0.
        pivot_data = subset.pivot(index='day', columns='species', values='absolute_volume').fillna(0)
        species_cols = pivot_data.columns

        # One series and one colour per species, in column order;
        # species missing from color_map are drawn in gray.
        layers = [pivot_data[sp] for sp in species_cols]
        layer_colors = [color_map.get(sp, 'gray') for sp in species_cols]

        ax.stackplot(pivot_data.index, layers, labels=species_cols, colors=layer_colors, alpha=0.8)

        ax.set_title(f'Estimated Absolute Biomass Volume: {cond} - {cult}')
        ax.set_xlabel('Day')
        ax.set_ylabel('Estimated Volume (Arbitrary Units)')
        ax.legend(loc='upper left')
        ax.grid(True, linestyle='--', alpha=0.5)

        plt.tight_layout()
        plt.show()
        # Release the figure explicitly so figures do not accumulate across
        # iterations on backends that do not close them after show().
        plt.close(fig)