# In [10]:
"""
Date: April 18, 2025

Description:
    This script shows how to:
        1. Generate a simple bar plot comparing two groups (Genotype_A vs. Wildtype) 
           with corresponding swarm points for individual data. A Mann-Whitney U 
           test is performed, and the result is annotated on the plot.
        2. Generate a grouped bar plot showing Genotype_A vs. Wildtype under two 
           different treatments (Treated vs. Untreated), with swarm points to 
           visualize data distribution.

Usage:
    python generate_barplots.py

"""

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy import stats


def _significance_label(p_value):
    """
    Convert a p-value into the star notation drawn above the bars.

    Returns '***' for p < 0.001, '**' for p < 0.01, '*' for p < 0.05 and
    'ns' (not significant) otherwise.
    """
    for threshold, label in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p_value < threshold:
            return label
    return 'ns'


def basic_barplot_comparison():
    """
    Generate a bar plot with swarm points comparing two groups (Genotype_A vs. Wildtype).

    A Mann-Whitney U test is run on the two samples, the result is printed,
    and a bracket with a significance label derived from the p-value is drawn
    above the bars. The figure is written to 'bar_test.svg' and
    'bar_test.png' in the current working directory and then closed.

    Returns:
        None
    """
    # Sample data
    genotype_A_values = [67, 56, 62, 70, 89]
    wildtype_values = [42, 55, 61, 51, 62]

    # Merge data and label groups
    vals = genotype_A_values + wildtype_values
    groups = ['Genotype_A'] * len(genotype_A_values) + ['Wildtype'] * len(wildtype_values)

    # Build a long-format DataFrame for Seaborn (one row per observation)
    df = pd.DataFrame({'vals': vals, 'groups': groups})

    # Statistical test: Mann-Whitney U
    test_result = stats.mannwhitneyu(genotype_A_values, wildtype_values)
    print("Mann-Whitney U test result:", test_result)

    # Plot
    plt.figure(figsize=(2, 4))

    # Bars show the group estimate with standard-error error bars
    ax = sns.barplot(
        data=df,
        x='groups',
        y='vals',
        hue='groups',
        dodge=False,
        legend=False,
        palette=['#1f77b4', '#ff7f0e'],  # Updated colors
        capsize=0.5,
        edgecolor='0.2',
        lw=2.5,
        errorbar='se',
        err_kws={'linewidth': 2.5, 'color': '0.2'}
    )

    # Add individual data points as a swarm
    sns.swarmplot(
        data=df,
        x='groups',
        y='vals',
        hue='groups',
        dodge=False,
        marker='o',
        size=10,
        edgecolor='0.2',
        linewidth=2.5,
        facecolors='none',
        legend=False
    )

    # Annotate significance.
    # Remember the current upper y-limit so it can be restored after the
    # bracket is drawn; the bracket itself is drawn with clip_on=False so it
    # stays visible even if it lies beyond that limit.
    prev_lim = ax.get_ylim()[1]
    max_val = max(vals)
    plt.plot(
        [0, 0, 1, 1],
        [max_val + 5, max_val + 8, max_val + 8, max_val + 5],
        lw=2.5,
        color='0.2',
        clip_on=False
    )
    # The label follows the test result instead of always claiming '*'
    plt.text(
        0.5,
        max_val + 8,
        s=_significance_label(test_result.pvalue),
        ha='center',
        size=20,
        weight='bold',
        color='0.2'
    )

    # Make plot look nice
    for axis in ['bottom', 'left']:
        ax.spines[axis].set_linewidth(2.5)
        ax.spines[axis].set_color('0.2')

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # Ticks and labels
    plt.xticks(size=14, rotation=35, rotation_mode='anchor', ha='right', weight='bold', color='0.2')
    plt.yticks(size=14, weight='bold', color='0.2')
    ax.tick_params(width=2.5, color='0.2')

    # Restore the upper y-limit recorded before the annotation was added
    plt.ylim(top=prev_lim)

    # Save figures
    plt.savefig('bar_test.svg', bbox_inches='tight')
    plt.savefig('bar_test.png', bbox_inches='tight', dpi=250, facecolor=ax.get_facecolor())
    plt.close()


def grouped_barplot_with_treatments():
    """
    Generate a grouped bar plot (Genotype_A vs. Wildtype) with two different treatments
    (Treated vs. Untreated), including swarm points for data distribution.

    The figure is written to 'bar_test_big.svg' and 'bar_test_big.png' in the
    current working directory and then closed.

    Returns:
        None
    """
    # Larger sample data
    genotype_A_values = [67, 56, 62, 70, 89, 45, 37, 50, 60, 34]
    wildtype_values = [42, 55, 61, 51, 62, 25, 41, 35, 38, 22]

    # Merge into DataFrame.
    # Within each genotype the first five values are labelled 'Treated' and
    # the last five 'Untreated'.
    vals = genotype_A_values + wildtype_values
    genotype_labels = ['Genotype_A'] * len(genotype_A_values) + ['Wildtype'] * len(wildtype_values)
    treatments = (
        ['Treated'] * 5 + ['Untreated'] * 5 +
        ['Treated'] * 5 + ['Untreated'] * 5
    )

    df = pd.DataFrame({
        'Value': vals,
        'Genotype': genotype_labels,
        'Treatment': treatments
    })

    plt.figure(figsize=(4, 4))
    # Bars grouped by genotype and split by treatment, with standard-error bars
    ax = sns.barplot(
        data=df,
        x='Genotype',
        y='Value',
        hue='Treatment',
        palette=['#1f77b4', '#ff7f0e'],  # Updated colors
        capsize=0.25,
        edgecolor='0.2',
        lw=2.5,
        errorbar='se',
        err_kws={'linewidth': 2.5, 'color': '0.2'}
    )

    # Overlay swarm plot; dodge=True separates the points by treatment
    sns.swarmplot(
        data=df,
        x='Genotype',
        y='Value',
        hue='Treatment',
        dodge=True,
        marker='o',
        size=10,
        edgecolor='0.2',
        linewidth=2.5,
        facecolors='none'
    )

    # Legend: use the barplot handles so the color squares show up.
    # The font size goes inside `prop` because Matplotlib ignores a separate
    # `fontsize` argument whenever `prop` is supplied.
    handles, labels = ax.get_legend_handles_labels()
    plt.legend(
        handles=handles[:2],  # Barplot handles (filled colors)
        labels=labels[:2],
        loc='upper center',
        bbox_to_anchor=(0.5, -0.15),
        ncol=2,
        frameon=False,
        labelcolor='0.2',
        prop={'weight': 'bold', 'size': 14}
    )

    # Adjust spines
    for axis in ['bottom', 'left']:
        ax.spines[axis].set_linewidth(2.5)
        ax.spines[axis].set_color('0.2')

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # Ticks and labels
    plt.xticks(size=14, ha='center', weight='bold', color='0.2')
    plt.yticks(size=14, weight='bold', color='0.2')
    ax.tick_params(width=2.5, color='0.2')

    plt.xlabel('')
    plt.ylabel('Values', size=14, weight='bold', color='0.2')

    # Save figures
    plt.savefig('bar_test_big.svg', bbox_inches='tight')
    plt.savefig('bar_test_big.png', bbox_inches='tight', dpi=250, facecolor=ax.get_facecolor())
    plt.close()


# Produce both example figures, in the same order as they are defined.
for make_figure in (basic_barplot_comparison, grouped_barplot_with_treatments):
    make_figure()

# Output:
# Mann-Whitney U test result: MannwhitneyuResult(statistic=np.float64(22.5), pvalue=np.float64(0.046532985074510584))
