In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Patch
from matplotlib import rcParams
import os

# -------------------------------------------------------------------
# EMAIL DIVERSITY ANALYSIS - DISTINCT METRICS ONLY
# -------------------------------------------------------------------
# Load the per-run diversity results. The "model" column is split on its
# LAST underscore: the prefix becomes "method", the suffix "model_clean".
df = pd.read_csv("../output/diversity/diversity_results_body.csv")
df["model_clean"] = df["model"].apply(lambda name: name.rpartition("_")[2])
df["method"] = df["model"].apply(lambda name: name.rpartition("_")[0])
# "gpt4" is displayed fully upper-cased; every other model name is title-cased.
df["model_clean"] = df["model_clean"].apply(
    lambda name: name.upper() if name.lower() == "gpt4" else name.title()
)

# -------------------------------------------------------------------
# Academic Config
# -------------------------------------------------------------------
rcParams.update({
    'font.family': 'serif',
    'font.serif': ['Times New Roman'],
    'font.size': 11,
    'axes.linewidth': 0.8,
    'axes.spines.top': False,
    'axes.spines.right': False,
})

# -------------------------------------------------------------------
# Data Loading - Only Distinct Metrics
# -------------------------------------------------------------------
lexical_measures = ['lexical_diversity_distinct_1', 'lexical_diversity_distinct_2', 'lexical_diversity_distinct_3']
lexical_labels = ['Distinct-1', 'Distinct-2', 'Distinct-3']

df_lexical = df[["method", "model_clean", *lexical_measures]]

all_methods = sorted(df_lexical['method'].unique())
models = df_lexical['model_clean'].unique()

# One palette entry per method (never fewer than the original five), so the
# per-method lookups done when building the legend cannot run off the end.
colors = sns.color_palette("colorblind", n_colors=max(5, len(all_methods)))

# -------------------------------------------------------------------
# Lexical Diversity Plot - Distinct Metrics Only
# -------------------------------------------------------------------
# lexical_stats[model][measure] is a DataFrame indexed by method with the
# columns 'mean' and 'std' of that measure for that model.
lexical_stats = {
    model: {
        measure: (df_lexical[df_lexical['model_clean'] == model]
                  .groupby('method')[measure]
                  .agg(['mean', 'std']))
        for measure in lexical_measures
    }
    for model in models
}

# Shared upper y-limit per measure: 120% of the largest per-method mean seen
# in any model. Missing/NaN means are ignored explicitly instead of being
# hidden behind a bare ``except``; a non-positive result falls back to 1.0 so
# the axis never collapses to the degenerate range (0, 0).
lexical_max = {}
for measure in lexical_measures:
    peaks = [lexical_stats[model][measure]['mean'].max() for model in models]
    peaks = [peak for peak in peaks if pd.notna(peak)]
    measure_max = max(peaks, default=0) * 1.2
    lexical_max[measure] = measure_max if measure_max > 0 else 1.0

# Grid of bar charts: one row per Distinct-n measure, one column per model,
# plus a thin bottom row that only hosts the shared legend.
fig_lexical = plt.figure(figsize=(20, 12))
gs_lexical = GridSpec(4, len(models), figure=fig_lexical, height_ratios=[1, 1, 1, 0.15])

x_pos = np.arange(len(all_methods))
method_labels = [method.replace('_', ' ').title() for method in all_methods]
# Index the palette modulo its length so a palette shorter than the method
# list cannot raise IndexError, and bars and legend always agree on colors.
bar_colors = [colors[k % len(colors)] for k in range(len(all_methods))]

for i, measure in enumerate(lexical_measures):
    for j, model in enumerate(models):
        ax = fig_lexical.add_subplot(gs_lexical[i, j])

        model_data = df_lexical[df_lexical['model_clean'] == model]

        # Align the per-model statistics with the global method order. The
        # groupby result only contains methods this model actually has, so
        # without the reindex the bar heights and x positions can differ in
        # length. Methods absent for this model become NaN (no bar drawn).
        model_stats = lexical_stats[model][measure].reindex(all_methods)
        if model_stats['mean'].notna().any():
            bars = ax.bar(x_pos,
                          model_stats['mean'].values,
                          yerr=model_stats['std'].values,
                          capsize=4,
                          width=0.7,
                          color=bar_colors,
                          edgecolor='black',
                          linewidth=0.8,
                          alpha=0.8)

            # Overlay the individual observations, jittered horizontally
            # around the bar centre so overlapping points stay visible.
            for m_idx, method in enumerate(all_methods):
                method_data = model_data[model_data['method'] == method][measure]
                if not method_data.empty:
                    jittered_x = np.random.normal(m_idx, 0.05, size=len(method_data))
                    ax.scatter(jittered_x, method_data, color='black', s=20, alpha=0.6, zorder=10)

            # Print the mean above each bar; skip placeholders for methods
            # that have no data for this model.
            for bar in bars:
                height = bar.get_height()
                if np.isnan(height):
                    continue
                ax.annotate(f'{height:.3f}',
                            xy=(bar.get_x() + bar.get_width() / 2, height),
                            xytext=(0, 3), textcoords="offset points",
                            ha='center', va='bottom', fontsize=9)

        # Every subplot gets the same tick positions; only the bottom row of
        # measures shows the method names.
        ax.set_xticks(x_pos)
        if i == len(lexical_measures) - 1:
            ax.set_xticklabels(method_labels, rotation=45, ha='right', fontsize=9)
        else:
            ax.set_xticklabels([])

        if measure in lexical_max:
            ax.set_ylim(0, lexical_max[measure])

        ax.grid(True, linestyle='--', alpha=0.3, axis='y')

        if i == 0:
            ax.set_title(model.replace('_', ' '), fontsize=14, fontweight='bold')

        if j == 0:
            ax.set_ylabel(lexical_labels[i], fontweight='bold', fontsize=12)

# Shared legend in the bottom grid row, built from the same colors as the bars.
legend_ax = fig_lexical.add_subplot(gs_lexical[3, :])
legend_ax.axis('off')

legend_elements = [Patch(facecolor=bar_color,
                         edgecolor='black',
                         label=label)
                   for bar_color, label in zip(bar_colors, method_labels)]

legend_ax.legend(handles=legend_elements,
                 loc='center',
                 ncol=min(6, len(all_methods)),
                 frameon=True,
                 fontsize=12,
                 title='Methods',
                 title_fontsize=14)

plt.tight_layout(rect=[0, 0.02, 1, 0.95])

# Write the figure as both PDF and PNG, creating the output directory on demand.
output_dir = "../output/textualdiversity"
os.makedirs(output_dir, exist_ok=True)
output_path_lexical_pdf = os.path.join(output_dir, "lexical_diversity_distinct_metrics.pdf")
output_path_lexical_png = os.path.join(output_dir, "lexical_diversity_distinct_metrics.png")

plt.savefig(output_path_lexical_pdf, format='pdf', dpi=300, bbox_inches='tight')
plt.savefig(output_path_lexical_png, format='png', dpi=300, bbox_inches='tight')

print(f"Distinct-n metrics plot saved to: {output_dir}")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Patch
from matplotlib import rcParams
import os

# -------------------------------------------------------------------
# EMAIL BODY SEMANTIC DIVERSITY ANALYSIS
# -------------------------------------------------------------------
# Load the per-run diversity results. The "model" column is split on its
# LAST underscore: the prefix becomes "method", the suffix "model_clean".
df = pd.read_csv("../output/diversity/diversity_results_body.csv")
df["model_clean"] = df["model"].apply(lambda name: name.rpartition("_")[2])
df["method"] = df["model"].apply(lambda name: name.rpartition("_")[0])
# "gpt4" is displayed fully upper-cased; every other model name is title-cased.
df["model_clean"] = df["model_clean"].apply(
    lambda name: name.upper() if name.lower() == "gpt4" else name.title()
)

# -------------------------------------------------------------------
# Academic Config
# -------------------------------------------------------------------
rcParams.update({
    'font.family': 'serif',
    'font.serif': ['Times New Roman'],
    'font.size': 11,
    'axes.linewidth': 0.8,
    'axes.spines.top': False,
    'axes.spines.right': False,
})

# -------------------------------------------------------------------
# Data Loading
# -------------------------------------------------------------------
df_plot = df[["method", "model_clean", "semantic_diversity"]]
all_methods = sorted(df_plot['method'].unique())

# One palette entry per method (never fewer than the original five), so the
# per-method lookups done when building the legend cannot run off the end.
colors = sns.color_palette("colorblind", n_colors=max(5, len(all_methods)))

measures = ['semantic_diversity']
measure_labels = ['Semantic Diversity']
models = df_plot['model_clean'].unique()

# stats_data[model][measure] is a DataFrame indexed by method with the
# columns 'mean' and 'std'. Driven by ``measures`` rather than a repeated
# literal so the two cannot drift apart.
stats_data = {
    model: {
        measure: (df_plot[df_plot['model_clean'] == model]
                  .groupby('method')[measure]
                  .agg(['mean', 'std']))
        for measure in measures
    }
    for model in models
}

# -------------------------------------------------------------------
# Calculate global y-limit for consistent scale across plots
# -------------------------------------------------------------------
# 120% of the largest per-method mean across all models; NaN means are
# ignored, and a non-positive result falls back to 1.0 so the shared axis
# never collapses to the degenerate range (0, 0).
model_peaks = [stats_data[model]['semantic_diversity']['mean'].max() for model in models]
model_peaks = [peak for peak in model_peaks if pd.notna(peak)]
global_max = max(model_peaks, default=0) * 1.2
if not global_max > 0:
    global_max = 1.0

global_y_lim = (0, global_max)

# -------------------------------------------------------------------
# Plot Config
# -------------------------------------------------------------------
# One bar chart per model in the top row; the bottom row hosts the legend.
fig = plt.figure(figsize=(20, 8))
gs = GridSpec(2, len(models), figure=fig, height_ratios=[4, 1])

x_pos = np.arange(len(all_methods))
method_labels = [method.replace('_', ' ').title() for method in all_methods]
# Index the palette modulo its length so a palette shorter than the method
# list cannot raise IndexError, and bars and legend always agree on colors.
bar_colors = [colors[k % len(colors)] for k in range(len(all_methods))]

# -------------------------------------------------------------------
# Semantic Diversity Subplot
# -------------------------------------------------------------------
for j, model in enumerate(models):
    ax = fig.add_subplot(gs[0, j])

    model_data = df_plot[df_plot['model_clean'] == model]

    # Align the per-model statistics with the global method order. The
    # groupby result only holds methods this model actually has, so without
    # the reindex bar heights and x positions can differ in length. Methods
    # absent for this model become NaN (no bar drawn).
    model_stats = stats_data[model]['semantic_diversity'].reindex(all_methods)
    if model_stats['mean'].notna().any():
        bars = ax.bar(x_pos,
                      model_stats['mean'].values,
                      yerr=model_stats['std'].values,
                      capsize=4,
                      width=0.7,
                      color=bar_colors,
                      edgecolor='black',
                      linewidth=0.8,
                      alpha=0.8)

        # Overlay the individual observations with horizontal jitter.
        for m_idx, method in enumerate(all_methods):
            method_data = model_data[model_data['method'] == method]['semantic_diversity']
            jittered_x = np.random.normal(m_idx, 0.05, size=len(method_data))
            ax.scatter(jittered_x, method_data, color='black', s=20, alpha=0.6, zorder=10)

        # Print the mean above each bar; skip placeholders for methods that
        # have no data for this model.
        for bar in bars:
            height = bar.get_height()
            if np.isnan(height):
                continue
            ax.annotate(f'{height:.3f}',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3), textcoords="offset points",
                        ha='center', va='bottom', fontsize=9)

    ax.set_xticks(x_pos)
    ax.set_xticklabels(method_labels, rotation=45, ha='right', fontsize=9)

    ax.set_ylim(global_y_lim)
    ax.grid(True, linestyle='--', alpha=0.3, axis='y')

    ax.set_title(model.replace('_', ' '), fontsize=14, fontweight='bold')

    if j == 0:
        ax.set_ylabel('Semantic Diversity', fontweight='bold', fontsize=12)

# -------------------------------------------------------------------
# Legend
# -------------------------------------------------------------------
legend_ax = fig.add_subplot(gs[1, :])
legend_ax.axis('off')

legend_elements = [Patch(facecolor=bar_color,
                         edgecolor='black',
                         label=label)
                   for bar_color, label in zip(bar_colors, method_labels)]

legend_ax.legend(handles=legend_elements,
                 loc='center',
                 ncol=min(6, len(all_methods)),
                 frameon=True,
                 fontsize=12,
                 title='Methods',
                 title_fontsize=14)

plt.tight_layout(rect=[0, 0.02, 1, 0.95])

# -------------------------------------------------------------------
# Saving the figure
# -------------------------------------------------------------------
output_dir = "../output/textualdiversity"
os.makedirs(output_dir, exist_ok=True)
output_path_pdf = os.path.join(output_dir, "semantic_diversity_analysis.pdf")
output_path_png = os.path.join(output_dir, "semantic_diversity_analysis.png")

plt.savefig(output_path_pdf, format='pdf', dpi=300, bbox_inches='tight')
plt.savefig(output_path_png, format='png', dpi=300, bbox_inches='tight')

print(f"Semantic diversity visualization saved to: {output_dir}")
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Patch
from matplotlib import rcParams
import matplotlib.ticker as mtick
import os

# -------------------------------------------------------------------
# SENTIMENT ANALYSIS
# -------------------------------------------------------------------
# Load the per-run results. The "model" column is split on its LAST
# underscore: the prefix becomes "method", the suffix "model_clean".
data = pd.read_csv('../output/diversity/diversity_results_body.csv')
data["model_clean"] = data["model"].apply(lambda name: name.rpartition("_")[2])
data["method"] = data["model"].apply(lambda name: name.rpartition("_")[0])
# "gpt4" is displayed fully upper-cased; every other model name is title-cased.
data["model_clean"] = data["model_clean"].apply(
    lambda name: name.upper() if name.lower() == "gpt4" else name.title()
)

sentiments = ['sentiment_neg', 'sentiment_neu', 'sentiment_pos']
sentiment_labels = ['Negative', 'Neutral', 'Positive']
df_sent = data[["method", "model_clean", *sentiments]]

models = df_sent['model_clean'].unique()

# -------------------------------------------------------------------
# ACADEMIC CONFIGURATION
# -------------------------------------------------------------------
rcParams.update({
    'font.family': 'serif',
    'font.serif': ['Times New Roman'],
    'font.size': 11,
    'axes.linewidth': 0.8,
    'axes.spines.top': False,
    'axes.spines.right': False,
})

# One palette entry per method (never fewer than the original six), so the
# per-method lookups done when building the legend cannot run off the end.
colors = sns.color_palette("colorblind", n_colors=max(6, df_sent['method'].nunique()))

# -------------------------------------------------------------------
# Aggregation Statistics
# -------------------------------------------------------------------
# stats_data[model][sentiment] is a DataFrame indexed by method with the
# columns 'mean' and 'std' of that sentiment score for that model.
stats_data = {
    model: {
        sentiment: (df_sent[df_sent['model_clean'] == model]
                    .groupby('method')[sentiment]
                    .agg(['mean', 'std']))
        for sentiment in sentiments
    }
    for model in models
}

# -------------------------------------------------------------------
# Plot Config
# -------------------------------------------------------------------
def _format_method_label(method):
    """Return the display label for a raw method identifier.

    Underscores become spaces and the result is title-cased; a few names are
    then rewritten to their preferred spelling ("AttrPrompting", "BARE",
    "(Llama 3B)", "(Llama 8B)").
    """
    return (method.replace('_', ' ').title()
            .replace("Attr Prompting", "AttrPrompting")
            .replace("Llama3B", "(Llama 3B)")
            .replace("Llama8B", "(Llama 8B)")
            .replace("Bare", "BARE"))


# Grid of bar charts: one row per sentiment class, one column per model,
# plus a thin bottom row that only hosts the shared legend.
fig = plt.figure(figsize=(20, 12))
gs = GridSpec(4, len(models), figure=fig, height_ratios=[1, 1, 1, 0.15])
axes_dict = {}

# A single, sorted method order drives bars, scatter overlays, tick labels
# and the legend. groupby() returns its statistics sorted by method, whereas
# unique() returns first-appearance order; mixing the two could attach a bar
# to the wrong label and color whenever the CSV rows are not already sorted.
unique_methods = sorted(df_sent['method'].unique())
x_pos = np.arange(len(unique_methods))
method_labels = [_format_method_label(method) for method in unique_methods]
bar_colors = [colors[k % len(colors)] for k in range(len(unique_methods))]

# -------------------------------------------------------------------
# Subplot Grid
# -------------------------------------------------------------------
for i, sentiment in enumerate(sentiments):
    for j, model in enumerate(models):
        ax = fig.add_subplot(gs[i, j])
        axes_dict[(sentiment, model)] = ax

        model_data = df_sent[df_sent['model_clean'] == model]

        # Methods missing for this model become NaN rows (no bar drawn), so
        # every subplot shares the same x positions.
        model_stats = stats_data[model][sentiment].reindex(unique_methods)
        if model_stats['mean'].notna().any():
            bars = ax.bar(x_pos,
                          model_stats['mean'].values,
                          yerr=model_stats['std'].values,
                          capsize=4,
                          width=0.7,
                          color=bar_colors,
                          edgecolor='black',
                          linewidth=0.8,
                          alpha=0.8)

            # Overlay the individual observations with horizontal jitter.
            for m_idx, method in enumerate(unique_methods):
                method_data = model_data[model_data['method'] == method][sentiment]
                jittered_x = np.random.normal(m_idx, 0.05, size=len(method_data))
                ax.scatter(jittered_x, method_data, color='black', s=20, alpha=0.6, zorder=10)

            # Print the mean (as a percentage) above each bar; skip
            # placeholders for methods with no data for this model.
            for bar in bars:
                height = bar.get_height()
                if np.isnan(height):
                    continue
                ax.annotate(f'{height * 100:.2f}%',
                            xy=(bar.get_x() + bar.get_width() / 2, height),
                            xytext=(0, 3), textcoords="offset points",
                            ha='center', va='bottom', fontsize=9)

        # Same tick positions everywhere; names only on the bottom row.
        ax.set_xticks(x_pos)
        if i == len(sentiments) - 1:
            ax.set_xticklabels(method_labels, rotation=45, ha='right', fontsize=9)
        else:
            ax.set_xticklabels([])

        # The y axis is fixed to 0-1 and rendered as percentages.
        ax.set_ylim(0, 1)
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=2))

        ax.grid(True, linestyle='--', alpha=0.3, axis='y')

        if i == 0:
            ax.set_title(model.replace('_', ' '), fontsize=14, fontweight='bold')

        if j == 0:
            ax.set_ylabel(sentiment_labels[i], fontweight='bold', fontsize=12)

# -------------------------------------------------------------------
# Legend Field
# -------------------------------------------------------------------
legend_ax = fig.add_subplot(gs[3, :])
legend_ax.axis('off')
legend_elements = [Patch(facecolor=bar_color,
                         edgecolor='black',
                         label=label)
                   for bar_color, label in zip(bar_colors, method_labels)]

legend_ax.legend(handles=legend_elements,
                 loc='center',
                 ncol=min(6, len(unique_methods)),
                 frameon=True,
                 fontsize=12,
                 title='Methods',
                 title_fontsize=14)

plt.tight_layout(rect=[0, 0.02, 1, 0.95])

# -------------------------------------------------------------------
# SAVE & SHOW
# -------------------------------------------------------------------
output_path_pdf = os.path.join("../output/sentiment", "sentiment_analysis_grid.pdf")
output_path_png = os.path.join("../output/sentiment", "sentiment_analysis_grid.png")

os.makedirs(os.path.dirname(output_path_pdf), exist_ok=True)

plt.savefig(output_path_pdf, format='pdf', dpi=300, bbox_inches='tight')
plt.savefig(output_path_png, format='png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import os
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Patch
from matplotlib import rcParams

# -------------------------------------------------------------------
# EMAIL VERBOSITY ANALYSIS
# -------------------------------------------------------------------
# Load the per-run results. The "model" column is split on its LAST
# underscore: the prefix becomes "method", the suffix "model_clean".
df = pd.read_csv('../output/diversity/diversity_results_body.csv')
df["model_clean"] = df["model"].apply(lambda name: name.rpartition("_")[2])
df["method"] = df["model"].apply(lambda name: name.rpartition("_")[0])
# "gpt4" is displayed fully upper-cased; every other model name is title-cased.
df["model_clean"] = df["model_clean"].apply(
    lambda name: name.upper() if name.lower() == "gpt4" else name.title()
)

# -------------------------------------------------------------------
# Academic Config
# -------------------------------------------------------------------
rcParams.update({
    'font.family': 'serif',
    'font.serif': ['Times New Roman'],
    'font.size': 11,
    'axes.linewidth': 0.8,
    'axes.spines.top': False,
    'axes.spines.right': False,
})

# -------------------------------------------------------------------
# Data Loading
# -------------------------------------------------------------------
measures = ['avg_words_per_email', 'avg_sentences_per_email', 'avg_words_per_sentence']
measure_labels = ['Avg Words per Email', 'Avg Sentences per Email', 'Avg Words per Sentence']
df_plot = df[["method", "model_clean", *measures]]
models = df_plot['model_clean'].unique()

# One palette entry per method (never fewer than the original six), so the
# per-method lookups done when building the legend cannot run off the end.
colors = sns.color_palette("colorblind", n_colors=max(6, df_plot['method'].nunique()))

# stats_data[model][measure] is a DataFrame indexed by method with the
# columns 'mean' and 'std' of that measure for that model.
stats_data = {
    model: {
        measure: (df_plot[df_plot['model_clean'] == model]
                  .groupby('method')[measure]
                  .agg(['mean', 'std']))
        for measure in measures
    }
    for model in models
}

# -------------------------------------------------------------------
# Plot Config
# -------------------------------------------------------------------
# Grid of bar charts: one row per verbosity measure, one column per model,
# plus a thin bottom row that only hosts the shared legend.
fig = plt.figure(figsize=(20, 12))
gs = GridSpec(4, len(models), figure=fig, height_ratios=[1, 1, 1, 0.15])  # Final Row for Legend

axes_dict = {}

# A single, sorted method order drives bars, scatter overlays, tick labels
# and the legend. groupby() returns its statistics sorted by method, whereas
# unique() returns first-appearance order; mixing the two could attach a bar
# to the wrong label and color whenever the CSV rows are not already sorted.
unique_methods = sorted(df_plot['method'].unique())
x_pos = np.arange(len(unique_methods))
method_labels = [method.replace('_', ' ').title() for method in unique_methods]
bar_colors = [colors[k % len(colors)] for k in range(len(unique_methods))]

# Shared y-range per measure: 0 up to 120% of the largest per-method mean in
# any model (NaN means ignored); falls back to 1 when no mean is available.
y_limits = {}
for measure in measures:
    max_vals = [stats_data[model][measure]['mean'].max() for model in models]
    max_vals = [peak * 1.2 for peak in max_vals if pd.notna(peak)]
    y_limits[measure] = (0, max(max_vals) if max_vals else 1)

# -------------------------------------------------------------------
# Subplot Grid
# -------------------------------------------------------------------
for i, measure in enumerate(measures):
    for j, model in enumerate(models):
        ax = fig.add_subplot(gs[i, j])
        axes_dict[(measure, model)] = ax

        model_data = df_plot[df_plot['model_clean'] == model]

        # Methods missing for this model become NaN rows (no bar drawn), so
        # every subplot shares the same x positions.
        model_stats = stats_data[model][measure].reindex(unique_methods)
        if model_stats['mean'].notna().any():
            bars = ax.bar(x_pos,
                          model_stats['mean'].values,
                          yerr=model_stats['std'].values,
                          capsize=4,
                          width=0.7,
                          color=bar_colors,
                          edgecolor='black',
                          linewidth=0.8,
                          alpha=0.8)

            # Overlay the individual observations with horizontal jitter.
            for m_idx, method in enumerate(unique_methods):
                method_data = model_data[model_data['method'] == method][measure]
                jittered_x = np.random.normal(m_idx, 0.05, size=len(method_data))
                ax.scatter(jittered_x, method_data, color='black', s=20, alpha=0.6, zorder=10)

            # -------------------------------------------------------------------
            # Bar Annotation
            # -------------------------------------------------------------------
            # Skip placeholders for methods with no data for this model.
            for bar in bars:
                height = bar.get_height()
                if np.isnan(height):
                    continue
                ax.annotate(f'{height:.2f}',
                            xy=(bar.get_x() + bar.get_width() / 2, height),
                            xytext=(0, 3), textcoords="offset points",
                            ha='center', va='bottom', fontsize=9)

        # Same tick positions everywhere; names only on the bottom row.
        ax.set_xticks(x_pos)
        if i == len(measures) - 1:
            ax.set_xticklabels(method_labels, rotation=45, ha='right', fontsize=9)
        else:
            ax.set_xticklabels([])

        ax.set_ylim(y_limits[measure])
        ax.grid(True, linestyle='--', alpha=0.3, axis='y')

        if i == 0:
            ax.set_title(model.replace('_', ' '), fontsize=14, fontweight='bold')

        if j == 0:
            ax.set_ylabel(measure_labels[i], fontweight='bold', fontsize=12)

# Shared legend in the bottom grid row, built from the same colors as the bars.
legend_ax = fig.add_subplot(gs[3, :])
legend_ax.axis('off')

legend_elements = [Patch(facecolor=bar_color,
                         edgecolor='black',
                         label=label)
                   for bar_color, label in zip(bar_colors, method_labels)]

legend_ax.legend(handles=legend_elements,
                 loc='center',
                 ncol=min(6, len(unique_methods)),
                 frameon=True,
                 fontsize=12,
                 title='Methods',
                 title_fontsize=14)

plt.tight_layout(rect=[0, 0.02, 1, 0.95])

# Write the figure as both PDF and PNG, creating the output directory on demand.
output_path_pdf = os.path.join("../output/verbosity", "verbosity_analysis_grid.pdf")
output_path_png = os.path.join("../output/verbosity", "verbosity_analysis_grid.png")

os.makedirs(os.path.dirname(output_path_pdf), exist_ok=True)

plt.savefig(output_path_pdf, format='pdf', dpi=300, bbox_inches='tight')
plt.savefig(output_path_png, format='png', dpi=300, bbox_inches='tight')

plt.show()

### Ablation Study

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Patch, Circle
from matplotlib import rcParams
import os

# -------------------------------------------------------------------
# EMAIL DIVERSITY ANALYSIS (ABLATION STUDY)
# -------------------------------------------------------------------
# Ablation rows: keep only entries whose model identifier mentions "bare".
df_ablation = pd.read_csv("../output/diversity/diversity_results_body_ablation.csv")
df_ablation = df_ablation[df_ablation.model.str.contains("bare")]

# Stack the main results with the ablation rows. ignore_index gives the
# combined frame a fresh, unique index instead of two overlapping ones.
df_lexical = pd.read_csv("../output/diversity/diversity_results_body.csv")
df_lexical = pd.concat([df_lexical, df_ablation], ignore_index=True)
# The "model" column is split on its LAST underscore: the prefix becomes
# "method", the suffix "model_clean".
df_lexical["model_clean"] = df_lexical["model"].apply(lambda name: name.rpartition("_")[2])
df_lexical["method"] = df_lexical["model"].apply(lambda name: name.rpartition("_")[0])
# "gpt4" is displayed fully upper-cased; every other model name is title-cased.
df_lexical["model_clean"] = df_lexical["model_clean"].apply(
    lambda name: name.upper() if name.lower() == "gpt4" else name.title()
)
# Only the "bare" family of methods takes part in the ablation plot.
df_lexical = df_lexical[df_lexical.method.str.contains("bare")]

# -------------------------------------------------------------------
# Academic Config
# -------------------------------------------------------------------
rcParams.update({
    'font.family': 'serif',
    'font.serif': ['Times New Roman'],
    'font.size': 11,
    'axes.linewidth': 0.8,
    'axes.spines.top': False,
    'axes.spines.right': False,
})

# -------------------------------------------------------------------
# Setup variables
# -------------------------------------------------------------------
all_methods = sorted(df_lexical['method'].unique())
if 'bare' not in all_methods:
    print("Warning: 'bare' method not found in the data")
if 'iterative_bare' not in all_methods:
    print("Warning: 'iterative_bare' method not found in the data")

models = ["Claude", "Mistral"]
lexical_measures = ['lexical_diversity_distinct_1', 'lexical_diversity_distinct_2', 'lexical_diversity_distinct_3']
lexical_labels = ['Distinct-1', 'Distinct-2', 'Distinct-3']

# One palette color per method, looked up by method name.
colors = sns.color_palette("colorblind", n_colors=len(all_methods))
method_to_color = dict(zip(all_methods, colors))

highlight_methods = ['bare', 'iterative_bare']

# -------------------------------------------------------------------
# Calculate statistics
# -------------------------------------------------------------------
# lexical_stats[model][measure] is a DataFrame indexed by method with the
# columns 'mean' and 'std' of that measure for that model.
lexical_stats = {
    model: {
        measure: (df_lexical[df_lexical['model_clean'] == model]
                  .groupby('method')[measure]
                  .agg(['mean', 'std']))
        for measure in lexical_measures
    }
    for model in models
}

# Shared upper y-limit per measure: 120% of the largest per-method mean seen
# in any model. Missing/NaN means are ignored explicitly instead of being
# hidden behind a bare ``except``; a non-positive result falls back to 1.0 so
# the axis never collapses to the degenerate range (0, 0).
lexical_max = {}
for measure in lexical_measures:
    peaks = [lexical_stats[model][measure]['mean'].max() for model in models]
    peaks = [peak for peak in peaks if pd.notna(peak)]
    measure_max = max(peaks, default=0) * 1.2
    lexical_max[measure] = measure_max if measure_max > 0 else 1.0

# -------------------------------------------------------------------
# Create the main plot
# -------------------------------------------------------------------
# Grid of bar charts: one row per Distinct-n measure, one column per model,
# plus a thin bottom row that only hosts the shared legend.
fig_lexical = plt.figure(figsize=(16, 12))
gs_lexical = GridSpec(4, len(models), figure=fig_lexical, height_ratios=[1, 1, 1, 0.15])

x_pos = np.arange(len(all_methods))
method_labels = [method.replace('_', ' ').title() for method in all_methods]
bar_colors = [method_to_color[method] for method in all_methods]

for i, measure in enumerate(lexical_measures):
    for j, model in enumerate(models):
        ax = fig_lexical.add_subplot(gs_lexical[i, j])

        # Align the per-model statistics with the global method order. The
        # groupby result only holds methods this model actually has, so
        # without the reindex a model lacking one of the methods makes the
        # bar heights and x positions differ in length. Absent methods
        # become NaN (no bar drawn).
        model_stats = lexical_stats[model][measure].reindex(all_methods)
        if model_stats['mean'].notna().any():
            bars = ax.bar(x_pos,
                          model_stats['mean'].values,
                          yerr=model_stats['std'].values,
                          capsize=4,
                          width=0.7,
                          color=bar_colors,
                          edgecolor='black',
                          linewidth=0.8,
                          alpha=0.8)

            # Print the mean above each bar; highlighted methods get a
            # larger, bold, red label. Placeholders without data are skipped.
            for method, bar in zip(all_methods, bars):
                height = bar.get_height()
                if np.isnan(height):
                    continue
                if method in highlight_methods:
                    text_style = {'fontsize': 10, 'fontweight': 'bold', 'color': 'red'}
                else:
                    text_style = {'fontsize': 9}
                ax.annotate(f'{height:.3f}',
                            xy=(bar.get_x() + bar.get_width() / 2, height),
                            xytext=(0, 3), textcoords="offset points",
                            ha='center', va='bottom', **text_style)

        # Same tick positions everywhere; names only on the bottom row.
        ax.set_xticks(x_pos)
        if i == len(lexical_measures) - 1:
            ax.set_xticklabels(method_labels, rotation=45, ha='right', fontsize=9)
        else:
            ax.set_xticklabels([])

        if measure in lexical_max:
            ax.set_ylim(0, lexical_max[measure])

        ax.grid(True, linestyle='--', alpha=0.3, axis='y')

        if i == 0:
            ax.set_title(model, fontsize=14, fontweight='bold')

        if j == 0:
            ax.set_ylabel(lexical_labels[i], fontweight='bold', fontsize=12)

# Shared legend in the bottom grid row; highlighted methods get a red edge.
legend_ax = fig_lexical.add_subplot(gs_lexical[3, :])
legend_ax.axis('off')

legend_elements = [
    Patch(facecolor=method_to_color[method],
          edgecolor='red' if method in highlight_methods else 'black',
          label=label)
    for method, label in zip(all_methods, method_labels)
]

legend_ax.legend(handles=legend_elements,
                 loc='center',
                 ncol=min(6, len(all_methods)),
                 frameon=True,
                 fontsize=12,
                 title='Methods',
                 title_fontsize=14)

plt.tight_layout(rect=[0, 0.02, 1, 0.95])

# NOTE(review): unlike the other figures, these files are written to the
# current working directory rather than under ../output/ - confirm intent.
plt.savefig("lexical_diversity_distinct_metrics_bare_and_iterative.png", format='png', dpi=300, bbox_inches='tight')
plt.savefig("lexical_diversity_distinct_metrics_bare_and_iterative.pdf", format='pdf', dpi=300, bbox_inches='tight')

print("Bare and iterative_bare plots created successfully.")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib import rcParams
import matplotlib.ticker as mtick
import os

# -------------------------------------------------------------------
# LOAD AND PREPARE DATA
# -------------------------------------------------------------------
try:
    data = pd.read_csv("../output/diversity/diversity_results_body.csv")
except FileNotFoundError:
    # Fallback so the cell still runs without the results file: synthesise
    # 10 random sentiment rows for each of 9 "<method>_<model>" identifiers.
    print("CSV not found. Creating dummy data for demonstration.")
    models_list = [
        f"method{method_no}_{family}"
        for family in ("gpt4", "llama7b", "llama70b")
        for method_no in (1, 2, 3)
    ]
    records = []
    for model_full in models_list:
        for draw in range(10):
            # The first three draws per identifier use a concentration that
            # favours the positive share and almost suppresses the negative one.
            alpha = [10, 5, 0.1] if draw < 3 else [5, 5, 2]
            pos, neu, neg = np.random.dirichlet(alpha)
            records.append(
                dict(
                    model=model_full,
                    sentiment_neg=neg,
                    sentiment_neu=neu,
                    sentiment_pos=pos,
                )
            )
    data = pd.DataFrame(records)


def _display_model_name(full_name):
    """Return the text after the last underscore, upper-cased for GPT4, title-cased otherwise."""
    tail = full_name.rpartition("_")[2]
    return tail.upper() if tail.lower() == "gpt4" else tail.title()


# "<method>_<model>": everything before the last underscore is the method,
# the remainder is the model name shown in the figure.
data["model_clean"] = data["model"].map(_display_model_name)
data["method"] = data["model"].map(lambda full_name: full_name.rpartition("_")[0])

# Alphabetical orderings reused for panel order and x-axis order.
model_order = sorted(data["model_clean"].unique())
method_order = sorted(data["method"].unique())

# -------------------------------------------------------------------
# Data Aggregation
# -------------------------------------------------------------------
# Mean sentiment share for every (model, method) pair, flattened back to columns.
sentiment_columns = ['sentiment_neg', 'sentiment_neu', 'sentiment_pos']
grouped_by_pair = data.groupby(['model_clean', 'method'])
df_agg = grouped_by_pair[sentiment_columns].mean().reset_index()

# Turn both keys into ordered categoricals so the sort below follows
# model_order / method_order.
for key_column, key_order in (('model_clean', model_order), ('method', method_order)):
    df_agg[key_column] = pd.Categorical(df_agg[key_column], categories=key_order, ordered=True)

df_agg = df_agg.sort_values(['model_clean', 'method'])

# -------------------------------------------------------------------
# Plot Configuration
# -------------------------------------------------------------------
rcParams['font.size'] = 12

# Single styling table: (column, bar colour, legend label), listed in the
# order the segments are stacked (first entry sits at the bottom of each bar).
sentiment_style = [
    ('sentiment_pos', '#1a9850', 'Positive'),
    ('sentiment_neu', '#bababa', 'Neutral'),
    ('sentiment_neg', '#d73027', 'Negative'),
]

sentiments_to_plot = [column for column, _, _ in sentiment_style]
sentiment_colors = {column: colour for column, colour, _ in sentiment_style}
# Built from the reversed table so the key order is negative, neutral, positive.
sentiment_labels = {column: label for column, _, label in reversed(sentiment_style)}

# -------------------------------------------------------------------
# Visualization
# -------------------------------------------------------------------
n_models = len(model_order)

# One panel per model, side by side, all sharing the same y-axis.
fig, axes = plt.subplots(
    nrows=1, ncols=n_models, figsize=(5.5 * n_models, 7), sharey=True
)
# A single panel comes back as a bare Axes; wrap it so it can be indexed.
axes = [axes] if n_models == 1 else axes

ANNOTATION_THRESHOLD = 0.04  # segments not taller than this get no text label
ANNOTATION_FONT_SIZE = 9

# Applied in this order to the title-cased method name to restore house spellings.
label_fixups = (
    ("Attr Prompting", "AttrPrompting"),
    ("Bare Llama8B", "BARE (Llama 8B)"),
    ("Bare Llama3B", "BARE (Llama 3B)"),
)

for ax, model_name in zip(axes, model_order):
    # Rows for this model only, keyed by method (the x-axis categories).
    model_data = df_agg[df_agg['model_clean'] == model_name].set_index('method')

    bar_width = 0.75
    bottom = np.zeros(len(model_data))  # running top of the stack, per bar

    for sentiment in sentiments_to_plot:
        values = model_data[sentiment]
        bars = ax.bar(
            model_data.index,
            values,
            bottom=bottom,
            width=bar_width,
            color=sentiment_colors[sentiment],
            label=sentiment_labels[sentiment],
            edgecolor='white',
            linewidth=0.5
        )

        text_color = 'black'

        # Print the percentage in the middle of each sufficiently tall segment.
        for bar, segment_base in zip(bars, bottom):
            value = bar.get_height()
            if not value > ANNOTATION_THRESHOLD:
                continue
            ax.text(
                bar.get_x() + bar.get_width() / 2,
                segment_base + value / 2,
                f'{value:.1%}',
                color=text_color,
                fontsize=ANNOTATION_FONT_SIZE,
                fontweight='bold',
                ha='center',
                va='center'
            )

        # The next sentiment is stacked on top of this one.
        bottom += values

    ax.set_title(model_name, fontsize=16, fontweight='bold', pad=15)

    # Human-readable tick labels derived from the raw method identifiers.
    clean_xticklabels = []
    for raw_label in model_data.index:
        pretty = raw_label.replace('_', ' ').title()
        for old_text, new_text in label_fixups:
            pretty = pretty.replace(old_text, new_text)
        clean_xticklabels.append(pretty)

    ax.set_xticks(range(len(clean_xticklabels)))
    ax.set_xticklabels(clean_xticklabels, rotation=45, ha='right', fontsize=11, fontweight="bold")
    ax.set_xlabel('')

    # The y-axis shows shares of 1.0 formatted as whole percentages.
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1.0, decimals=0))
    ax.set_ylim(0, 1)
    ax.grid(axis='y', linestyle='--', alpha=0.6)
    sns.despine(ax=ax)


# -------------------------------------------------------------------
# Formatting
# -------------------------------------------------------------------
# The y-axis is shared, so only the leftmost panel carries the label.
axes[0].set_ylabel('Sentiment Composition', fontsize=14, fontweight='bold')

# Every panel plots the same three labelled series; take the legend entries
# from the first panel and draw one figure-level legend below the panels.
legend_handles, legend_labels = axes[0].get_legend_handles_labels()
fig.legend(
    legend_handles,
    legend_labels,
    title='Sentiment Category',
    title_fontsize=13,
    fontsize=12,
    frameon=True,
    ncol=3,
    loc='lower center'
)

# Leave the bottom 10% of the figure free for that legend.
plt.tight_layout(rect=[0, 0.1, 1, 0.95])

# NOTE(review): this path is relative to the working directory, while the input
# CSV is read from "../output/..." - confirm the two locations are intended to differ.
output_dir = "output/sentiment_improved"
os.makedirs(output_dir, exist_ok=True)
output_path_pdf, output_path_png = (
    os.path.join(output_dir, file_name)
    for file_name in (
        "sentiment_analysis_stacked_annotated.pdf",
        "sentiment_analysis_stacked_annotated.png",
    )
)

# Vector copy first, then the raster copy.
for save_format, save_path in (('pdf', output_path_pdf), ('png', output_path_png)):
    plt.savefig(save_path, format=save_format, dpi=300, bbox_inches='tight')

plt.show()