In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Per-dataset x position of the vertical reference line drawn on each plot.
# The key order also fixes the order in which the datasets are processed.
ref_x_values = dict(
    pad_ufes_20=21,
    cbis_ddsm_mass=8,
    cbis_ddsm_calc=8,
    petfinder_image=19,
    airbnb=50,
    cloth=5,
    salary=4,
    petfinder_text=19,
    petfinder_image_text=19,
)

# Dataset names, taken from the reference table (dicts keep insertion order)
# so the list and the table cannot drift apart.
datasets = list(ref_x_values)

# Render one figure per dataset and save it as "<dataset>.png":
#   * black points  - every row's `mean` against `cap_heads`, with `std` error
#                     bars; point opacity encodes `mgm_heads`
#   * red points    - average of `mean` for each distinct `cap_heads` value
#   * blue dashes   - the dataset's reference x position from `ref_x_values`
for dataset in datasets:
    # The input is read as tab-separated even though the extension is .csv.
    df = pd.read_csv(f"{dataset}.csv", sep='\t')

    # Map mgm_heads linearly onto opacities in [0.2, 1.0] so that the rows
    # with the smallest value stay visible instead of fading out completely.
    heads = df['mgm_heads']
    head_min = heads.min()
    head_span = heads.max() - head_min
    if head_span > 0:
        alpha_vals = 0.2 + 0.8 * ((heads - head_min) / head_span)
    else:
        # All rows share one mgm_heads value (or there is nothing to compare):
        # the normalisation would divide by zero and yield NaN, which
        # matplotlib rejects as an alpha. Draw the points fully opaque instead.
        alpha_vals = pd.Series(1.0, index=df.index)

    fig, ax = plt.subplots()
    try:
        ax.errorbar(df['cap_heads'], df['mean'], yerr=df['std'],
                    fmt='none', capsize=4, color='black', alpha=0.5)

        # One scatter call with per-point RGBA colours (black, varying alpha)
        # rather than one call - and one collection - per row.
        point_colors = [(0.0, 0.0, 0.0, float(a)) for a in alpha_vals]
        ax.scatter(df['cap_heads'], df['mean'], color=point_colors)

        # zorder=3 keeps the per-cap_heads averages on top of the raw points
        # and their error bars.
        avg_df = df.groupby('cap_heads')['mean'].mean()
        ax.scatter(avg_df.index, avg_df.values, color='red', s=60, zorder=3)

        # Vertical reference line
        ax.axvline(x=ref_x_values[dataset], color='blue', linestyle='--', linewidth=1)

        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.set_title(dataset)

        fig.savefig(f"{dataset}.png", dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed, so a
        # bad dataset does not leave open figures behind.
        plt.close(fig)
