In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

# Set matplotlib's global default font size to 15.
plt.rcParams['font.size'] = 15

# x-coordinates of the plotted points.
# Rows follow the order of the `model` list (BERT, RoBERTa, Falcon, Platypus);
# columns follow the order of the `datasets` list
# (SQuAD, COVID-QA, TechQA, DuoRC, CUAD).
# The last two rows store the reciprocal of each listed number.
x_values = [
    [16.27, 13.29, 20.8, 12.25, 10.82],                # BERT
    [6.85, 6.47, 18.48, 6.35, 5.39],                   # RoBERTa
    [1 / d for d in (14.5, 7.89, 5.5, 14.93, 5.18)],   # Falcon
    [1 / d for d in (7.34, 4.6, 3.1, 8.2, 3.03)],      # Platypus
]

# y-coordinates of the plotted points (the axis is labelled
# 'Zero-Shot Performance (F1)'); same row/column layout as x_values.
y_values = [
    [88.25, 42.11, 9.19, 69.25, 15],     # BERT
    [90.04, 40.2, 5.94, 74.43, 7.7],     # RoBERTa
    [28.4, 32.2, 9.26, 22.92, 8.63],     # Falcon
    [40.14, 34.25, 6.86, 30.01, 5.64],   # Platypus
]

# Create a 2x2 grid of panels, one panel per model.
fig, axs = plt.subplots(2, 2, figsize=(10, 8))

model = ['BERT', 'RoBERTa', 'Falcon', 'Platypus']
datasets = ['SQuAD', 'COVID-QA', 'TechQA', 'DuoRC', 'CUAD']

# NOTE(review): the last two rows of x_values are written as reciprocals
# (1/...), yet every panel's x-axis is labelled 'Model Perplexity' -- confirm
# that label is what is intended for the Falcon and Platypus panels.

# Draw one panel per model: scatter of the five dataset points, a
# least-squares trendline, and a text label next to every point.
for i, (ax, model_name) in enumerate(zip(axs.flat, model)):
    x = x_values[i]
    y = y_values[i]

    # Least-squares fit of y on x; only slope and intercept are needed.
    slope, intercept, *_ = linregress(x, y)

    # Plot data points
    ax.scatter(x, y, label='Dataset')

    # A straight line is fully determined by its end points, so draw it once
    # from the smallest to the largest x rather than through the unsorted
    # data points (which would retrace the same line back and forth).
    x_span = np.array([min(x), max(x)])
    ax.plot(x_span, intercept + slope * x_span, color='red', label='Trendline')

    # Label each point with the dataset it belongs to.
    for x_val, y_val, dataset_name in zip(x, y, datasets):
        ax.text(x_val, y_val, dataset_name, fontsize=8.5, ha='center')

    # Axis labels, per-model title and legend.
    ax.set_xlabel('Model Perplexity')
    ax.set_ylabel('Zero-Shot Performance (F1)')
    ax.set_title(f'Perplexity v/s Performance for {model_name}')
    ax.legend()

# Adjust layout so titles and axis labels of neighbouring panels do not overlap.
plt.tight_layout()

# Write the figure to disk before showing it.
plt.savefig('PPLvsPerformance.pdf', format="pdf", transparent=True, dpi=300)

# Show plot
plt.show()

In [None]:
from scipy.stats import pearsonr

# Inputs for the correlation analysis (same values as in the plotting cell):
# one row per model, one column per dataset.
x_values = [
    [16.27, 13.29, 20.8, 12.25, 10.82],
    [6.85, 6.47, 18.48, 6.35, 5.39],
    [1/14.5, 1/7.89, 1/5.5, 1/14.93, 1/5.18],
    [1/7.34, 1/4.6, 1/3.1, 1/8.2, 1/3.03],
]

y_values = [
    [88.25, 42.11, 9.19, 69.25, 15],
    [90.04, 40.2, 5.94, 74.43, 7.7],
    [28.4, 32.2, 9.26, 22.92, 8.63],
    [40.14, 34.25, 6.86, 30.01, 5.64],
]

# Row order of x_values / y_values; matches the `model` list used for the plots.
model_names = ['BERT', 'RoBERTa', 'Falcon', 'Platypus']

# (model name, Pearson r, p-value) for every row, kept so the numbers can be
# reused later without re-reading the printed output.
pearson_results = []

for name, x, y in zip(model_names, x_values, y_values):
    correlation, p_value = pearsonr(x, y)
    pearson_results.append((name, correlation, p_value))

    # Identify the model first; otherwise the four result pairs are
    # indistinguishable in the output.
    print(f"[{name}]")
    print("Pearson correlation coefficient:", correlation)
    print("P-value:", p_value)
