# Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import os
import statsmodels.api as sm

# Broadcast

In [None]:
# Load the broadcast benchmark results (EPYC and THIN result sets) from the results folder
bcast_epyc = pd.read_csv('../results/bcastEPYC.csv')
bcast_thin = pd.read_csv('../results/bcastTHIN.csv')

In [None]:
# Function to plot latency vs processors for broadcast
def plot_latency_vs_processors_bcast_size(data, errorbar=False, save=False, filename=None):
    """Plot broadcast latency against process count, one panel per algorithm.

    A 2x2 grid is drawn with one panel for each of the algorithms default,
    basic_linear, chain and binary_tree. Every panel holds one series per
    message size 2**1, 2**5, 2**9, 2**13 and 2**17 bytes, labelled with the
    exponent.

    Parameters
    ----------
    data : pandas.DataFrame
        Measurements with the columns 'Size(bytes)', 'Algorithm',
        'Processors' and 'Avg_Latency(us)'.
    errorbar : bool, optional
        If True, draw the mean with standard-deviation error bars; otherwise
        draw the mean as markers joined by a gray dashed line.
    save : bool, optional
        If True, write the figure to '../plots/<filename>.png'.
    filename : str, optional
        File stem used when ``save`` is True.

    Raises
    ------
    ValueError
        If ``save`` is True and ``filename`` is None.
    """
    # Fail before any plotting work instead of with a TypeError at savefig time
    if save and filename is None:
        raise ValueError("filename must be provided when save=True")

    # Define mapping of algorithm numbers to names
    algorithms = {0: 'default', 1: 'basic_linear', 2: 'chain', 5: 'binary_tree'}

    # Exponents of the plotted message sizes: 1, 5, 9, 13, 17
    exponents = list(range(1, 21, 4))

    # One marker per plotted size, so no two series in a panel share a marker
    marker_styles = ['o', 's', '^', 'D', 'v']

    # Mean and standard deviation per (size, algorithm, process count).
    # The result does not depend on the loop variables, so compute it once.
    df_grouped = data.groupby(['Size(bytes)', 'Algorithm', 'Processors'])['Avg_Latency(us)'].agg(['mean', 'std']).reset_index()

    # Create a 2x2 subplot grid with independent y-axes
    fig, axs = plt.subplots(2, 2, figsize=(10, 7), sharey=False)

    # Loop over each algorithm; panels are filled row by row
    for i, (algorithm, name) in enumerate(algorithms.items()):
        row, col = divmod(i, 2)
        ax = axs[row, col]

        # Loop over each message size
        for j, exponent in enumerate(exponents):
            size = 2 ** exponent

            # Filter the grouped DataFrame for the current algorithm and size
            selected = (df_grouped['Algorithm'] == algorithm) & (df_grouped['Size(bytes)'] == size)
            df_size = df_grouped[selected].drop(columns=['Algorithm', 'Size(bytes)']).reset_index(drop=True)

            # The modulo only matters if more sizes than markers are ever configured
            marker = marker_styles[j % len(marker_styles)]

            if errorbar:
                # Mean with standard-deviation error bars
                ax.errorbar(df_size['Processors'], df_size['mean'], yerr=df_size['std'],
                            capsize=5, label=exponent, marker=marker,
                            linestyle='--', linewidth=1)
            else:
                # Mean as markers, joined by a faint dashed guide line
                ax.scatter(df_size['Processors'], df_size['mean'], label=exponent, marker=marker)
                ax.plot(df_size['Processors'], df_size['mean'], linestyle='--', linewidth=1, color='gray', alpha=0.7)

        # Add the legend only to the first panel
        if i == 0:
            ax.legend(title='log(Size (bytes))')

        # Set y-axis label and subplot title
        ax.set_ylabel('Latency (us)')
        ax.set_title(f'{name}')

        # The x-axis label goes on the bottom-row panel of the same column
        axs[1, col].set_xlabel('Number of Processes')

    # Remove x-axis labels and ticks for the first row
    for ax in axs[0]:
        ax.set_xticklabels([])
        ax.tick_params(axis='x', which='both', bottom=False, top=False)

    # Add the figure title before tight_layout so the layout reserves room for it
    plt.suptitle('Broadcast Operation')

    # Adjust layout to prevent clipping
    plt.tight_layout()

    # Save the plot if save is True
    if save:
        plt.savefig('../plots/'+filename+'.png')

    # Show the plot
    plt.show()

# Function to plot latency vs processors for broadcast
def plot_latency_vs_processors_bcast_algorithm(data, errorbar=False, save=False, filename=None):
    """Plot broadcast latency against process count, one panel per message size.

    A 1x3 grid is drawn for the message sizes 2**1, 2**6 and 2**15 bytes.
    Every panel holds one series per algorithm (default, basic_linear, chain,
    binary_tree).

    Parameters
    ----------
    data : pandas.DataFrame
        Measurements with the columns 'Size(bytes)', 'Algorithm',
        'Processors' and 'Avg_Latency(us)'.
    errorbar : bool, optional
        If True, draw the mean with standard-deviation error bars; otherwise
        draw the mean as markers joined by a dashed line.
    save : bool, optional
        If True, write the figure to '../plots/<filename>.png'.
    filename : str, optional
        File stem used when ``save`` is True.

    Raises
    ------
    ValueError
        If ``save`` is True and ``filename`` is None.
    """
    # Fail before any plotting work instead of with a TypeError at savefig time
    if save and filename is None:
        raise ValueError("filename must be provided when save=True")

    # Define mapping of algorithm numbers to names
    algorithms = {0: 'default', 1: 'basic_linear', 2: 'chain', 5: 'binary_tree'}
    markers = ['o', 's', '^', 'D']  # Marker styles for each algorithm

    # Mean and standard deviation per (size, algorithm, process count).
    # The result does not depend on the loop variables, so compute it once.
    df_grouped = data.groupby(['Size(bytes)', 'Algorithm', 'Processors'])['Avg_Latency(us)'].agg(['mean', 'std']).reset_index()

    # Create a 1x3 subplot grid with independent y-axes
    fig, axs = plt.subplots(1, 3, figsize=(12, 4), sharey=False)

    # Loop over each message size
    for i, exponent in enumerate([1, 6, 15]):
        size = 2 ** exponent
        ax = axs[i]

        # Loop over each algorithm
        for marker, (algorithm, name) in zip(markers, algorithms.items()):
            # Filter the grouped DataFrame for the current algorithm and size
            selected = (df_grouped['Algorithm'] == algorithm) & (df_grouped['Size(bytes)'] == size)
            df_size = df_grouped[selected].drop(columns=['Algorithm', 'Size(bytes)']).reset_index(drop=True)

            if errorbar:
                # Mean with standard-deviation error bars
                ax.errorbar(df_size['Processors'], df_size['mean'], yerr=df_size['std'], capsize=5, label=f'{name}',
                            marker=marker, linestyle='--', linewidth=1)
            else:
                # Mean as markers, joined by a dashed guide line
                ax.scatter(df_size['Processors'], df_size['mean'], label=f'{name}', marker=marker)
                ax.plot(df_size['Processors'], df_size['mean'], linestyle='--', linewidth=1, alpha=0.7)

        # Legend and y-axis label only on the first panel
        if i == 0:
            ax.legend(title='Algorithm')
            ax.set_ylabel('Latency (us)')

        ax.set_title(r'$2^{' + f'{exponent}' + '}$ bytes')

        # Every panel gets its own x-axis label
        ax.set_xlabel('Number of Processes')

    # Adjust layout to prevent clipping
    plt.tight_layout()

    # Save the plot if save is True
    if save:
        plt.savefig('../plots/'+filename+'.png')

    # Show the plot
    plt.show()

In [None]:
# EPYC broadcast data, one panel per message size, with error bars; shown only (save defaults to False)
plot_latency_vs_processors_bcast_algorithm(bcast_epyc, errorbar=True)

In [None]:
# EPYC broadcast data, one panel per algorithm, with error bars; saved under the stem 'bcast_epyc_size'
plot_latency_vs_processors_bcast_size(bcast_epyc, errorbar=True, save=True, filename='bcast_epyc_size')

In [None]:
# THIN broadcast data, one panel per algorithm, without error bars; saved under the stem 'bcast_thin_size'
plot_latency_vs_processors_bcast_size(bcast_thin, save=True, filename='bcast_thin_size')

In [None]:
# THIN broadcast data, one panel per message size, with error bars; saved under the stem 'bcast_thin_algorithm'
plot_latency_vs_processors_bcast_algorithm(bcast_thin, errorbar=True, save=True, filename='bcast_thin_algorithm')

In [None]:
def plot_average_latency_vs_message_size_dual(data_thin, data_epyc, errorbar=False, input_token=None, save=False, filename=None):
    """Plot mean broadcast latency against message size for THIN and EPYC side by side.

    The latency is averaged over every row sharing a message size and an
    algorithm. The left panel shows ``data_thin``, the right panel
    ``data_epyc``; each panel has one series per algorithm and a base-2
    logarithmic x-axis.

    Parameters
    ----------
    data_thin, data_epyc : pandas.DataFrame
        Measurements with the columns 'Size(bytes)', 'Algorithm' and
        'Avg_Latency(us)'.
    errorbar : bool, optional
        If True, draw the mean with standard-deviation error bars; otherwise
        draw the mean as markers joined by a dashed line.
    input_token : int or None, optional
        Slice end applied to each algorithm's rows (``.iloc[:input_token]``).
        Rows are in ascending size order because groupby sorts its keys by
        default, so a negative value drops the largest sizes and None keeps
        all of them.
    save : bool, optional
        If True, write the figure to '../plots/<filename>.png'.
    filename : str, optional
        File stem used when ``save`` is True.

    Raises
    ------
    ValueError
        If ``save`` is True and ``filename`` is None.
    """
    # Fail before any plotting work instead of with a TypeError at savefig time
    if save and filename is None:
        raise ValueError("filename must be provided when save=True")

    # Define mapping of algorithm numbers to names
    algorithms = {0: 'default', 1: 'basic_linear', 2: 'chain', 5: 'binary_tree'}

    # Markers are assigned by position; indexing by `algorithm % 4` would give
    # ids 1 and 5 the same marker
    marker_styles = ['o', 's', '^', 'D']

    # Create a 1x2 subplot grid
    fig, axs = plt.subplots(1, 2, figsize=(12, 5), sharey=False)

    # Loop over each dataset
    for ax, data, node in zip(axs, [data_thin, data_epyc], ['THIN', 'EPYC']):
        # Mean and standard deviation per (size, algorithm); computed once per dataset
        df_grouped = data.groupby(['Size(bytes)', 'Algorithm'])['Avg_Latency(us)'].agg(['mean', 'std']).reset_index()

        # Loop over each algorithm
        for marker, (algorithm, name) in zip(marker_styles, algorithms.items()):
            # Rows of the current algorithm, truncated to the requested slice
            df_algorithm = df_grouped[df_grouped['Algorithm'] == algorithm].drop(columns=['Algorithm']).reset_index(drop=True).iloc[:input_token]

            if errorbar:
                # errorbar already draws the dashed connecting line, so no extra
                # plot call is needed (it would add a second, differently
                # coloured line on top)
                ax.errorbar(df_algorithm['Size(bytes)'], df_algorithm['mean'], yerr=df_algorithm['std'], capsize=5, label=name,
                            marker=marker, linestyle='--', linewidth=1)
            else:
                # Mean as markers, joined by a dashed guide line
                ax.scatter(df_algorithm['Size(bytes)'], df_algorithm['mean'], label=name, marker=marker)
                ax.plot(df_algorithm['Size(bytes)'], df_algorithm['mean'], linestyle='--', linewidth=1, alpha=0.7)

        # Set labels and title for each subplot
        ax.set_xlabel('Size (bytes)')
        ax.set_title(node)

        # log2 scale for x-axis
        ax.set_xscale('log', base=2)

    # Set y-axis label for the left subplot
    axs[0].set_ylabel('Average Latency (us)')

    # Add legend to the first subplot
    axs[0].legend(title='Algorithm')

    # Save the plot if save is True
    if save:
        plt.savefig('../plots/'+filename+'.png')

    # Show the plot
    plt.show()

# Example usage: THIN vs EPYC without error bars. input_token=-15 is used as the
# slice end of each algorithm's rows, so the last 15 rows per algorithm are not plotted.
plot_average_latency_vs_message_size_dual(bcast_thin, bcast_epyc, errorbar=False, input_token=-15)


In [None]:
# Overall mean and standard deviation of the latency per algorithm (THIN first, then EPYC)
for bcast_results in (bcast_thin, bcast_epyc):
    per_algorithm = bcast_results.groupby(['Algorithm'])['Avg_Latency(us)'].agg(['mean', 'std'])
    print(per_algorithm.reset_index())

# Ordering by mean latency noted from the output above:
# THIN: 0 < 1 < 5 < 2 : default < basic_linear < binary_tree < chain
# EPYC: 0 < 2 < 5 < 1 : default < chain < binary_tree < basic_linear

In [None]:
# Names of the broadcast algorithm ids
algorithms = {0: 'default', 1: 'basic_linear', 2: 'chain', 5: 'binary_tree'}

# Slice end for the EPYC series: only rows [:token] of its per-process-count means are drawn
token = -5

# One marker style per algorithm, in the order of the mapping above
markers = ['o', 's', '^', 'D']

# 1x2 grid: THIN on the left, EPYC on the right
fig, axs = plt.subplots(1, 2, figsize=(11, 4), sharey=False)

# (axis, dataset, slice end) per panel; the THIN series is drawn in full
panels = [(axs[0], bcast_thin, None), (axs[1], bcast_epyc, token)]

for i, algorithm in enumerate(algorithms.keys()):
    for panel_ax, dataset, stop in panels:
        # Rows of the current algorithm only
        algorithm_rows = dataset[dataset['Algorithm'] == algorithm]

        # Mean latency per process count
        mean_by_procs = algorithm_rows.groupby(['Algorithm', 'Processors'])['Avg_Latency(us)'].mean().reset_index()

        # Dashed line with the algorithm's marker
        panel_ax.plot(
            mean_by_procs['Processors'].iloc[:stop],
            mean_by_procs['Avg_Latency(us)'].iloc[:stop],
            label=algorithms[algorithm],
            linestyle='--',
            marker=markers[i],
        )

# Axis labels and titles; only the left panel carries the y-label
for panel_ax, node in zip(axs, ('THIN', 'EPYC')):
    panel_ax.set_xlabel('Number of Processes')
    panel_ax.set_title(node)
axs[0].set_ylabel('Average Latency (us)')

# Legend in the first subplot only
axs[0].legend()

# Adjust layout to prevent clipping, then save and show
plt.tight_layout()
plt.savefig('../plots/bcast_epyc_thin.png')
plt.show()


# Barrier

In [89]:
# Load the barrier benchmark results (EPYC and THIN result sets) from the results folder
barrier_epyc = pd.read_csv('../results/barrierEPYC.csv')
barrier_thin = pd.read_csv('../results/barrierTHIN.csv')

In [90]:
# Distinct values of the 'Processors' column in the EPYC barrier results
barrier_epyc['Processors'].unique()

array([  2,   4,   8,  16,  32,  48,  64,  96, 128, 176, 224, 256])

In [None]:
def plot_latency_vs_processors_barrier(data_thin, data_epyc, save=False, filename=None):
    """Plot barrier latency against process count for THIN and EPYC side by side.

    Each panel shows, per algorithm (default, linear, double_ring, bruck),
    the mean of 'Avg_Latency(us)' with standard-deviation error bars. Values
    are divided by 1e3, i.e. shown in milliseconds.

    Parameters
    ----------
    data_thin, data_epyc : pandas.DataFrame
        Measurements with the columns 'Algorithm', 'Processors' and
        'Avg_Latency(us)'.
    save : bool, optional
        If True, write the figure to '../plots/<filename>.png'.
    filename : str, optional
        File stem used when ``save`` is True.

    Raises
    ------
    ValueError
        If ``save`` is True and ``filename`` is None.
    """
    # Fail before any plotting work instead of with a TypeError at savefig time
    if save and filename is None:
        raise ValueError("filename must be provided when save=True")

    # Define mapping of algorithm numbers to names
    algorithms = {0: 'default', 1: 'linear', 2: 'double_ring', 4: 'bruck'}
    markers = ['o', 's', '^', 'D']  # Marker styles for each algorithm

    # Create a 1x2 subplot grid with independent y-axes
    fig, axs = plt.subplots(1, 2, figsize=(11, 4), sharey=False)

    # Loop over each dataset (thin and epyc)
    for ax, data, node in zip(axs, [data_thin, data_epyc], ['THIN', 'EPYC']):
        # Mean and standard deviation per (algorithm, process count)
        df_grouped = data.groupby(['Algorithm', 'Processors'])['Avg_Latency(us)'].agg(['mean', 'std']).reset_index()

        # Loop over each algorithm
        for marker, (algorithm, name) in zip(markers, algorithms.items()):
            # Both datasets are filtered the same way
            df_algorithm = df_grouped[df_grouped['Algorithm'] == algorithm].drop(columns=['Algorithm']).reset_index(drop=True)

            # Error bars for mean and standard deviation, converted from us to ms
            ax.errorbar(df_algorithm['Processors'], df_algorithm['mean'] / 1e3, yerr=df_algorithm['std'] / 1e3,
                        capsize=5, label=f'{name}',
                        marker=marker, linestyle='--', linewidth=1)

        ax.set_xlabel('Number of Processes')
        ax.set_title(node)

    # The y-axis label and the legend go on the left panel only
    axs[0].set_ylabel('Avg Latency (ms)')
    axs[0].legend(title='Algorithm')

    # Adjust layout to prevent clipping
    plt.tight_layout()

    # Save the plot if save is True
    if save:
        plt.savefig('../plots/' + filename + '.png')

    # Show the plot
    plt.show()

# Plot the THIN and EPYC barrier results loaded above and save the figure
# under the stem 'barrier_comparison'
plot_latency_vs_processors_barrier(barrier_thin, barrier_epyc, save=True, filename='barrier_comparison')


In [None]:
# Mean and standard deviation of the latency per algorithm, for each dataset
avg_barrier_thin = (
    barrier_thin.groupby(['Algorithm'])['Avg_Latency(us)'].agg(['mean', 'std']).reset_index()
)
avg_barrier_epyc = (
    barrier_epyc.groupby(['Algorithm'])['Avg_Latency(us)'].agg(['mean', 'std']).reset_index()
)

# Tick labels, applied by position to the rows of the summaries above
algorithm_names = ['default', 'linear', 'double_ring', 'bruck']

# Print both summaries for reference
for summary in (avg_barrier_thin, avg_barrier_epyc):
    print(summary)

# Single-axes figure
fig, ax = plt.subplots(figsize=(5, 4))

# Grouped bars: the EPYC bar sits one bar-width to the right of the THIN bar
width = 0.35
bar_positions_thin = range(len(algorithm_names))
bar_positions_epyc = [pos + width for pos in bar_positions_thin]

bar_series = (
    (bar_positions_thin, avg_barrier_thin, 'blue', 'THIN'),
    (bar_positions_epyc, avg_barrier_epyc, 'orange', 'EPYC'),
)
for positions, summary, bar_color, node in bar_series:
    # Heights and error bars are divided by 1e3 (us -> ms)
    ax.bar(positions, summary['mean']/1e3, width=width, yerr=summary['std']/1e3,
           capsize=5, color=bar_color, alpha=0.7, label=node)

# Axis labels and horizontal grid lines
ax.set_xlabel('Algorithm')
ax.set_ylabel('Average Latency (ms)')
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Centre each tick between the two bars of its group
ax.set_xticks([pos + width / 2 for pos in bar_positions_thin])
ax.set_xticklabels(algorithm_names)

ax.legend()

# Adjust layout to prevent clipping, then save and show
plt.tight_layout()
plt.savefig('../plots/barrier_comparison_average.png')
plt.show()


# Possible Modelling

## Broadcast

In [None]:
# Transformation applied to the number of processes before fitting
def transformation(x):
    """Return ``x`` unchanged (identity transformation).

    Swap the return statement for the commented-out alternative to apply a
    base-2 logarithm to the input instead.
    """
    # return np.log2(x)
    return x

# Regression input: THIN broadcast results, restricted to one algorithm id
data = bcast_thin
algorithm = 1
# Slice end of the points used for fitting (None keeps every point)
token = None

df_grouped = (
    data.groupby(['Size(bytes)', 'Algorithm', 'Processors'])['Avg_Latency(us)']
    .agg(['mean', 'std'])
    .reset_index()
)

r_squared_values = []

# One fit and one figure per message size 2**1 ... 2**20 bytes
for power in range(1, 21):

    # Rows of the current algorithm and size
    is_selected = (df_grouped['Algorithm'] == algorithm) & (df_grouped['Size(bytes)'] == 2**power)
    df_size = df_grouped[is_selected].drop(columns=['Algorithm', 'Size(bytes)']).reset_index(drop=True)

    # Predictor (transformed process count) and response (mean latency)
    X = df_size['Processors'].values.reshape(-1, 1)
    X_new = transformation(X)[:token]
    y = df_size['mean'].values[:token]

    # Linear regression with an intercept term
    regressor = LinearRegression(fit_intercept=True)
    regressor.fit(X_new, y)

    # Slope, intercept and R-squared of the fit
    slope, intercept = regressor.coef_[0], regressor.intercept_
    y_fit = regressor.predict(X_new)
    r_squared = r2_score(y, y_fit)
    r_squared_values.append(r_squared)
    print(f'R-squared: {r_squared}')

    # All measured means as points, the fitted line over the points used for fitting
    plt.scatter(transformation(X), df_size['mean'], label=f'Power {power}')
    plt.plot(X_new, y_fit, linestyle='--', linewidth=1, alpha=0.7)

    plt.ylabel('Latency (us)')
    plt.title('Broadcast Operation - Linear Regression Fit')
    plt.legend()

    # One figure per message size
    plt.show()

# # Plot R-squared values
# plt.plot(range(1, 21), r_squared_values, marker='o')
# plt.xlabel('Power')
# plt.ylabel('R-squared')
# plt.title('R-squared values for Linear Regression Fit')
# plt.grid(True)
# plt.show()

# Mean of the R-squared values over all message sizes
print(np.mean(r_squared_values))

In [None]:
data = bcast_epyc

# Mean and standard deviation per (size, algorithm, process count)
df_grouped = data.groupby(['Size(bytes)', 'Algorithm', 'Processors'])['Avg_Latency(us)'].agg(['mean', 'std']).reset_index()

# Algorithms to model, one panel each
algorithms = {1: 'basic_linear', 2: 'chain'}

# Per algorithm: (power of two of the message size, slice end of the points used for fitting)
# optional third series, currently disabled: (19, -3) for id 1 and (20, None) for id 2
series_by_algorithm = {
    1: [(7, -2), (13, None)],
    2: [(8, -3), (14, -2)],
}

# One marker style per series within a panel
markers = ['o', 's', '^']

# Set up the 1x2 subplot grid
fig, axs = plt.subplots(1, 2, figsize=(11, 4), sharey=False)

for i, algorithm in enumerate(algorithms.keys()):
    panel = axs[i]

    for j, (power, token) in enumerate(series_by_algorithm[algorithm]):
        # Rows of the current algorithm and size
        is_selected = (df_grouped['Algorithm'] == algorithm) & (df_grouped['Size(bytes)'] == 2**power)
        df_size = df_grouped[is_selected].drop(columns=['Algorithm', 'Size(bytes)']).reset_index(drop=True)

        # All points are shown; only the first [:token] points are fitted
        X = df_size['Processors'].values.reshape(-1, 1)
        y = df_size['mean'].values
        X_new = X[:token]
        y_new = y[:token]

        # Linear regression with an intercept term
        regressor = LinearRegression(fit_intercept=True)
        regressor.fit(X_new, y_new)
        y_pred = regressor.predict(X_new)

        r_squared = r2_score(y_new, y_pred)

        panel.scatter(X, y, label=r'$2^{' + f'{power}' + '}$ bytes' + f'; $R^2 = $ {r_squared:.2f}', marker=markers[j])
        panel.plot(X_new, y_pred, linestyle='-', linewidth=1.5, alpha=0.7)

    # Panel decorations, applied once all series of the panel are drawn
    panel.set_xlabel('Number of Processes')
    panel.set_title(algorithms[algorithm])
    panel.legend()

# Only the left panel carries the y-label
axs[0].set_ylabel('Latency (us)')

plt.tight_layout()
plt.savefig('../plots/bcast_epyc_linear_chain.png')
plt.show()


In [None]:
data = bcast_epyc

# Mean and standard deviation per (size, algorithm, process count)
df_grouped = data.groupby(['Size(bytes)', 'Algorithm', 'Processors'])['Avg_Latency(us)'].agg(['mean', 'std']).reset_index()

# Algorithm id modelled in this cell
algorithm = 5

# Per x-axis scale: (power of two of the message size, slice end of the points used for fitting)
# optional third series, currently disabled: (18, None) for identity and (19, -2) for log2
series_by_scale = {
    'identity': [(6, -6), (13, None)],
    'log2': [(7, None), (13, None)],
}
x_labels = {
    'identity': 'Number of Processes',
    'log2': f'$log_2$ Number of Processes',
}

# One marker style per series within a panel
markers = ['o', 's', '^']

# Set up the 1x2 subplot grid
fig, axs = plt.subplots(1, 2, figsize=(11, 4), sharey=False)

for i, x_scale in enumerate(['identity', 'log2']):
    panel = axs[i]

    for j, (power, token) in enumerate(series_by_scale[x_scale]):
        # Rows of the current algorithm and size
        is_selected = (df_grouped['Algorithm'] == algorithm) & (df_grouped['Size(bytes)'] == 2**power)
        df_size = df_grouped[is_selected].drop(columns=['Algorithm', 'Size(bytes)']).reset_index(drop=True)

        # Predictor on the panel's scale: raw process count or its base-2 logarithm
        X = df_size['Processors'].values.reshape(-1, 1)
        if x_scale == 'log2':
            X = np.log2(X)
        y = df_size['mean'].values

        # All points are shown; only the first [:token] points are fitted
        X_new = X[:token]
        y_new = y[:token]

        # Linear regression with an intercept term
        regressor = LinearRegression(fit_intercept=True)
        regressor.fit(X_new, y_new)
        y_pred = regressor.predict(X_new)

        r_squared = r2_score(y_new, y_pred)

        panel.scatter(X, y, label=r'$2^{' + f'{power}' + '}$ bytes' + f'; $R^2 = $ {r_squared:.2f}', marker=markers[j])
        panel.plot(X_new, y_pred, linestyle='-', linewidth=1.5, alpha=0.7)

    # Panel decorations, applied once all series of the panel are drawn
    panel.set_xlabel(x_labels[x_scale])
    panel.set_title(x_scale)
    panel.legend()

# Only the left panel carries the y-label
axs[0].set_ylabel('Latency (us)')

plt.tight_layout()
plt.savefig('../plots/bcast_epyc_binary_tree.png')
plt.show()


## Barrier

In [None]:
# Linear fit of barrier latency against process count for one algorithm,
# on THIN (left panel) and EPYC (right panel)
nodes = ['THIN', 'EPYC']
datasets = [barrier_thin, barrier_epyc]
# Barrier algorithm id to model (id 2 is labelled 'double_ring' in the barrier plots)
algorithm = 2

# Slice end of the points used for fitting, per node (None keeps every point)
tokens = [None, -3]

# Set up the 1x2 subplot grid
fig, axs = plt.subplots(1, 2, figsize=(11, 4), sharey=False)

for i, (node, data) in enumerate(zip(nodes, datasets)):
    # Mean and standard deviation per (algorithm, process count)
    df_grouped = data.groupby(['Algorithm', 'Processors'])['Avg_Latency(us)'].agg(['mean', 'std']).reset_index()

    data_alg = df_grouped[df_grouped['Algorithm'] == algorithm].drop(columns=['Algorithm']).reset_index(drop=True)

    # Predictor: process count; response: mean latency converted from us to ms
    X = data_alg['Processors'].values.reshape(-1, 1)
    X_new = X[:tokens[i]]
    y = data_alg['mean'].values[:tokens[i]]/1e3

    # Perform linear regression with an intercept term
    regressor = LinearRegression(fit_intercept=True)
    regressor.fit(X_new, y)
    y_pred = regressor.predict(X_new)

    # Get slope and intercept from the linear regression
    slope = regressor.coef_[0]
    intercept = regressor.intercept_

    # Repeat the fit with statsmodels to obtain the parameter covariance;
    # add_constant puts the intercept in column 0, so index 1 is the slope
    X_new_const = sm.add_constant(X_new)
    model = sm.OLS(y, X_new_const)
    results = model.fit()
    covariance_matrix = results.cov_params()

    # Half the slope (ms -> us) with half the slope's standard error.
    # NOTE(review): presumably the model charges two point-to-point messages
    # per added process - confirm.
    print(f'latency p2p ({node}) = {0.5*slope*1e3} +- {0.5*np.sqrt(covariance_matrix[1,1])*1e3} us')

    # Equation string; the '+' format flag always prints the intercept's sign,
    # so a positive intercept no longer renders as "ax b" without an operator
    equation_str = f'$y = {slope:.2f}x {intercept:+.2f}$'

    r_squared = r2_score(y, y_pred)

    # All measured means as points
    axs[i].scatter(X, data_alg['mean'].values/1e3, marker = 'x', color='blue')
    # Fitted line over the points used for fitting
    axs[i].plot(X_new, y_pred, linestyle='-', linewidth=1.5, alpha=0.7, color='red', label=f'$R^2 = $ {r_squared:.2f}')

    axs[i].set_xlabel('Number of Processes')
    axs[0].set_ylabel('Latency (ms)')
    axs[i].set_title(node)
    axs[i].legend(loc = 'upper left')

    # Display the linear regression equation below the legend
    axs[i].annotate(equation_str, xy=(0.14, 0.8), xycoords='axes fraction', ha='center', va='center',
                    bbox=dict(boxstyle='round,pad=0.3', edgecolor='black', facecolor='white'),
                    fontsize=10, color='green')

plt.tight_layout()
plt.savefig('../plots/barrier_linear_regression.png')
plt.show()