In [2]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go




In [16]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Read the timing CSV for each implementation variant.
df_naive = pd.read_csv('timing_naive.csv')
df_optimal = pd.read_csv('timing_optimal.csv')
df_seq = pd.read_csv("timing_seq.csv")


# Average "Time(ms)" over all rows sharing the same "Size", one Series per variant.
naive_means, optimal_means, seq_means = (
    frame.groupby("Size")["Time(ms)"].mean()
    for frame in (df_naive, df_optimal, df_seq)
)

# Build the figure: one lines+markers trace per plotted variant, in legend order.
fig = go.Figure()
fig.add_traces([
    go.Scatter(
        x=means.index,
        y=means.values,
        mode='lines+markers',
        marker={'color': colour, 'size': 8},
        line={'color': colour},
        name=label,
    )
    for means, colour, label in (
        # (naive_means, 'red', '"Naive" parallel'),  # disabled: naive variant is not plotted
        (seq_means, 'blue', 'Sequential'),
        (optimal_means, 'red', 'Parallel'),
    )
])

# Log-log axes; the semi-transparent legend is anchored by its right edge
# at x=0.95 and placed at y=0.5.
fig.update_layout(
    xaxis_title='Array length',
    yaxis_title='Time (ms)',
    xaxis={'type': 'log'},
    yaxis={'type': 'log'},
    legend={
        'y': 0.5,
        'x': 0.95,
        'xanchor': 'right',
        'bgcolor': 'rgba(205, 223, 212, 0.4)',
    },
)

fig.show()


In [17]:
def prepare_data(series, n_points=5):
    """Return natural-log (size, time) data for the trailing entries of ``series``.

    Parameters
    ----------
    series : pandas.Series
        Series whose index holds the sizes and whose values hold the times.
        For the grouped means built in this notebook the trailing entries are
        the largest sizes, because ``groupby`` sorts its keys by default.
    n_points : int, default 5
        Number of trailing entries to keep. The default reproduces the
        previously hard-coded window of five points.

    Returns
    -------
    tuple
        ``(log_size, log_time)``: ``np.log`` applied to the selected index and
        to the selected values, in that order.

    Raises
    ------
    ValueError
        If ``n_points`` is smaller than 1. ``Series.tail`` gives zero or
        negative counts a different meaning (empty result / "drop the first
        rows"), which is never what a regression window wants.
    """
    if n_points < 1:
        raise ValueError(f"n_points must be >= 1, got {n_points}")

    # Keep only the trailing window of the series.
    window = series.tail(n_points)

    # Log-transform both the index (Size) and the values (Time).
    log_size = np.log(window.index)
    log_time = np.log(window.values)

    return log_size, log_time

# Log-log regression inputs for the sequential and the optimal (parallel) timings.
(log_size_seq, log_time_seq), (log_size_optimal, log_time_optimal) = (
    prepare_data(means) for means in (seq_means, optimal_means)
)


In [18]:
import statsmodels.api as sm

def run_regression(log_size, log_time):
    """Fit an ordinary-least-squares line of ``log_time`` against ``log_size``.

    A constant column is added to ``log_size`` so the model includes an
    intercept term alongside the slope.

    Returns the fitted results object produced by ``sm.OLS(...).fit()``.
    """
    design = sm.add_constant(log_size)  # regressors plus the intercept column
    return sm.OLS(log_time, design).fit()

# The naive regression inputs are prepared here because no other cell defines
# `log_size_naive` / `log_time_naive`; without this line the cell raises
# NameError on a fresh kernel (Restart & Run All).
log_size_naive, log_time_naive = prepare_data(naive_means)

# One log-log OLS fit per implementation variant.
results_naive = run_regression(log_size_naive, log_time_naive)
results_optimal = run_regression(log_size_optimal, log_time_optimal)
results_seq = run_regression(log_size_seq, log_time_seq)


In [19]:
def print_results(results, label):
    """Print a short regression summary headed by ``label``.

    Reads ``results.params[0]`` (reported as the intercept ``a``),
    ``results.params[1]`` (reported as the slope ``b``) and
    ``results.rsquared``; each number is shown with four decimals and the
    summary is followed by one blank line.
    """
    intercept = results.params[0]
    slope = results.params[1]
    summary_lines = (
        f"Results for {label}:",
        f"Intercept (a): {intercept:.4f}",
        f"Slope (b): {slope:.4f}",
        f"R-squared: {results.rsquared:.4f}\n",
    )
    print(*summary_lines, sep="\n")

# Report the sequential fit first, then the optimal (parallel) one.
for fitted, heading in ((results_seq, "Sequential"), (results_optimal, "Optimal")):
    print_results(fitted, heading)

Results for Sequential:
Intercept (a): -10.2462
Slope (b): 0.9712
R-squared: 0.9988

Results for Optimal:
Intercept (a): -13.0879
Slope (b): 1.0490
R-squared: 1.0000

