Data Analysis Code

In [None]:
# Per-model averages of the benchmark columns.
#
# NOTE(review): every result name ends in `_speed`, yet `gamma` and `alpha` are
# averaged as-is and are presumably decoding parameters rather than
# throughputs -- confirm. The names are kept unchanged so that other cells
# reading them continue to work.

# Order matters: it must match the unpacking order used for both models below.
_AVERAGED_COLUMNS = (
    'gamma',
    'alpha',
    'approx_tokens_p_sec',
    'target_tokens_p_second',
    'sp_tokens_p_second',
)


def _mean_of_each(frame, columns):
    """Return a tuple with the mean of each listed column of `frame`, in order."""
    return tuple(frame[column].mean() for column in columns)


# Orca-2 model
(
    orca_avg_gamma_speed,
    orca_avg_alpha_speed,
    orca_avg_approx_speed,
    orca_avg_target_speed,
    orca_avg_sp_speed,
) = _mean_of_each(orca_data, _AVERAGED_COLUMNS)

# Bloom model
(
    bloom_avg_gamma_speed,
    bloom_avg_alpha_speed,
    bloom_avg_approx_speed,
    bloom_avg_target_speed,
    bloom_avg_sp_speed,
) = _mean_of_each(bloom_data, _AVERAGED_COLUMNS)

# Last expression of the cell: display all ten averages as a single tuple
# (Orca-2 values first, then Bloom).
(
    orca_avg_gamma_speed,
    orca_avg_alpha_speed,
    orca_avg_approx_speed,
    orca_avg_target_speed,
    orca_avg_sp_speed,
    bloom_avg_gamma_speed,
    bloom_avg_alpha_speed,
    bloom_avg_approx_speed,
    bloom_avg_target_speed,
    bloom_avg_sp_speed,
)



In [None]:
# Bar charts of the average throughput per decoding method, one panel per
# model. Only the approximation / target / speculative-decoding averages are
# plotted; the gamma average is not part of these charts.
methods_non_gamma = ['Approximation', 'Target', 'Speculative Decoding']
orca_speeds_non_gamma = [
    orca_avg_approx_speed,
    orca_avg_target_speed,
    orca_avg_sp_speed,
]
bloom_speeds_non_gamma = [
    bloom_avg_approx_speed,
    bloom_avg_target_speed,
    bloom_avg_sp_speed,
]


def _draw_speed_bars(ax, speeds, bar_color, panel_title):
    """Draw one bar per entry of `methods_non_gamma` on `ax` and label the panel."""
    ax.bar(methods_non_gamma, speeds, color=bar_color)
    ax.set_title(panel_title)
    ax.set_ylabel('Tokens per second')
    # The y-axis runs from 0 to 0.1 above the tallest bar so the bar does not
    # touch the top of the frame.
    ax.set_ylim(0, max(speeds) + 0.1)


# One row, two columns: Orca-2 on the left, Bloom on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))

_draw_speed_bars(axes[0], orca_speeds_non_gamma, 'blue', 'Orca-2 Model Speeds without Gamma')
_draw_speed_bars(axes[1], bloom_speeds_non_gamma, 'green', 'Bloom Model Speeds without Gamma')

plt.tight_layout()
plt.show()



In [None]:
import seaborn as sns

# Reshape each model's results to long form for seaborn: every original row
# contributes one output row per throughput column, with the column name stored
# in 'Method' and its value in 'Tokens/Sec'. 'gamma' is kept as the id column.
_LINEPLOT_COLUMNS = ['gamma', 'approx_tokens_p_sec', 'target_tokens_p_second', 'sp_tokens_p_second']


def _to_long_form(frame):
    """Select the plotted columns of `frame` and melt them around 'gamma'."""
    selected = frame[_LINEPLOT_COLUMNS]
    return selected.melt(id_vars='gamma', var_name='Method', value_name='Tokens/Sec')


def _draw_speed_lines(ax, long_frame, panel_title):
    """Plot 'Tokens/Sec' against 'gamma' on `ax`, one hue per 'Method'."""
    sns.lineplot(x='gamma', y='Tokens/Sec', hue='Method', data=long_frame, ax=ax)
    ax.set_title(panel_title)
    ax.set_xlabel('Gamma')
    ax.set_ylabel('Tokens per Second')


orca_df = _to_long_form(orca_data)
bloom_df = _to_long_form(bloom_data)

# Two side-by-side panels that share a single y-axis.
fig, axes = plt.subplots(1, 2, figsize=(20, 8), sharey=True)

_draw_speed_lines(axes[0], orca_df, 'Orca-2 Model Speeds Comparison')
_draw_speed_lines(axes[1], bloom_df, 'Bloom Model Speeds Comparison')

# Tighten the layout, then render the figure.
plt.tight_layout()
plt.show()
    

In [None]:
# Compare the mean speeds of the approximation, target and speculative-decoding
# runs between the two Bloom datasets (1 GPU vs 2 GPUs with model parallelism).

# The 2 GPU dataset spells its column names differently; map them onto the
# 1 GPU spellings so both frames can be indexed with the same column list.
_TWO_GPU_COLUMN_RENAMES = {
    'target_tok_p_sec': 'target_tokens_p_second',
    'approx_token_p_sec': 'approx_tokens_p_sec',
    'sp_tok_p_sec': 'sp_tokens_p_second',
}

# Renames in place, so `bloom_2gpu_data` itself carries the new column names
# from here on. Re-running the cell is harmless: `rename` ignores labels that
# are no longer present by default.
bloom_2gpu_data.rename(columns=_TWO_GPU_COLUMN_RENAMES, inplace=True)

# Columns averaged for the comparison, in the row order of the result table.
_COMPARED_COLUMNS = ['approx_tokens_p_sec', 'target_tokens_p_second', 'sp_tokens_p_second']

mean_values_1gpu = bloom_1gpu_data[_COMPARED_COLUMNS].mean()
mean_values_2gpu = bloom_2gpu_data[_COMPARED_COLUMNS].mean()

# One row per method, one column per GPU configuration.
mean_comparison_df = pd.DataFrame(
    {
        '1 GPU': mean_values_1gpu,
        '2 GPUs': mean_values_2gpu,
    }
)

# Last expression of the cell: display the comparison table.
mean_comparison_df


In [None]:
# Relabel the second entry (the 2 GPU configuration) so the tick label states
# that it uses model + data parallelism. This edits the existing
# `methods_sp_decoding` list in place; `sp_decoding_speeds` is used unchanged.
methods_sp_decoding[1] = 'Model + Data Parallelism (2 GPUs)'

# Redraw the bar chart with the updated label on a fresh 10x6 figure. The
# calls go through the figure's current axes, which is what the equivalent
# pyplot wrappers (`plt.bar`, `plt.title`, ...) operate on as well.
_sp_axes = plt.figure(figsize=(10, 6)).gca()
_sp_axes.bar(methods_sp_decoding, sp_decoding_speeds, color=['blue', 'green'])
_sp_axes.set_title('Speculative Decoding Speeds: 1 GPU vs Model + Data Parallelism (2 GPUs)')
_sp_axes.set_ylabel('Tokens per second')
_sp_axes.set_xlabel('GPU Configuration')

# Leave 0.01 of headroom above the tallest bar.
_sp_axes.set_ylim(0, max(sp_decoding_speeds) + 0.01)
plt.show()
