# MNIST Evaluation Results Visualization

This notebook visualizes the accuracy and performance metrics from the MNIST evaluation results.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.patches import Patch

# Global plot appearance: ggplot theme plus seaborn's colour-blind-safe palette
plt.style.use('ggplot')
sns.set_palette("colorblind")

# Default canvas size and base font size for every figure in this notebook
plt.rcParams.update({
    'figure.figsize': [12, 8],
    'font.size': 12,
})

## Load the MNIST Evaluation Data

In [None]:
# Read the evaluation results CSV into a DataFrame
data_path = '../results/mnist_eval.csv'
df = pd.read_csv(data_path)

# Report how many rows were read, then show the table as the cell's output
n_loaded = len(df)
print(f"Loaded {n_loaded} configurations from {data_path}")
df

## Accuracy Visualization

In [None]:
# Bar chart of accuracy per configuration with a 95% reference line.
# Uses an explicit Figure/Axes pair so every call targets this chart.
fig, ax = plt.subplots(figsize=(14, 8))

# seaborn >= 0.13 deprecates `palette` without `hue`. Mapping `hue` to the x
# variable gives the same one-colour-per-bar result; dodge=False keeps each bar
# full width and centred on its tick.
sns.barplot(x='config', y='accuracy', hue='config', data=df,
            palette='viridis', dodge=False, ax=ax)

# Older seaborn releases draw a legend for the hue levels; it would only repeat
# the tick labels, so drop it when present.
if ax.get_legend() is not None:
    ax.get_legend().remove()

# Add data labels on top of each bar (bar i corresponds to row i of df).
# Rows without an accuracy value are skipped so no label lands on NaN.
for i, v in enumerate(df['accuracy']):
    if pd.isna(v):
        continue
    ax.text(i, v + 0.5, f"{v:.2f}%", ha='center', fontweight='bold')

# Customize the plot
ax.set_title('MNIST Accuracy by Configuration', fontsize=18, pad=20)
ax.set_xlabel('Configuration', fontsize=14)
ax.set_ylabel('Accuracy (%)', fontsize=14)
ax.set_ylim(0, 105)  # Headroom above 100% for the data labels
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Horizontal reference line at 95% accuracy. The handle is passed explicitly so
# the legend contains only this line, never the bars.
threshold_line = ax.axhline(y=95, color='r', linestyle='--', alpha=0.5,
                            label='95% Threshold')
ax.legend(handles=[threshold_line])

# Lay out last, once every artist (labels, legend, reference line) exists
fig.tight_layout()
plt.show()

## Combined Visualization: Accuracy and Configuration Details

In [None]:
# Accuracy per configuration, with bars coloured by (weight type, activation type).
fig, ax = plt.subplots(figsize=(16, 10))

# Colour assigned to each known (weight_type, activation_type) pair
color_map = {
    ('Float8', 'Float8'): 'tab:blue',
    ('BF16', 'BF16'): 'tab:orange',
    ('Float8', 'BF16'): 'tab:green'
}
# Colour for any pair that is not listed in color_map
fallback_color = 'tab:gray'

# One colour per row, in the same order as the bars
colors = [color_map.get((w, a), fallback_color)
          for w, a in zip(df['weight_type'], df['activation_type'])]

# Create the bar chart
bars = ax.bar(df['config'], df['accuracy'], color=colors)

# Add data labels centred above each bar; rows without an accuracy value are
# skipped so no label is placed at a NaN position
for bar, acc in zip(bars, df['accuracy']):
    if pd.isna(acc):
        continue
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2., height + 0.5,
            f'{acc:.2f}%', ha='center', va='bottom', fontweight='bold')

# Customize the plot
ax.set_title('MNIST Accuracy by Hardware Configuration', fontsize=20, pad=20)
ax.set_xlabel('Configuration', fontsize=16)
ax.set_ylabel('Accuracy (%)', fontsize=16)
ax.set_ylim(0, 105)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

# Add a grid
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Custom legend: one patch per known type pair, plus an "Other" entry when at
# least one bar fell back to the gray colour (otherwise those bars would be
# unexplained)
legend_elements = [Patch(facecolor=color, label=f'{w}-{a}')
                   for (w, a), color in color_map.items()]
if fallback_color in colors:
    legend_elements.append(Patch(facecolor=fallback_color, label='Other'))
ax.legend(handles=legend_elements, title='Weight-Activation Types',
          loc='lower right', fontsize=12)

fig.tight_layout()
plt.show()

## Performance Metrics Visualization

In [None]:
def _plot_metric_bars(ax, data, column, palette, title, ylabel,
                      xlabel='', label_format='{:.1f}'):
    """Draw one bar per configuration for ``data[column]`` on ``ax`` and label each bar.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    data : DataFrame with a 'config' column and the metric column.
    column : name of the metric column plotted on the y-axis.
    palette : seaborn palette name used to colour the bars.
    title, ylabel, xlabel : text for the title and axis labels.
    label_format : ``str.format`` template applied to each value for its label.
    """
    # seaborn >= 0.13 deprecates `palette` without `hue`; mapping `hue` to the
    # x variable gives the same per-bar colours. dodge=False keeps each bar
    # full width and centred on its tick.
    sns.barplot(x='config', y=column, hue='config', data=data,
                palette=palette, dodge=False, ax=ax)
    # Older seaborn releases add a hue legend that only repeats the tick labels.
    if ax.get_legend() is not None:
        ax.get_legend().remove()

    ax.set_title(title, fontsize=16)
    ax.set_xlabel(xlabel, fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.margins(y=0.1)  # headroom so the label above the tallest bar stays inside the axes

    # Data labels. The gap above each bar is given in points rather than data
    # units, so it looks the same whatever the magnitude of the metric.
    # Missing values get no label.
    for position, value in enumerate(data[column]):
        if pd.isna(value):
            continue
        ax.annotate(label_format.format(value), (position, value),
                    xytext=(0, 3), textcoords='offset points',
                    ha='center', va='bottom')


# Two stacked charts: total execution time on top, throughput below
fig, axes = plt.subplots(2, 1, figsize=(14, 12))

# Plot 1: Total execution time
_plot_metric_bars(axes[0], df, 'total_time', 'Blues_d',
                  title='Total Execution Time by Configuration',
                  ylabel='Time (ms)', label_format='{:.1f}')

# Plot 2: Samples per second (throughput)
_plot_metric_bars(axes[1], df, 'samples_per_second', 'Greens_d',
                  title='Throughput (Samples per Second) by Configuration',
                  ylabel='Samples/Second', xlabel='Configuration',
                  label_format='{:.2f}')

fig.tight_layout()
plt.show()

## Accuracy vs. Performance Trade-off

In [None]:
# Scatter plot of accuracy against total execution time, one point per configuration.
fig, ax = plt.subplots(figsize=(12, 8))

# Colour each point by its row index. With vmin=0 and vmax=n_configs, row i is
# drawn in viridis(i / n_configs), and the colorbar built from this scatter
# shows the actual index scale instead of an unrelated 0-1 range.
n_configs = len(df)
scatter = ax.scatter(df['total_time'], df['accuracy'],
                     c=np.arange(n_configs), cmap='viridis',
                     vmin=0, vmax=max(n_configs, 1),  # max() avoids a zero-width range on an empty frame
                     s=100, alpha=0.7)

# Label each point with its configuration name. Rows missing either coordinate
# have no point to label and are skipped.
for config, time_ms, acc in zip(df['config'], df['total_time'], df['accuracy']):
    if pd.isna(time_ms) or pd.isna(acc):
        continue
    ax.annotate(config,
                (time_ms, acc),
                xytext=(10, 5), textcoords='offset points',
                fontsize=10, fontweight='bold')

# Customize the plot
ax.set_title('Accuracy vs. Execution Time Trade-off', fontsize=18)
ax.set_xlabel('Total Execution Time (ms)', fontsize=14)
ax.set_ylabel('Accuracy (%)', fontsize=14)
ax.grid(True, linestyle='--', alpha=0.7)

# Colorbar keyed to the configuration index. It is built from the scatter
# itself and given `ax` explicitly: recent Matplotlib releases refuse to guess
# which Axes to take space from when handed a mappable not attached to any Axes.
fig.colorbar(scatter, ax=ax, ticks=np.arange(n_configs),
             label='Configuration Index')

fig.tight_layout()
plt.show()

## Summary and Conclusions

In [None]:
# Columns shown in the summary, in display order
summary_columns = ['config', 'weight_type', 'activation_type', 'multiplier',
                   'accuracy', 'total_time', 'samples_per_second']

# Highest accuracy first; sort_values returns a new frame, so df is left untouched
summary_df = df[summary_columns].sort_values('accuracy', ascending=False)

# One colour gradient per metric column
gradient_cmaps = {
    'accuracy': 'Greens',
    'total_time': 'Reds_r',
    'samples_per_second': 'Blues',
}
styled_summary = summary_df.style
for metric, cmap_name in gradient_cmaps.items():
    styled_summary = styled_summary.background_gradient(subset=[metric], cmap=cmap_name)

# Last expression of the cell, so the styled table is rendered as its output
styled_summary

## Key Findings

Based on the visualizations above, we can draw the following conclusions:

1. **Accuracy Performance**: The configurations that use BF16 activations (wb16ab16-8x8 and w8ab16-8x8) achieve significantly higher accuracy (>97%) than the all-Float8 configuration (w8a8-8x8), which reaches only about 9.59% accuracy, roughly chance level for the 10-class MNIST task.

2. **Execution Time**: There are notable differences in execution time across configurations, with potential trade-offs between accuracy and speed.

3. **Throughput**: The samples per second metric shows how efficiently each configuration processes the data, with some configurations showing better throughput despite longer total execution times.

4. **Weight-Activation Type Impact**: The combination of weight and activation types significantly affects both accuracy and performance, with BF16 types generally providing better accuracy.

5. **Multiplier Impact**: The choice of multiplier implementation (float_multiplier vs. lmul_fast) affects the performance characteristics while maintaining similar accuracy levels.

These insights can guide hardware accelerator design decisions based on specific requirements for accuracy vs. performance trade-offs.