In [1]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

## X-axis: # of shots

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from sklearn.linear_model import LinearRegression
import plotly.express as px

# Data creation
# Each tuple is one measured run, with fields in the order given by
# `column_names`. `max_attempts` is kept as a string because some runs
# use the non-numeric value 'none'.
column_names = ('reset', 'max_attempts', 'amp_steps', 'shots', 'elapse_time')
measurements = [
    ('active', '1', 53, 100, 8.35),
    ('active', '1', 53, 500, 8.69),
    ('active', '1', 53, 1000, 8.53),
    ('thermal', 'none', 53, 100, 16.65),
    ('thermal', 'none', 53, 500, 25.93),
    ('thermal', 'none', 53, 1000, 41.7),
    ('active', '5', 53, 100, 8.58),
    ('active', '5', 53, 500, 16.69),
    ('active', '5', 53, 1000, 17.965),
    ('active', '15', 53, 100, 8.365),
    ('active', '15', 53, 500, 16.71),
    ('active', '15', 53, 1000, 25.295),
    ('active', '30', 53, 100, 8.37),
    ('active', '30', 53, 500, 25.08),
    ('active', '30', 53, 1000, 33.44),
    ('active', '1', 53, 1, 8.325),
    ('active', '15', 53, 1, 8.365),
    ('active', '30', 53, 1, 8.355),
    ('thermal', 'none', 53, 3000, 94.63),
    ('active', '30', 53, 3000, 76.25),
    ('active', '15', 53, 3000, 41.67),
    ('active', '5', 53, 3000, 24.99),
    ('active', '1', 53, 3000, 16.72),
    ('active', '1', 5300, 3000, 328.38),
    ('active', '1', 530, 30, 8.35),
    ('active', '1', 5300, 500, 67.06),
    ('active', '1', 5300, 1000, 114.82),
    ('active', '1', 530, 1000, 24.95),
    ('active', '1', 530, 500, 16.66),
    ('active', '5', 530, 100, 16.64),
    ('active', '5', 530, 500, 33.38),
    ('active', '5', 530, 1000, 49.92),
    ('active', '5', 530, 3000, 116.69),
    ('active', '15', 5300, 100, 47.9),
    ('active', '15', 5300, 500, 192.06),
    ('active', '15', 5300, 1000, 386.65),
    ('active', '15', 5300, 3000, 1153.2),
    ('thermal', 'none', 530, 100, 41.71),
    ('thermal', 'none', 5300, 100, 320.12),
]

# Transpose the row-wise records into a column-name -> list-of-values mapping,
# which is the shape pd.DataFrame consumes below.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*measurements))
}

df = pd.DataFrame(data)

# Tag every run with its configuration, "<reset>_<max_attempts>_<amp_steps>",
# so that runs sharing the same settings end up in the same legend group.
df['legend'] = [
    f'{reset}_{attempts}_{steps}'
    for reset, attempts, steps in zip(df['reset'], df['max_attempts'], df['amp_steps'])
]

# Distinct configurations, in order of first appearance
unique_legends = df['legend'].unique()

# Chain three qualitative palettes so there are plenty of distinct colors
colors = [
    *px.colors.qualitative.Plotly,
    *px.colors.qualitative.Set1,
    *px.colors.qualitative.Set2,
]

# Empty figure that the traces are added to
fig = go.Figure()

# Accumulates one dict of fit parameters per regressed legend group
regression_results = []

# For each configuration: plot its measurements as markers and, when it has at
# least two points, overlay its own least-squares line (y = a*x + b).
# groupby(sort=False) yields the groups in order of first appearance, so the
# color assigned to each legend follows that order.
for i, (legend, subset) in enumerate(df.groupby('legend', sort=False)):
    color = colors[i % len(colors)]  # wrap around if groups outnumber colors
    shots = subset['shots']
    elapsed = subset['elapse_time']

    # Measured points
    fig.add_trace(go.Scatter(
        x=shots,
        y=elapsed,
        mode='markers',
        name=legend,
        marker={'size': 8, 'color': color},
        legendgroup=legend,  # toggled together with its regression line
        showlegend=True,
    ))

    # A line cannot be fitted to a single observation
    if len(subset) < 2:
        continue

    # Ordinary least squares with shots as the only feature
    model = LinearRegression()
    model.fit(shots.to_numpy().reshape(-1, 1), elapsed.to_numpy())
    slope = model.coef_[0]
    intercept = model.intercept_

    # x values at which the fitted line is drawn
    x_min, x_max = shots.min(), shots.max()
    if x_min != x_max:
        x_range = np.linspace(x_min, x_max, 50)
    else:
        # All points share one x value: draw a short segment around it
        x_range = np.array([x_min - 10, x_min, x_min + 10])

    y_pred = model.predict(x_range[:, None])

    # Fitted line, dashed and in the same color as the points
    fig.add_trace(go.Scatter(
        x=x_range,
        y=y_pred,
        mode='lines',
        name=f'{legend}_regression',
        line={'color': color, 'width': 2, 'dash': 'dash'},
        legendgroup=legend,
        showlegend=False,  # only the marker trace appears in the legend
    ))

    # Keep the fit parameters for the textual report
    regression_results.append({
        'legend': legend,
        'a (slope)': slope,
        'b (intercept)': intercept,
        'n_points': len(subset),
        'equation': f'y = {slope:.4f}x + {intercept:.2f}',
    })

# Figure-level styling: titles, size, hover behavior, and a vertical legend
# anchored just outside the right edge of the plotting area.
layout_options = {
    'title': 'Shots vs Elapse Time with Individual Linear Regressions',
    'xaxis_title': '# of shots',
    'yaxis_title': 'avg elapse_time (s)',
    'hovermode': 'closest',
    'width': 1200,
    'height': 700,
    'legend': {
        'orientation': "v",
        'yanchor': "top",
        'y': 1,
        'xanchor': "left",
        'x': 1.02,
    },
}
fig.update_layout(**layout_options)

# Render the figure
fig.show()

# Textual report of the per-legend fits: a detailed listing followed by a
# compact summary table.
banner = "=" * 70

print(f"\n{banner}")
print("Linear Regression Analysis Results for Each Legend (y = ax + b)")
print(banner)

# One row per fitted legend group, ordered alphabetically by legend label
results_df = pd.DataFrame(regression_results)
results_df = results_df.sort_values('legend')

for _, fit in results_df.iterrows():
    print(
        f"\nLegend: {fit['legend']}\n"
        f"  Equation: {fit['equation']}\n"
        f"  Slope (a): {fit['a (slope)']:.6f}\n"
        f"  Intercept (b): {fit['b (intercept)']:.4f}\n"
        f"  Number of data points: {fit['n_points']}"
    )

print(f"\n{banner}")
print("Regression Analysis Summary Table")
print(banner)

# Summary table: drop the equation text and round the coefficients
summary_df = results_df.loc[:, ['legend', 'a (slope)', 'b (intercept)', 'n_points']].copy()
for column, decimals in (('a (slope)', 6), ('b (intercept)', 4)):
    summary_df[column] = summary_df[column].round(decimals)

# Lift pandas' display limits (this changes the options for the whole session)
for option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(option, None)

print(summary_df.to_string(index=False))

# CSV save option
# Ask the user whether to export the regression results. When no interactive
# stdin is available, fall back to "no" instead of aborting the run: input()
# raises EOFError on a closed stdin, and a Jupyter kernel started without stdin
# support raises StdinNotImplementedError, a NotImplementedError subclass.
try:
    save_to_csv = input("\n\nWould you like to save the results to a CSV file? (y/n): ")
except (EOFError, NotImplementedError):
    save_to_csv = 'n'
    print("No interactive input available; skipping CSV export.")

# Tolerate surrounding whitespace and either letter case in the answer
if save_to_csv.strip().lower() == 'y':
    results_df.to_csv('regression_results.csv', index=False)
    print("Results have been saved to regression_results.csv")


Linear Regression Analysis Results for Each Legend (y = ax + b)

Legend: active_15_53
  Equation: y = 0.0111x + 9.83
  Slope (a): 0.011137
  Intercept (b): 9.8329
  Number of data points: 5

Legend: active_15_5300
  Equation: y = 0.3824x + 5.25
  Slope (a): 0.382352
  Intercept (b): 5.2479
  Number of data points: 4

Legend: active_1_53
  Equation: y = 0.0029x + 7.48
  Slope (a): 0.002871
  Intercept (b): 7.4809
  Number of data points: 5

Legend: active_1_530
  Equation: y = 0.0171x + 7.93
  Slope (a): 0.017108
  Intercept (b): 7.9284
  Number of data points: 3

Legend: active_1_5300
  Equation: y = 0.1052x + 12.33
  Slope (a): 0.105171
  Intercept (b): 12.3295
  Number of data points: 3

Legend: active_30_53
  Equation: y = 0.0226x + 9.51
  Slope (a): 0.022596
  Intercept (b): 9.5058
  Number of data points: 5

Legend: active_5_53
  Equation: y = 0.0048x + 11.58
  Slope (a): 0.004764
  Intercept (b): 11.5771
  Number of data points: 4

Legend: active_5_530
  Equation: y = 0.0340x + 