# Runtime execution data
The cells below query the execution time of each merge tool for every merge scenario, plot the resulting distributions, and summarize them.

In [None]:
from helpers.sqlite_helpers import sql_query_to_pd
import pandas as pd

In [None]:
def get_merge_tool_execution_time_per_scenario_query(tool_name: str) -> str:
  """Build the SQL text selecting per-scenario execution time for one tool.

  The query reads from the ``{tool_name}_executions_per_commit_filtered``
  relation and exposes two columns: ``scenario_id`` and ``time_in_ms``
  (``time_in_ns`` divided by 1e6).

  ``tool_name`` is interpolated into the SQL verbatim, so it must be a
  trusted identifier prefix.

  Args:
    tool_name: Prefix of the ``*_executions_per_commit_filtered`` relation.

  Returns:
    The SQL query as a string, suitable for embedding as a subquery.
  """
  source_relation = f"{tool_name}_executions_per_commit_filtered"
  # The leading empty entry and trailing two-space entry reproduce the
  # surrounding whitespace of the query text exactly.
  query_lines = [
    "",
    "    SELECT",
    "      scenario_id, time_in_ns / 1e6 as time_in_ms",
    "    FROM",
    f"      {source_relation}",
    "  ",
  ]
  return "\n".join(query_lines)

In [None]:
# Per-tool subqueries; each one yields (scenario_id, time_in_ms).
last_merge_times = get_merge_tool_execution_time_per_scenario_query("last_merge")
spork_times = get_merge_tool_execution_time_per_scenario_query("spork")
mergiraf_times = get_merge_tool_execution_time_per_scenario_query("mergiraf")
jdime_times = get_merge_tool_execution_time_per_scenario_query("jdime")

# Join the four subqueries on scenario_id (anchored on last_merge), so a row
# is produced only for scenario ids present in all of them. The result has
# one time_in_ms column per tool: LAST_MERGE, JDIME, MERGIRAF, SPORK.
execution_time_per_scenario_query = f"""
    SELECT
        lm.time_in_ms as LAST_MERGE,
        j.time_in_ms AS JDIME,
        m.time_in_ms AS MERGIRAF,
        s.time_in_ms AS SPORK
    FROM
      ({last_merge_times}) lm
    JOIN
      ({spork_times}) s
    ON
      lm.scenario_id = s.scenario_id
    JOIN
      ({mergiraf_times}) m
    ON
      lm.scenario_id = m.scenario_id
    JOIN
      ({jdime_times}) j
    ON
      lm.scenario_id = j.scenario_id
  """

# Last expression of the cell: the notebook displays the resulting DataFrame.
sql_query_to_pd(execution_time_per_scenario_query)

## Plotting execution time per scenario

In [None]:
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np

# Raincloud plot of execution time per merge scenario: for every tool a
# horizontal boxplot, a half violin above it and a jittered scatter below it.
execution_times = sql_query_to_pd(execution_time_per_scenario_query)

# One column per tool; the column names become the y-axis tick labels.
tools = execution_times.columns

fig, ax = plt.subplots(figsize=(8, 4))

# One fill color per tool column, in column order.
boxplots_colors = ['yellowgreen', 'olivedrab', 'blue', 'red']

# Horizontal boxplots with filled boxes; outlier markers are hidden because
# the scatter layer below already shows every individual point.
bp = ax.boxplot(execution_times, patch_artist = True, vert = False, showfliers = False)

# Apply the per-tool color with some transparency.
for patch, color in zip(bp['boxes'], boxplots_colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.4)

# One color per tool column for the violin bodies, in column order.
violin_colors = ['thistle', 'orchid', 'red', 'blue']

# Horizontal violins drawn as bare density outlines (no mean/extrema/median marks).
vp = ax.violinplot(execution_times, points=500,
               showmeans=False, showextrema=False, showmedians=False, vert=False)

for idx, body in enumerate(vp['bodies']):
    # Clip the outline's y-coordinates in place to [idx + 1, idx + 2] so only
    # the upper half of the violin remains visible.
    outline = body.get_paths()[0].vertices
    outline[:, 1] = np.clip(outline[:, 1], idx + 1, idx + 2)
    body.set_color(violin_colors[idx])

# One color per tool column for the scatter points, in column order.
scatter_colors = ['tomato', 'darksalmon', 'red', 'tomato']

# Scatter every measurement slightly below its boxplot (around idx + 0.8),
# with a small uniform vertical jitter so the points do not overlap on the y-axis.
for idx, tool in enumerate(tools):
    timings = execution_times[tool]
    y = (idx + .8) + np.random.uniform(low=-.05, high=.05, size=len(timings))
    ax.scatter(timings, y, s=.3, c=scatter_colors[idx])

# Label each row (positions 1..N) with its tool name.
ax.set_yticks(np.arange(1, len(tools) + 1, 1))
ax.set_yticklabels(tools)
ax.set_xlabel('Time in ms')
ax.set_title("Execution time per merge scenario (Raincloud)")
ax.set_xscale('log')
plt.show()

In [None]:
execution_times = sql_query_to_pd(execution_time_per_scenario_query)

# Build one summary row per tool column: mean, standard deviation, median,
# maximum and minimum of its time_in_ms values.
summary_rows = []
for tool in execution_times.columns:
  timings = execution_times[tool]
  summary_rows.append({
    "tool": tool,
    "average": timings.mean(),
    "std_avg": timings.std(),
    "median": timings.median(),
    "max": timings.max(),
    "min": timings.min(),
  })

# NOTE(review): binding `stats` here shadows the `scipy.stats` module imported
# in the plotting cell; the name is kept in case later cells rely on it.
stats = pd.DataFrame(data=summary_rows)

stats