In [None]:
import pandas as pd

# Path to the aggregated results CSV, relative to this notebook; update it if
# your results live elsewhere.
csv_path = "../results/csv/all_results.csv"

# Load every run's results into a single DataFrame.
df = pd.read_csv(csv_path)

# Temperatures may be stored as labels that use "p" in place of the decimal
# point (e.g. "0p3" for 0.3). Convert the whole column to a real numeric dtype
# explicitly instead of relying on `replace` to downcast an object column,
# which recent pandas versions deprecate. Values that are already numeric pass
# through unchanged; a label that is not a number after the substitution raises
# ValueError instead of silently leaving a mixed-type column behind.
df["temperature"] = pd.to_numeric(
    df["temperature"].astype(str).str.replace("p", ".", regex=False)
)

# Peek at the first rows to confirm the expected structure.
print(df.head())

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Total score for every (model, temperature) pair, flattened back into columns
grouped_data = (
    df.groupby(["model_name", "temperature"])["score"]
    .sum()
    .reset_index()
)

# One line per model showing how its total score moves with temperature
fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(
    data=grouped_data,
    x="temperature",
    y="score",
    hue="model_name",
    marker="o",
    ax=ax,
)
ax.set_title("Total Score by Temperature for Each Model")
ax.set_xlabel("Temperature")
ax.set_ylabel("Total Score")
ax.legend(title="Model Name")
ax.grid(True)
plt.show()

In [None]:


# For each model, locate the row of grouped_data holding its highest total
# score; idxmax returns that row's index label, which .loc then selects.
top_score_rows = grouped_data.groupby("model_name")["score"].idxmax()
best_temps = grouped_data.loc[top_score_rows]

# One bar per model, coloured by the temperature that produced its best total
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=best_temps,
    x="model_name",
    y="score",
    hue="temperature",
    dodge=False,
    ax=ax,
)
ax.set_title("Total Score at Best Temperature for Each Model")
ax.set_xlabel("Model Name")
ax.set_ylabel("Total Score")
ax.legend(title="Best Temperature")
# Tilt the model names so long labels do not overlap
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:

# Step 1: Calculate the success rate for every (model, temperature) pair
# Number of successful trials (score == 1) in each group
success_counts = (
    df[df["score"] == 1]
    .groupby(["model_name", "temperature"])
    .score.count()
    .reset_index(name="success_count")
)

# Number of scored trials in each group
total_counts = (
    df.groupby(["model_name", "temperature"])
    .score.count()
    .reset_index(name="total_count")
)

# Merge on the totals (how="right") so that groups with zero successes are
# kept: they have no row in success_counts, and an inner merge would silently
# drop them instead of reporting a 0% success rate.
success_rate_data = pd.merge(
    success_counts, total_counts, on=["model_name", "temperature"], how="right"
)
# Groups absent from success_counts had no successful trial
success_rate_data["success_count"] = (
    success_rate_data["success_count"].fillna(0).astype(int)
)
# A group whose scores are all missing has no defined rate; leave it out
# rather than produce 0 / 0
success_rate_data = success_rate_data[
    success_rate_data["total_count"] > 0
].reset_index(drop=True)
success_rate_data["success_rate"] = (
    success_rate_data["success_count"] / success_rate_data["total_count"]
)

# Step 2: Line graph of success rate against temperature, one line per model
fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(
    x="temperature",
    y="success_rate",
    hue="model_name",
    marker="o",
    data=success_rate_data,
    ax=ax,
)
ax.set_title("Success Rate by Temperature for Each Model")
ax.set_xlabel("Temperature")
ax.set_ylabel("Success Rate")
ax.legend(title="Model Name")
ax.grid(True)
plt.show()

In [None]:
# Step 3: Bar chart of the best temperature for each model
# "Best" here means the temperature with the highest success rate; idxmax
# gives the index label of that row within each model's group.
top_rate_rows = success_rate_data.groupby("model_name")["success_rate"].idxmax()
best_temps = success_rate_data.loc[top_rate_rows]

# One bar per model, coloured by its best temperature
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x="model_name",
    y="success_rate",
    hue="temperature",
    dodge=False,
    data=best_temps,
    ax=ax,
)
ax.set_title("Best Temperature Success Rate for Each Model")
ax.set_xlabel("Model Name")
ax.set_ylabel("Success Rate")
ax.legend(title="Best Temperature")
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:

# Reshape success_rate_data (columns model_name, temperature, success_rate)
# into a grid: one row per model, one column per temperature
pivot_table = success_rate_data.pivot(
    index="model_name",
    columns="temperature",
    values="success_rate",
)

# Annotated heatmap of the grid, rates shown to two decimals
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    pivot_table,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    linewidths=0.5,
    ax=ax,
)
ax.set_title("Heatmap of Success Rates by Model and Temperature")
ax.set_xlabel("Temperature")
ax.set_ylabel("Model Name")
plt.show()

In [None]:
# Conflict type analysis: mean score of each model on each conflict type,
# pooled over all temperatures
conflict_scores = (
    df.groupby(["model_name", "conflict_type"])["score"]
    .mean()
    .reset_index()
)

# Grouped bars: conflict types along the x-axis, one bar per model
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(
    data=conflict_scores,
    x="conflict_type",
    y="score",
    hue="model_name",
    ax=ax,
)
ax.set_title("Performance by Conflict Type for Each Model")
ax.set_xlabel("Conflict Type")
ax.set_ylabel("Average Score")
ax.legend(title="Model Name")
plt.show()

In [None]:
# Show which columns the original DataFrame provides
print(df.columns)

# Step 1: Average score for each (model, temperature) pair
average_scores = df.groupby(["model_name", "temperature"]).score.mean().reset_index()

# Inspect the aggregated averages
print(average_scores.head())

# Step 2: For each model, keep the row whose temperature has the highest
# average score (idxmax returns that row's index label)
best_temperatures = average_scores.loc[
    average_scores.groupby("model_name")["score"].idxmax()
]

# Inspect the selected (model, best temperature) rows
print(best_temperatures.head())

# Keep only the runs made at each model's best temperature. Merge on the key
# columns alone: best_temperatures also carries a `score` column (the group
# average), and bringing it into the merge would collide with the per-run
# `score` and rename the two to `score_x` / `score_y`.
filtered_df = pd.merge(
    df,
    best_temperatures[["model_name", "temperature"]],
    on=["model_name", "temperature"],
    how="inner",
)

# Inspect the columns of the filtered runs
print(filtered_df.columns)

# Step 3: Performance by Conflict Type for Each Model at Their Best Temperature
conflict_scores = (
    filtered_df.groupby(["model_name", "conflict_type"]).score.mean().reset_index()
)

# Step 4: Visualize the Results
plt.figure(figsize=(12, 8))
sns.barplot(x="conflict_type", y="score", hue="model_name", data=conflict_scores)
plt.title("Performance by Conflict Type for Each Model at Best Temperature")
plt.xlabel("Conflict Type")
plt.ylabel("Average Score")
plt.legend(title="Model Name")
plt.xticks(rotation=45)  # Rotate for better readability if needed
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Distribution of per-run runtime for each model, drawn as box plots
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=df, x="model_name", y="runtime", ax=ax)
ax.set_title("Runtime Distribution by Model")
ax.set_xlabel("Model Name")
ax.set_ylabel("Runtime (seconds)")
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Number of successful trials (score == 1) for each model and temperature
success_counts = (
    df[df["score"] == 1]
    .groupby(["model_name", "temperature"])
    .size()
    .reset_index(name="success_count")
)

# Number of trials for each model and temperature
total_counts = (
    df.groupby(["model_name", "temperature"]).size().reset_index(name="total_count")
)

# Merge on the totals (how="right") so that groups with zero successes are
# kept: they have no row in success_counts, and an inner merge would silently
# drop them instead of reporting a 0% success rate.
success_rate_data = pd.merge(
    success_counts, total_counts, on=["model_name", "temperature"], how="right"
)
# Groups absent from success_counts had no successful trial
success_rate_data["success_count"] = (
    success_rate_data["success_count"].fillna(0).astype(int)
)

# Success rate = successful trials / all trials in the group
success_rate_data["success_rate"] = (
    success_rate_data["success_count"] / success_rate_data["total_count"]
)

In [None]:
# Distribution of the total number of commands per run, one box per model
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=df, x="model_name", y="num_total_commands", ax=ax)
ax.set_title("Distribution of Total Commands by Model")
ax.set_xlabel("Model Name")
ax.set_ylabel("Total Commands")
# Tilt the model names so long labels stay readable
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Distribution of the number of sent commands per run, one box per model
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=df, x="model_name", y="num_send_commands", ax=ax)
ax.set_title("Distribution of Sent Commands by Model")
ax.set_xlabel("Model Name")
ax.set_ylabel("Sent Commands")
# Tilt the model names so long labels stay readable
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:

# This plot expects df["temperature"] to hold numeric values already. If the
# column still contains labels such as "0p0" or "0p3", convert them first, as
# the loading cell at the top of the notebook does.

# Sent commands per run, grouped by model with one box per temperature
fig, ax = plt.subplots(figsize=(14, 8))
sns.boxplot(
    data=df,
    x="model_name",
    y="num_send_commands",
    hue="temperature",
    ax=ax,
)
ax.set_title("Distribution of Sent Commands by Model and Temperature")
ax.set_xlabel("Model Name")
ax.set_ylabel("Sent Commands")
ax.legend(title="Temperature")
# Tilt the model names so long labels stay readable
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Mean score for every (model, temperature, aircraft count) combination.
# NOTE(review): this rebinds `grouped_data`, which the total-score cell earlier
# in the notebook also defines and its best-temperature lookup reads; re-running
# that earlier lookup after this cell would pick up these averages instead.
grouped_data = (
    df.groupby(["model_name", "temperature", "num_aircraft"])["score"]
    .mean()
    .reset_index()
)

# One panel per model (two panels per row), one line per temperature
g = sns.FacetGrid(
    data=grouped_data,
    col="model_name",
    col_wrap=2,
    hue="temperature",
    palette="viridis",
    height=4,
    aspect=1.5,
)
g.map(sns.lineplot, "num_aircraft", "score")

# Legend, per-panel titles and shared axis labels
g.add_legend(title="Temperature")
g.set_titles("{col_name}")
g.set_axis_labels("Number of Aircraft", "Average Score")

# Leave headroom above the panels for the figure-level title
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle(
    "Number of Aircraft vs. Average Score by Model and Temperature",
    fontsize=16,
)

plt.show()