In [29]:
import pandas as pd
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [30]:
# The parent of the current working directory is treated as the project root
working_dir = os.getcwd()
project_root = os.path.split(working_dir)[0]
results_dir = os.path.join(project_root, "results/baseline_experiments_20241127_150533")

def load_environment_results(base_path):
    """Load and stack the per-episode results of one environment directory.

    Every entry matching ``episode_*`` under ``base_path`` that contains a
    ``results.csv`` is read, tagged with an ``Episode_Number`` column parsed
    from the entry name, and concatenated in ascending *numeric* episode
    order (so ``episode_2`` precedes ``episode_10``).

    Args:
        base_path: Directory holding the ``episode_<n>`` sub-directories.

    Returns:
        A single DataFrame with a fresh integer index, or an empty DataFrame
        when no ``results.csv`` files are found.

    Raises:
        ValueError: If an entry that contains ``results.csv`` has a
            non-integer token after the first underscore in its name.
    """
    episodes = []
    for episode_dir in glob.glob(os.path.join(base_path, "episode_*")):
        results_file = os.path.join(episode_dir, "results.csv")
        # Entries without a results file are ignored before their name is parsed.
        if not os.path.exists(results_file):
            continue
        episode_num = int(os.path.basename(episode_dir).split('_')[1])
        episodes.append((episode_num, results_file))

    # Sort on the parsed number: a plain string sort would put episode_10
    # ahead of episode_2 and scramble the row order of the combined frame.
    episodes.sort()

    all_results = []
    for episode_num, results_file in episodes:
        df = pd.read_csv(results_file)
        df['Episode_Number'] = episode_num
        all_results.append(df)

    if all_results:
        return pd.concat(all_results, ignore_index=True)
    return pd.DataFrame()

In [31]:

# Read the episode results of each environment, standard first, then random-goal
standard_env_results, random_env_results = (
    load_environment_results(os.path.join(results_dir, env_dir_name))
    for env_dir_name in ("MiniGrid-Empty-8x8-v0", "random_goal_empty")
)

def _normalize_success_flag(value):
    """Map one raw ``Success`` entry to True/False, or NaN if unrecognised."""
    if isinstance(value, (bool, np.bool_)):
        return bool(value)
    if isinstance(value, str):
        # Tolerate case and surrounding whitespace ('true', ' False', ...).
        return {'true': True, 'false': False}.get(value.strip().lower(), np.nan)
    return np.nan


def create_summary_stats(df, env_name):
    """Summarise success rate and step counts for one environment.

    Args:
        df: DataFrame with a ``Success`` column (boolean/numeric, or
            'True'/'False' style strings) and a numeric ``Steps`` column.
        env_name: Label stored in the ``Environment`` field.

    Returns:
        A Series with the environment label, success rate in percent,
        mean/std/min/max of ``Steps`` and the number of rows (``Trials``).

    Raises:
        KeyError: If ``df`` lacks the ``Success`` or ``Steps`` column.
    """
    raw_success = df['Success']
    if pd.api.types.is_bool_dtype(raw_success) or pd.api.types.is_numeric_dtype(raw_success):
        success_values = raw_success
    else:
        # Object/string columns may hold real booleans, text flags, or both.
        # Coerce per value; unrecognised entries become NaN and are left out
        # of the mean instead of silently turning the whole rate into NaN.
        success_values = raw_success.map(_normalize_success_flag).astype(float)

    stats = {
        'Environment': env_name,
        'Success Rate (\\%)': (success_values.mean() * 100),
        'Average Steps': df['Steps'].mean(),
        'Std Steps': df['Steps'].std(),
        'Min Steps': df['Steps'].min(),
        'Max Steps': df['Steps'].max(),
        'Trials': len(df)
    }
    return pd.Series(stats)

# One summary row per environment, in the order they appear in the report
summary_rows = [
    create_summary_stats(env_frame, env_label)
    for env_frame, env_label in (
        (standard_env_results, 'Standard Empty'),
        (random_env_results, 'Random Goal Empty'),
    )
]
summary_df = pd.DataFrame(summary_rows)

In [35]:
# Create visualization: step distributions (left panel) and success rates (right panel)
env_labels = ['Standard\nEmpty', 'Random\nGoal']
env_results = [standard_env_results, random_env_results]
point_colors = ['blue', 'orange']

# Fixed seed so the jittered overlay, and therefore the saved PNG, is the same on every run.
jitter_rng = np.random.default_rng(0)

fig, (steps_ax, success_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Steps Distribution Plot
steps_ax.boxplot([results['Steps'] for results in env_results])
# Tick labels are set explicitly instead of through boxplot(labels=...): that
# keyword is deprecated since Matplotlib 3.9 and its replacement (tick_labels)
# does not exist in older releases. Boxes sit at positions 1..N by default.
steps_ax.set_xticks(list(range(1, len(env_labels) + 1)))
steps_ax.set_xticklabels(env_labels)
steps_ax.set_title('Steps Distribution')
steps_ax.set_ylabel('Number of Steps')
steps_ax.grid(True, alpha=0.3)

# Add individual points with jitter
for position, (results, color) in enumerate(zip(env_results, point_colors), start=1):
    steps = results['Steps']
    x_jitter = jitter_rng.normal(position, 0.04, size=len(steps))
    steps_ax.scatter(x_jitter, steps, alpha=0.4, color=color)

# Success rate bar plot.
# The rate is the share of rows whose Success value compares equal to True;
# string flags such as 'True' do not match this comparison.
success_rates = [
    (results['Success'] == True).mean() * 100
    for results in env_results
]

success_ax.bar(env_labels, success_rates)
success_ax.set_title('Success Rate')
success_ax.set_ylabel('Success Rate (%)')
success_ax.set_ylim(0, 100)

# Add value labels on top of bars
for bar_index, rate in enumerate(success_rates):
    success_ax.text(bar_index, rate + 1, f'{rate:.1f}%', ha='center')

fig.tight_layout()
fig.savefig('baseline_results_summary.png', dpi=300, bbox_inches='tight')
# Close the figure once it is written to disk.
plt.close(fig)

In [36]:
# Show the summary table with values rounded to two decimals
rounded_summary = summary_df.round(2)
print("\nSummary Statistics:", rounded_summary, sep="\n")



Summary Statistics:
         Environment  Success Rate (\%)  Average Steps  Std Steps  Min Steps  \
0     Standard Empty               80.0           42.0       6.07         34   
1  Random Goal Empty               70.0           33.6      13.93         14   

   Max Steps  Trials  
0         50      10  
1         50      10  


In [33]:
# Cell formatter used when preparing the summary table for export
def format_float(x):
    """Return floats as two-decimal strings; pass any other value through unchanged."""
    return f"{x:.2f}" if isinstance(x, float) else x

# Build a formatted copy of the summary: every float64 column except 'Trials'
# (a count that should stay an integer) is rendered as two-decimal strings.
numeric_cols = summary_df.select_dtypes(include=['float64']).columns
formatted_df = summary_df.assign(
    **{
        name: summary_df[name].apply(format_float)
        for name in numeric_cols
        if name != 'Trials'
    }
)

# Render the formatted frame as a LaTeX tabular; escaping is off because the
# header already carries its own LaTeX escape (\%).
latex_options = {
    'index': False,
    'escape': False,
    'column_format': 'l|r|r|r|r|r|r',
}
latex_table = formatted_df.to_latex(**latex_options)

print("\nLaTeX Table:", latex_table, sep="\n")


LaTeX Table:
\begin{tabular}{l|r|r|r|r|r|r}
\toprule
Environment & Success Rate (\%) & Average Steps & Std Steps & Min Steps & Max Steps & Trials \\
\midrule
Standard Empty & 80.00 & 42.00 & 6.07 & 34 & 50 & 10 \\
Random Goal Empty & 70.00 & 33.60 & 13.93 & 14 & 50 & 10 \\
\bottomrule
\end{tabular}



In [34]:
# Inspect the raw Success column of each environment: distinct values and dtype
for heading, env_frame in (
    ("Standard Environment Success values:", standard_env_results),
    ("\nRandom Environment Success values:", random_env_results),
):
    print(heading)
    print(env_frame['Success'].unique())
    print("Type:", env_frame['Success'].dtype)

# Keep only the rows whose Success value compares equal to True
successful_standard = standard_env_results.loc[standard_env_results['Success'] == True]
successful_random = random_env_results.loc[random_env_results['Success'] == True]

# Success rate over all rows, step statistics over the successful rows only
print("\nDetailed Success Analysis:")
for heading, all_episodes, successful_episodes in (
    ("Standard Empty Environment:", standard_env_results, successful_standard),
    ("\nRandom Goal Empty Environment:", random_env_results, successful_random),
):
    print(heading)
    print(f"Success Rate: {len(successful_episodes)/len(all_episodes)*100:.1f}%")
    print(f"Average Steps (successful episodes): {successful_episodes['Steps'].mean():.1f}")
    print(f"Std Dev Steps (successful episodes): {successful_episodes['Steps'].std():.1f}")

# Preview the first rows of each frame as a sanity check on the loaded data
for heading, env_frame in (
    ("\nFirst few rows of Standard Environment:", standard_env_results),
    ("\nFirst few rows of Random Environment:", random_env_results),
):
    print(heading)
    print(env_frame[['Success', 'Steps']].head())

Standard Environment Success values:
[ True False]
Type: bool

Random Environment Success values:
[ True False]
Type: bool

Detailed Success Analysis:
Standard Empty Environment:
Success Rate: 80.0%
Average Steps (successful episodes): 40.0
Std Dev Steps (successful episodes): 5.0

Random Goal Empty Environment:
Success Rate: 70.0%
Average Steps (successful episodes): 26.6
Std Dev Steps (successful episodes): 9.9

First few rows of Standard Environment:
   Success  Steps
0     True     38
1     True     49
2     True     45
3     True     37
4    False     50

First few rows of Random Environment:
   Success  Steps
0     True     21
1     True     33
2     True     40
3     True     36
4     True     25
