# Analyse the Results of Running the Moran Process Experiment on Different Graphs
This is the newest version of this analysis file; it can merge the CSV result files produced by different jobs.

imports

In [None]:
import os
# Show the complete process environment as this cell's output
# (presumably a sanity check of the runtime environment).
# NOTE(review): this displays every environment variable, so any secrets held
# in the environment end up in the saved notebook output.
os.environ

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before code is executed.
%load_ext autoreload
%autoreload 2
# Switch the working directory to the project checkout; ROOT is later derived
# from os.getcwd(), and the project imports below presumably rely on it too.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
%cd /home/labs/pilpel/matanyaw/moran-process 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import numpy as np
import seaborn as sns
import os
from pathlib import Path
from analysis.analysis_utils import plot_hybrid_density, aggregate_results_no_load, GRAPH_PROPERTY_DESCRIPTION, COLOR_DICT
# change this if on a different computer!
from population_graph import GRAPH_PROPS
# Global figure styling for the whole notebook: seaborn theme first, then the
# matplotlib defaults for figure size and line width.
sns.set_theme(context="notebook", style="whitegrid", font_scale=1.2)
plt.rcParams.update(
    {
        "figure.figsize": (12, 7),
        "lines.linewidth": 2.5,
    }
)

# Directory name of the batch to analyse: fixed "batch_" prefix + batch label.
batch_name = "batch_" + "Test_batch_name_01"

In [None]:


# The project root is the current working directory of the notebook kernel.
ROOT = Path.cwd()

# Everything this notebook reads and writes lives under
# <ROOT>/simulation_data/<batch_name>.
data_dir = ROOT.joinpath("simulation_data")
batch_dir = data_dir.joinpath(batch_name)




In [None]:
import glob

# Locations inside the batch directory, kept as plain strings:
#   output_file      - path of the combined results CSV
#   tmp_results_path - directory holding the per-job result CSVs
output_file = os.fspath(batch_dir / "temp_full_results.csv")
tmp_results_path = os.fspath(batch_dir / "tmp" / "results")

# Collect every per-job result file and report how many were found.
all_files = glob.glob(os.path.join(tmp_results_path, "result_job_*.csv"))
print(f"Found {len(all_files)} files in temp results directory: {tmp_results_path}.")


In [None]:
# Run the project's aggregation helper on the batch directory and keep its return
# value, which the next cell passes to pd.read_csv as the path of the results CSV.
# delete_temp=False presumably leaves the temporary per-job files in place —
# confirm against analysis_utils.aggregate_results_no_load.
results_df_path = aggregate_results_no_load(batch_dir=batch_dir, delete_temp=False)

In [None]:
# Load the aggregated per-run results and print a quick structural summary.
results_df = pd.read_csv(results_df_path)
print(f"columns:  {results_df.columns!s}")
print(f"shape:  {results_df.shape!s}")

In [None]:
# Build one row of summary statistics per (wl_hash, r, graph_name), attach the
# graph metadata from graph_props.csv, and save the result as
# graph_statistics.csv in the batch directory.

# 'steps_success' equals 'steps' for runs that ended in fixation and NaN
# otherwise, so the NaN-skipping aggregations below only describe successful runs.
results_df['steps_success'] = results_df['steps'].where(results_df['fixation'].eq(True))

analysis_df = results_df.groupby(['wl_hash', 'r', 'graph_name']).agg(
    prob_fixation=('fixation', 'mean'),          # fraction of runs that fixated
    median_steps=('steps_success', 'median'),
    mean_steps=('steps_success', 'mean'),
    std_steps=('steps_success', 'std'),
    q25_steps=('steps_success', lambda x: x.quantile(0.25)),
    q75_steps=('steps_success', lambda x: x.quantile(0.75)),
    iqr_steps=('steps_success', lambda x: x.quantile(0.75) - x.quantile(0.25)),
    n_grouped=('fixation', 'size')               # number of runs in the group (successful or not)
).reset_index()

print("Shape before merging: ", analysis_df.shape)
df_graphs = pd.read_csv(os.path.join(batch_dir, 'graph_props.csv'))

# Left merge keeps every statistics row even when the graph has no metadata row;
# comparing the shapes printed before/after reveals duplicated keys in df_graphs.
analysis_df = pd.merge(
    analysis_df,
    df_graphs,
    on=['wl_hash', 'graph_name'],
    how='left',
    suffixes=('', '_db')
)

# Put 'Random' graphs first and every other category after them.
# kind='stable' is required here: the default quicksort is not stable and would
# scramble the (wl_hash, r, graph_name) order produced by the groupby within
# each of the two classes, making the saved CSV order arbitrary.
analysis_df['z_order'] = (analysis_df['category'] != 'Random').astype(int)
analysis_df = analysis_df.sort_values('z_order', kind='stable').drop(columns='z_order')
analysis_df.to_csv(os.path.join(batch_dir, 'graph_statistics.csv'), index=False)


print("Shape after merging: ", analysis_df.shape)
# Display sample
analysis_df.tail(20)

In [None]:
# Histogram of mean steps to fixation, one value per row of analysis_df,
# drawn through the object-oriented matplotlib interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(analysis_df['mean_steps'], bins=50, edgecolor='black', alpha=0.7)
ax.set_xlabel('Mean Steps')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Mean Steps')
ax.grid(axis='y', alpha=0.3)
plt.show()

In [None]:
# Attach graph metadata (df_graphs) to every individual run in results_df,
# matching on wl_hash and graph_name. Graphs whose category is 'Random' are
# removed from the metadata first, so the inner join drops their runs as well
# as any run without a metadata row.
non_random_graphs = df_graphs.loc[df_graphs['category'].ne('Random')]
merged_df = results_df.merge(
    non_random_graphs,
    on=['wl_hash', 'graph_name'],
    how='inner',
)

print(f"Merged dataframe shape: {merged_df.shape}")
merged_df.columns


In [None]:
# Look up and print the entry stored for the 'avg_degree_centrality' graph
# property in GRAPH_PROPERTY_DESCRIPTION (imported from analysis.analysis_utils).
print(GRAPH_PROPERTY_DESCRIPTION['avg_degree_centrality'])

In [None]:
# Overlaid histograms of steps_success (steps of runs that reached fixation),
# one semi-transparent histogram per category, all sharing the same bin edges.
categories = ["Mammalian", "Fish", "Avian", "Accelerator", "Decelerator"]

# Successful-run step counts pooled over all listed categories; used only to
# derive bin edges common to every histogram.
animal_results = merged_df.loc[merged_df['category'].isin(categories), 'steps_success'].dropna()

if animal_results.empty:
    # min()/max() of an empty Series are NaN, which would produce NaN bin edges,
    # so no meaningful histogram can be drawn; report it instead.
    print("No successful fixation runs found for the selected categories; nothing to plot.")
else:
    plt.figure(figsize=(10, 6))

    # 51 edges -> 50 equal-width bins spanning the pooled data range.
    bins = np.linspace(animal_results.min(), animal_results.max(), 51)

    for category in categories:
        data = merged_df.loc[merged_df['category'] == category, 'steps_success'].dropna()
        plt.hist(data, bins=bins, alpha=0.4, edgecolor='black', label=category, color=COLOR_DICT[category])

    plt.xlabel('Steps')
    plt.ylabel('Frequency')
    plt.title('Distribution of Steps to Fixation by Category')
    plt.legend()
    plt.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()

In [None]:
# Display the 'category' column of analysis_df as this cell's output.
analysis_df['category']

In [None]:
# Density plot relating the 'std_steps' and 'mean_steps' columns, violin part disabled.
# NOTE(review): analysis_df has one row per (wl_hash, r, graph_name), so rows for
# every r value are pooled here — confirm that is intended.
plot_hybrid_density(analysis_df, 'std_steps', 'mean_steps', with_violin=False)


In [None]:
# Density plot relating 'max_betweenness_centrality' and 'mean_steps', violin part disabled.
# Uses the unfiltered analysis_df (all r values together).
plot_hybrid_density(analysis_df, 'max_betweenness_centrality', 'mean_steps', with_violin=False)

In [None]:
# Density plot relating 'max_degree_centrality' and 'mean_steps', violin part enabled.
# Uses the unfiltered analysis_df (all r values together).
plot_hybrid_density(analysis_df, 'max_degree_centrality', 'mean_steps', with_violin=True)


In [None]:
# Density plot relating 'max_degree' and 'prob_fixation', violin part enabled.
# Uses the unfiltered analysis_df (all r values together).
plot_hybrid_density(analysis_df, 'max_degree', 'prob_fixation', with_violin=True)


In [None]:
# Restrict the per-graph statistics to r = 1.1.
# The 'r' column is a float that went through a CSV round trip, so compare with a
# tolerance (np.isclose) rather than exact equality, which can silently select
# no rows if the stored value differs in the last bits.
df_to_plot = analysis_df[np.isclose(analysis_df['r'], 1.1)]

# NEW_GRAPH_PROPS = ['avg_degree', 'max_degree']
print(GRAPH_PROPS)
# plot_hybrid_density(df_to_plot, 'mean_steps', 'std_steps', with_violin=False)
# NOTE(review): the next two calls pass the unfiltered analysis_df (all r values),
# not df_to_plot — confirm this is intended.
plot_hybrid_density(analysis_df, 'prob_fixation', 'mean_steps', with_violin=False)
plot_hybrid_density(analysis_df, 'mean_steps', 'prob_fixation', with_violin=False)


# --- EXAMPLES OF USAGE ---
# One plot per graph property listed in GRAPH_PROPS, each against 'mean_steps',
# using only the rows selected for r = 1.1.
for prop in GRAPH_PROPS:
    # plot_property_effect(df_to_plot, prop, 'median_steps')
    plot_hybrid_density(df_to_plot, prop, 'mean_steps', density_threshold=50, with_violin=True)
    # plot_hybrid_density(df_to_plot, prop, 'prob_fixation', density_threshold=50, with_violin=True)


In [None]:
# First the project's density plot for degree_std vs mean_steps ...
plot_hybrid_density(df_to_plot, 'degree_std', 'mean_steps', density_threshold=50, with_violin=True)

# ... then the same two columns as a plain hexbin count map; cells with no
# points (mincnt=1) are left blank.
fig, ax = plt.subplots(figsize=(10, 8))
hexes = ax.hexbin(
    df_to_plot['degree_std'],
    df_to_plot['mean_steps'],
    gridsize=20,
    cmap='YlOrRd',
    mincnt=1,
)
ax.set_xlabel('degree_std')
ax.set_ylabel('mean_steps')
fig.colorbar(hexes, ax=ax, label='count')
ax.set_title('Hexbin plot: degree_std vs mean_steps')
plt.show()