In [None]:
import mlflow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import os 

# Work from the repository root so the relative paths used later in the
# notebook (e.g. the graph output directory) resolve.
user_path = '~/GitHub/matcher'  # CHANGE THIS LINE AS NEEDED FOR YOUR ENVIRONMENT
repo_root = os.path.expanduser(user_path)  # expand the leading '~' to the home directory
os.chdir(repo_root)

In [None]:
def get_metrics_series(mlruns_path: str, experiment_id: str, run_id: str, metric_name: str) -> list:
    """Get a series of metric values for a given metric name.

    Reads ``{mlruns_path}/{experiment_id}/{run_id}/metrics/{metric_name}`` and
    returns the second whitespace-separated field of every non-blank line as a
    float. (Presumably this is MLflow's file-store layout, where each line is
    ``<timestamp> <value> <step>`` -- confirm against the MLflow version in use.)

    Args:
        mlruns_path: Root directory that contains the experiment directories.
        experiment_id: Name of the experiment directory under ``mlruns_path``.
        run_id: Name of the run directory under the experiment directory.
        metric_name: Name of the metric file under the run's ``metrics`` directory.

    Returns:
        The metric values in file order; an empty list if the file has no
        non-blank lines.

    Raises:
        FileNotFoundError: If the metric file does not exist.
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not a valid float.
    """
    metric_file = f'{mlruns_path}/{experiment_id}/{run_id}/metrics/{metric_name}'
    with open(metric_file) as f:
        # Stream the file and skip blank lines (e.g. a trailing newline), which
        # would otherwise fail with IndexError on ``split()[1]``.
        return [float(line.split()[1]) for line in f if line.strip()]

In [None]:
# Fetch (up to 10,000) runs logged under the source experiment as a DataFrame.
experiment_id = mlflow.get_experiment_by_name(name='Experiment 3 - test').experiment_id
runs_df = mlflow.search_runs(experiment_ids=experiment_id, max_results=10_000)

# Cast the columns used by the analysis to float so they can be compared
# against the numeric base_* constants and plotted.
# NOTE(review): presumably the params.* columns arrive as strings -- confirm.
for _float_col in (
    'params.part_dim',
    'metrics.monte_carlo_upper_collision_rate',
    'params.part_pdf_ci',
    'params.confidence_bound',
    'params.meta_pdf_ci',
):
    runs_df[_float_col] = runs_df[_float_col].astype(float)


In [None]:

# Baseline value of each parameter. The filter that builds
# `part_dim_analysis_df` keeps only runs whose parameters equal these values.
base_confidence_bound = 0.99
base_part_pdf_ci = 0.99
base_meta_pdf_ci = 0.99

# Candidate values per parameter.
# NOTE(review): not referenced in the cells visible here; presumably the grid
# the logged runs were swept over -- confirm.
confidence_bound = [0.95, 0.99, 0.999, 0.9999]
part_pdf_ci = [0.95, 0.99, 0.999]
meta_pdf_ci = [0.95, 0.99, 0.999]
part_dim = [2, 3, 5]


In [None]:
# Log this analysis under its own experiment: end whatever run is currently
# active, then start a new one for the artifacts produced below.
mlflow.set_experiment('Experiment 2 Analysis - 2')
mlflow.end_run()
mlflow.start_run()

# Keep only the runs whose meta_pdf_ci, part_pdf_ci and confidence_bound equal
# their baseline values, leaving part_dim free to vary.
at_baseline = (
    runs_df['params.meta_pdf_ci'].eq(base_meta_pdf_ci)
    & runs_df['params.part_pdf_ci'].eq(base_part_pdf_ci)
    & runs_df['params.confidence_bound'].eq(base_confidence_bound)
)
part_dim_analysis_df = runs_df.loc[at_baseline]

# One group per part type; each group becomes one series in the plots.
part_dim_part_groups = part_dim_analysis_df.groupby('params.part_type')
def run_experiment(df_groups, param_col: str):
    """Plot the estimated upper collision rate against ``param_col``, one line per group.

    For every ``(part_type, group)`` pair in ``df_groups`` the group is sorted
    by ``param_col`` and its ``metrics.monte_carlo_upper_collision_rate``
    column is plotted against ``param_col``. Each legend entry also shows the
    Pearson correlation between the two columns. The figure is saved under
    ``psig_matcher/experiments/graphs/`` (relative to the current working
    directory), logged to the active MLflow run as an artifact, and closed.

    Args:
        df_groups: Iterable of ``(part_type, DataFrame)`` pairs, e.g. the
            result of ``DataFrame.groupby``. The frames are not modified.
        param_col: Name of the column to use for the x axis.

    Returns:
        None. The result is the saved image file and the logged artifact.
    """
    # Use a dedicated figure rather than pyplot's implicit "current" figure so
    # nothing left over from another cell can leak into this plot.
    fig, ax = plt.subplots()

    for part_type, part_group in df_groups:
        # Sort into a new frame: each group is a slice of the caller's data, so
        # an in-place sort risks SettingWithCopyWarning and mutating shared data.
        sorted_group = part_group.sort_values(by=param_col)
        param_values = sorted_group[param_col]
        collision_rate = sorted_group['metrics.monte_carlo_upper_collision_rate'].to_numpy()
        correlation = np.corrcoef(param_values, collision_rate)[0, 1]
        ax.plot(param_values, collision_rate, label=f'{part_type} - Correlation: {correlation:.2f}')

    ax.legend()
    ax.set_title(f'Estimated Upper Collision Rate vs {param_col}')
    ax.set_xlabel(f'{param_col}')
    ax.set_ylabel('Estimated Upper Collision Rate')

    graph_path = f'psig_matcher/experiments/graphs/collision_rate_vs_{param_col}.png'
    # Create the output directory on first use so savefig cannot fail on a fresh checkout.
    os.makedirs(os.path.dirname(graph_path), exist_ok=True)
    fig.savefig(graph_path)
    mlflow.log_artifact(graph_path)
    # Close (rather than clear) the figure so no empty figure is left open to
    # be rendered as cell output or to accumulate in memory.
    plt.close(fig)
        
# Collision rate vs. part dimension, one line per part type.
run_experiment(df_groups=part_dim_part_groups, param_col='params.part_dim')
    

In [50]:

def run_part_type_averaged_experiment(df_groups, param_col: str):
    """Plot the collision rate against ``param_col``, averaged across all groups.

    Every group in ``df_groups`` is sorted by ``param_col`` and its
    ``metrics.monte_carlo_upper_collision_rate`` column is collected. The
    collected series are averaged element-wise and plotted as a single line
    whose legend entry shows the Pearson correlation with ``param_col``. The
    figure is saved under ``psig_matcher/experiments/graphs/`` (relative to the
    current working directory), logged to the active MLflow run as an
    artifact, and closed.

    The x positions are taken from the last group. The element-wise mean is
    only meaningful if every group holds the same ``param_col`` values; only
    the group lengths are checked here.

    Args:
        df_groups: Iterable of ``(key, DataFrame)`` pairs, e.g. the result of
            ``DataFrame.groupby``. The frames are not modified.
        param_col: Name of the column to use for the x axis.

    Returns:
        None. The result is the saved image file and the logged artifact.

    Raises:
        ValueError: If ``df_groups`` yields no groups, or if the groups do not
            all have the same number of rows.
    """
    x_vals = None
    y_vals = []
    for _, part_group in df_groups:
        # Sort into a new frame: each group is a slice of the caller's data, so
        # an in-place sort risks SettingWithCopyWarning and mutating shared data.
        sorted_group = part_group.sort_values(by=param_col)
        # Keep the x values explicitly instead of relying on the loop variable
        # still being bound after the loop.
        x_vals = sorted_group[param_col].to_numpy()
        y_vals.append(sorted_group['metrics.monte_carlo_upper_collision_rate'].to_numpy())

    if not y_vals:
        raise ValueError('df_groups contains no groups; there is nothing to average')
    if len({len(group_y) for group_y in y_vals}) != 1:
        raise ValueError(
            'all groups must have the same number of rows to be averaged element-wise; '
            f'got sizes {[len(group_y) for group_y in y_vals]}'
        )

    averaged_y_vals = np.mean(y_vals, axis=0)
    correlation = np.corrcoef(x_vals, averaged_y_vals)[0, 1]

    # Use a dedicated figure rather than pyplot's implicit "current" figure.
    fig, ax = plt.subplots()
    ax.plot(x_vals, averaged_y_vals, label=f'Averaged Across Part Types - Correlation: {correlation:.2f}')
    ax.legend()
    ax.set_title(f'Estimated Upper Collision Rate vs {param_col}')
    ax.set_xlabel(f'{param_col}')
    ax.set_ylabel('Estimated Upper Collision Rate')

    graph_path = f'psig_matcher/experiments/graphs/averaged_collision_rate_vs_{param_col}.png'
    # Create the output directory on first use so savefig cannot fail on a fresh checkout.
    os.makedirs(os.path.dirname(graph_path), exist_ok=True)
    fig.savefig(graph_path)
    mlflow.log_artifact(graph_path)
    # Close (rather than clear) the figure so no empty figure is left open to
    # be rendered as cell output or to accumulate in memory.
    plt.close(fig)
    
# Collision rate vs. part dimension, averaged over all part types.
run_part_type_averaged_experiment(df_groups=part_dim_part_groups, param_col='params.part_dim')

<Figure size 640x480 with 0 Axes>