# Experiment 2: Part Type Collision Analysis

This notebook will contain gathered results from experiment 2.

# Part Type Collision Analysis
## Methodology
For each part type we have, run the experiment many times over many different hyperparameters. Specifically, isolate one hyperparameter, run the experiment over a range of values, tracking the computed collision rate each time. Repeat this for each hyperparameter and each part type.
## Deliverables
Graphs and analysis for the impact of different values of the hyperparmeters. How do they affect the final collision rate? Why are the effecting the collision rate like that? What does this tell us? 
Graphs and analysis for comparing the results across different part types. Are different part types affected in the same way by the same change in hyperperamters? How close are their collision rates? What does this tell us about the relative importance of both hyperparameters and part types. 

## Source Code

The below sections contains all of our source codes.

In [None]:
import mlflow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import os 

user_path = '~/GitHub/matcher'  # CHANGE THIS LINE AS NEEDED FOR YOUR ENVIRONMENT
os.chdir(os.path.expanduser(user_path))

In [None]:
def get_metrics_series(mlruns_path: str, experiment_id: str, run_id: str, metric_name: str) -> list:
    """Get a series of metric values for a given metric name."""
    with open(f'{mlruns_path}/{experiment_id}/{run_id}/metrics/{metric_name}') as f:
        file_lines = f.readlines()
    return [float(line.split()[1]) for line in file_lines]

In [51]:
experiment_id = mlflow.get_experiment_by_name(name='Experiment 3').experiment_id
runs_df = mlflow.search_runs(experiment_ids=experiment_id, max_results=10_000)
runs_df['monte_carlo_upper_collision_rate_series'] = runs_df.apply(
    lambda row: get_metrics_series(
            mlruns_path='mlruns', 
            experiment_id=experiment_id, 
            run_id=row['run_id'], 
            metric_name='monte_carlo_upper_collision_rate'), 
    axis=1)

print(runs_df.head(1)['monte_carlo_upper_collision_rate_series'])
print(runs_df['params.confidence_bound'])


Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/matcher/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 279, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/matcher/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 372, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/matcher/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1082, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/matcher/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1075, in _read_helper
    result = read_yaml(root, file_name)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/matcher/lib/python3.10/site-packages/mlflow/utils/file_util

0    [0.2758493175418901, 0.2686923494918686, 0.274...
Name: monte_carlo_upper_collision_rate_series, dtype: object
0       0.999
1       0.995
2      0.9995
3       0.995
4       0.995
        ...  
379    0.9999
380     0.999
381    0.9999
382     0.995
383    0.9999
Name: params.confidence_bound, Length: 384, dtype: object


In [53]:
base_meta_pdf_ci = 0.995
base_part_pdf_ci = 0.995
base_confidence_bound = 0.995

runs_df['params.meta_pdf_ci'] = runs_df['params.meta_pdf_ci'].astype(float)
runs_df['params.part_pdf_ci'] = runs_df['params.part_pdf_ci'].astype(float)
runs_df['params.confidence_bound'] = runs_df['params.confidence_bound'].astype(float)



In [69]:
mlflow.end_run()
mlflow.start_run()


meta_pdf_ci_analysis_df = runs_df.loc[
    (runs_df['params.confidence_bound'] == base_confidence_bound) &
    (runs_df['params.part_pdf_ci'] == base_part_pdf_ci)]
part_pdf_ci_analysis_df = runs_df.loc[
    (runs_df['params.confidence_bound'] == base_confidence_bound) &
    (runs_df['params.meta_pdf_ci'] == base_meta_pdf_ci)]
confidence_bound_analysis_df = runs_df.loc[
    (runs_df['params.meta_pdf_ci'] == base_meta_pdf_ci) &
    (runs_df['params.part_pdf_ci'] == base_part_pdf_ci)]

meta_pdf_ci_part_groups = meta_pdf_ci_analysis_df.groupby('params.part_type')
part_pdf_ci_part_groups = part_pdf_ci_analysis_df.groupby('params.part_type')
confidence_bound_part_groups = confidence_bound_analysis_df.groupby('params.part_type')


def run_experiment(df_groups, param_col: str):
    
    for part_type, part_group in df_groups:
    
        part_group.sort_values(by=param_col, inplace=True)
        vars = [
            np.var(np.array(param_collision_rates)) 
            for param_collision_rates in part_group['monte_carlo_upper_collision_rate_series'].to_numpy()]    
        plt.plot(part_group[param_col], vars, label=f'{part_type} - Correlation: {np.corrcoef(part_group[param_col], vars)[0,1]:.2f}')
    
    plt.legend()
    plt.title(f'Variance of Collision Rates vs {param_col}')
    plt.xlabel(f'{param_col}')
    plt.ylabel('Variance of Collision Rates')
    plt.savefig(f'psig_matcher/experiments/graphs/variance_of_collision_rates_vs_{param_col}.png')
    mlflow.log_artifact(f'psig_matcher/experiments/graphs/variance_of_collision_rates_vs_{param_col}.png')
    plt.clf()
        
run_experiment(meta_pdf_ci_part_groups, 'params.meta_pdf_ci')
run_experiment(part_pdf_ci_part_groups, 'params.part_pdf_ci')
run_experiment(confidence_bound_part_groups, 'params.confidence_bound')
    

  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


<Figure size 640x480 with 0 Axes>

In [39]:
mlflow.set_experiment("Experiment 3 Analysis")
for analysis_type in analysis_groups:
   
    group = analysis_groups[analysis_type]
    x_vals = []
    y_vals = []
    
    for index, df in group:

        col_vals = set(df[analysis_type].to_list())
        if len(col_vals) != 1:
            raise Exception(f"More than one {analysis_type} value in group")

        x_vals.append(col_vals.pop())
        collision_rates = df['monte_carlo_upper_collision_rate_series'].to_list()
        
        
        
    
    print(x_vals)
        #
        
    # plt.plot(x_vals, y_vals, label=f'{analysis_type}s vs upper_collision_rate')
    # plt.xlabel(analysis_type)
    # plt.ylabel(f"Averaged upper_collision_rate across all tested parts")
    # plt.savefig(f"psig_matcher/experiments/graphs/{analysis_type}_vs_upper_collision_rate.png")
    # mlflow.log_artifact(f"psig_matcher/experiments/graphs/{analysis_type}_vs_upper_collision_rate.png")
    

Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/matcher/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 279, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/matcher/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 372, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/matcher/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1082, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/matcher/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1075, in _read_helper
    result = read_yaml(root, file_name)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/matcher/lib/python3.10/site-packages/mlflow/utils/file_util

[5.979761196702748e-08, 0.0, 4.598898637839883e-08, 7.540432714487342e-06, 3.316289033383951e-07, 2.846950949963062e-06, 2.6956088459599897e-06, 2.8586608610424834e-06, 1.6283487106052558e-07, 1.0352224794788592e-05, 3.2268844976230575e-06, 4.866268943864671e-06, 0.0, 5.7356583448720804e-08, 6.467539537221085e-08, 1.9553460336967445e-06, 2.7140507646218722e-06, 4.347771239674689e-06, 3.970472102767036e-08, 2.6207435016581767e-06, 0.0, 4.6167496218134687e-07, 0.0, 5.6412354267438e-07, 2.3604072857520826e-06, 0.0, 0.0, 0.0, 6.286067777118147e-08, 1.3359494410888585e-05, 0.0, 7.871774710914331e-06, 1.6074970577782463e-07, 3.0631068836430844e-06, 2.4613372907154197e-06, 5.9225872782916314e-08, 3.2168071446522543e-06, 1.6176316345884339e-07, 2.156474628112652e-06, 0.0, 0.0, 7.699587379718339e-08, 0.0, 3.616721660456796e-06, 1.4778178764565252e-07, 7.7028371718682e-08, 3.860032088348612e-06, 1.380073345569785e-05, 2.9788078871895426e-08, 2.0229587809832216e-07, 4.0033115916969274e-08, 3.4832

---

## Conclusion

TBD.