# Experiment 2: Part Type Collision Analysis

This notebook will contain gathered results from experiment 2.

# Part Type Collision Analysis
## Methodology
For each part type we have, run the experiment many times over many different hyperparameters. Specifically, isolate one hyperparameter, run the experiment over a range of values, tracking the computed collision rate each time. Repeat this for each hyperparameter and each part type.
## Deliverables
Graphs and analysis for the impact of different values of the hyperparameters. How do they affect the final collision rate? Why are they affecting the collision rate like that? What does this tell us?
Graphs and analysis for comparing the results across different part types. Are different part types affected in the same way by the same change in hyperparameters? How close are their collision rates? What does this tell us about the relative importance of both hyperparameters and part types?

## Source Code

The sections below contain all of our source code.

In [None]:
import mlflow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import os 

# Root of the local checkout. The notebook switches into it so that the
# relative paths used further down resolve against the repository.
user_path = '~/GitHub/matcher'  # CHANGE THIS LINE AS NEEDED FOR YOUR ENVIRONMENT
repo_root = os.path.expanduser(user_path)  # expand the leading '~' to the home directory
os.chdir(repo_root)

In [None]:
def get_metrics_series(mlruns_path: str, experiment_id: str, run_id: str, metric_name: str) -> list:
    """Get a series of metric values for a given metric name.

    Reads the file ``{mlruns_path}/{experiment_id}/{run_id}/metrics/{metric_name}``
    and returns the second whitespace-separated field of every non-blank line,
    converted to ``float``, in file order.

    Args:
        mlruns_path: Directory that contains the per-experiment folders.
        experiment_id: Name of the experiment folder.
        run_id: Name of the run folder inside the experiment folder.
        metric_name: Name of the metric file inside the run's ``metrics`` folder.

    Returns:
        A list of floats, one per non-blank line of the metric file.

    Raises:
        FileNotFoundError: If the metric file does not exist.
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not a valid float.
    """
    metric_file = f'{mlruns_path}/{experiment_id}/{run_id}/metrics/{metric_name}'
    with open(metric_file, encoding='utf-8') as f:
        # Blank lines (e.g. a trailing empty line) carry no data point; skipping
        # them avoids an IndexError from ``line.split()[1]``.
        return [float(line.split()[1]) for line in f if line.strip()]

In [None]:
# Look up the source experiment by name and load its runs into a DataFrame.
# NOTE(review): the notebook title says "Experiment 2" while the runs are read
# from 'Experiment 4' -- confirm which one is intended.
experiment_name = 'Experiment 4'
experiment = mlflow.get_experiment_by_name(name=experiment_name)
if experiment is None:
    # get_experiment_by_name returns None for an unknown name; fail with a clear
    # message instead of an AttributeError on `.experiment_id`.
    raise ValueError(
        f'MLflow experiment {experiment_name!r} was not found; '
        'check the tracking location and the working directory.'
    )
experiment_id = experiment.experiment_id
# experiment_ids is documented as a list of IDs, so wrap the single ID.
runs_df = mlflow.search_runs(experiment_ids=[experiment_id], max_results=10_000)

In [None]:
# Baseline hyperparameter values: when one hyperparameter is swept, the runs
# are selected by holding the others at these values.
base_meta_pdf_ci = 0.995
base_part_pdf_ci = 0.995
base_confidence_bound = 0.995
part_dim_base = 2

# Columns that must be numeric so they can be compared against the baselines
# above and plotted (presumably the params arrive as strings -- TODO confirm).
numeric_columns = [
    'params.part_dim',
    'metrics.average_part_pdf_entropy',
    'params.part_pdf_ci',
    'params.confidence_bound',
    'params.meta_pdf_ci',
]
for numeric_column in numeric_columns:
    runs_df[numeric_column] = runs_df[numeric_column].astype(float)



In [None]:
# Record this notebook's outputs under a separate analysis experiment.
mlflow.set_experiment('Experiment 4 Analysis')
# End whatever run is currently active before starting a new one (presumably so
# that re-executing this cell does not collide with a still-open run).
mlflow.end_run()
mlflow.start_run()


# One boolean mask per hyperparameter: True where the run used the baseline value.
at_base_confidence_bound = runs_df['params.confidence_bound'] == base_confidence_bound
at_base_meta_pdf_ci = runs_df['params.meta_pdf_ci'] == base_meta_pdf_ci
at_base_part_pdf_ci = runs_df['params.part_pdf_ci'] == base_part_pdf_ci
at_base_part_dim = runs_df['params.part_dim'] == part_dim_base

# Runs where only part_pdf_ci varies (every other hyperparameter at baseline).
part_pdf_ci_analysis_df = runs_df.loc[
    at_base_confidence_bound & at_base_meta_pdf_ci & at_base_part_dim
]
# Runs where only part_dim varies (every other hyperparameter at baseline).
part_dim_analysis_df = runs_df.loc[
    at_base_meta_pdf_ci & at_base_part_pdf_ci & at_base_confidence_bound
]

# Split each selection by part type so every part type can be handled separately.
part_pdf_ci_part_groups = part_pdf_ci_analysis_df.groupby('params.part_type')
part_dim_part_groups = part_dim_analysis_df.groupby('params.part_type')

def run_experiment(df_groups, param_col: str):
    """Plot average part-PDF entropy against one hyperparameter, one line per group.

    For every ``(part_type, frame)`` pair in ``df_groups`` the rows are ordered by
    ``param_col`` and ``metrics.average_part_pdf_entropy`` is plotted against it.
    Each legend entry shows the part type and the Pearson correlation between the
    hyperparameter and the entropy. The figure is written to
    ``psig_matcher/experiments/graphs/entropy_of_system_vs_{param_col}.png``
    (relative to the current working directory) and logged as an MLflow artifact.

    Args:
        df_groups: Iterable of ``(part_type, DataFrame)`` pairs, e.g. a pandas
            ``groupby`` object. Each frame must contain ``param_col`` and
            ``metrics.average_part_pdf_entropy``.
        param_col: Name of the hyperparameter column used for the x axis.

    Returns:
        None.
    """
    metric_col = 'metrics.average_part_pdf_entropy'
    # A dedicated figure keeps this plot independent of any other pyplot state.
    fig, ax = plt.subplots()

    for part_type, part_group in df_groups:
        # sort_values returns a new frame; the group handed in by the caller is
        # left untouched (an in-place sort on a groupby sub-frame mutates a
        # slice and triggers pandas' SettingWithCopyWarning).
        sorted_group = part_group.sort_values(by=param_col)
        param_values = sorted_group[param_col].to_numpy()
        average_entropys = sorted_group[metric_col].to_numpy()
        correlation = np.corrcoef(param_values, average_entropys)[0, 1]
        ax.plot(param_values, average_entropys, label=f'{part_type} - Correlation: {correlation:.2f}')

    ax.legend()
    ax.set_title(f'Entropy of System vs {param_col}')
    ax.set_xlabel(f'{param_col}')
    ax.set_ylabel('Entropy of System')

    graph_path = f'psig_matcher/experiments/graphs/entropy_of_system_vs_{param_col}.png'
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(graph_path), exist_ok=True)
    fig.savefig(graph_path)
    mlflow.log_artifact(graph_path)
    # Release the figure so repeated calls neither accumulate figures nor leave
    # an empty one behind for the notebook to render.
    plt.close(fig)
        
# Plot entropy against each swept hyperparameter, with one line per part type.
run_experiment(part_dim_part_groups, 'params.part_dim')
run_experiment(part_pdf_ci_part_groups, 'params.part_pdf_ci')
    

In [35]:

def run_part_type_averaged_experiment(df_groups, param_col: str):
    """Plot entropy against one hyperparameter, averaged across all part types.

    For every group in ``df_groups`` the mean of
    ``metrics.average_part_pdf_entropy`` is taken per value of ``param_col``;
    those per-part-type curves are then averaged at each hyperparameter value.
    Values are aligned by the hyperparameter itself rather than by row position,
    so part types that miss a value (or repeat one) no longer break or skew the
    average. When every part type has exactly one run per hyperparameter value
    this equals a plain positional mean of the sorted groups.

    The figure is written to
    ``psig_matcher/experiments/graphs/averaged_entropy_of_system_vs_{param_col}.png``
    (relative to the current working directory) and logged as an MLflow artifact.

    Args:
        df_groups: Iterable of ``(part_type, DataFrame)`` pairs, e.g. a pandas
            ``groupby`` object. Each frame must contain ``param_col`` and
            ``metrics.average_part_pdf_entropy``.
        param_col: Name of the hyperparameter column used for the x axis.

    Returns:
        None.

    Raises:
        ValueError: If ``df_groups`` yields no groups.
    """
    metric_col = 'metrics.average_part_pdf_entropy'

    # One entropy curve per part type, indexed by the hyperparameter value.
    per_part_type = [
        part_group.groupby(param_col)[metric_col].mean()
        for _, part_group in df_groups
    ]
    if not per_part_type:
        raise ValueError('df_groups contains no part types to average')

    # Outer-align the curves on the hyperparameter value, then average across
    # part types; values missing for a part type are ignored for that point.
    averaged = pd.concat(per_part_type, axis=1).mean(axis=1).sort_index()
    param_values = averaged.index.to_numpy()
    averaged_y_vals = averaged.to_numpy()
    correlation = np.corrcoef(param_values, averaged_y_vals)[0, 1]

    # A dedicated figure keeps this plot independent of any other pyplot state.
    fig, ax = plt.subplots()
    ax.plot(param_values, averaged_y_vals, label=f'Averaged Across Part Types - Correlation: {correlation:.2f}')
    ax.legend()
    ax.set_title(f'Entropy of System vs {param_col}')
    ax.set_xlabel(f'{param_col}')
    ax.set_ylabel('Entropy of System')

    graph_path = f'psig_matcher/experiments/graphs/averaged_entropy_of_system_vs_{param_col}.png'
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(graph_path), exist_ok=True)
    fig.savefig(graph_path)
    mlflow.log_artifact(graph_path)
    # Release the figure so repeated calls neither accumulate figures nor leave
    # an empty one behind for the notebook to render.
    plt.close(fig)
    
# Plot the part-type-averaged entropy against each swept hyperparameter.
run_part_type_averaged_experiment(part_dim_part_groups, 'params.part_dim')
run_part_type_averaged_experiment(part_pdf_ci_part_groups, 'params.part_pdf_ci')


<Figure size 640x480 with 0 Axes>