In [1]:
from fast_cleaning import fast_cleaning_longer, fast_cleaning_merge
from cleaning_old_longer import cleaning_old_longer
from cleaning_old_merge import cleaning_old_merge
from validation import get_efficiency, get_fake_rate
from coating_cleaning import coating_cleaning
from matplotlib import pyplot as plt
from get_data import *
from time import time
import pandas as pd
import matplotlib
import os


In [2]:
# Directory holding the per-event input files.
path = "data/selecting_pri_and_sec"
# os.listdir returns entries in arbitrary order, while later cells pair
# consecutive entries as (hit file, track file). Sorting makes the pairing
# deterministic across runs and filesystems.
# NOTE(review): assumes each event's hit file sorts immediately before its
# track file — confirm against the actual file naming scheme.
event_list = sorted(os.listdir(path))

# Display names of the cleaning algorithms; kept index-aligned with
# cleaning_func_list below.
cleaning_type_list = ["cleaning_old_longer",
                      "cleaning_old_merge",
                      "fast_cleaning_longer",
                      "fast_cleaning_merge",
                      "coating_cleaning"]

# Callables implementing the algorithms, in the same order as the names above.
cleaning_func_list = [cleaning_old_longer,
                      cleaning_old_merge,
                      fast_cleaning_longer,
                      fast_cleaning_merge,
                      coating_cleaning]

# Subplot titles, in the order the result frames are plotted.
plot_title = ["Efficiency", "Cleaning efficiency/Raw efficiency", "Time", "Fake rate"]

In [3]:
# One empty list per cleaning algorithm, keyed by algorithm name.
result = {cleaning_type: [] for cleaning_type in cleaning_type_list}

# Regroup the flat file listing into consecutive chunks of two entries.
# NOTE: this rebinds event_list, so the cell is not idempotent — re-run the
# cell that lists the directory before re-running this one.
event_list = [event_list[i:i + 2] for i in range(0, len(event_list), 2)]

# list.append() returns None and mutates the list in place, so passing its
# result as `columns` produced a frame with no declared columns and leaked a
# "raw" entry into cleaning_type_list (and hence into the other two frames).
# Build the extended column list without touching cleaning_type_list.
result_efficiency = pd.DataFrame(columns=[*cleaning_type_list, "raw"])
result_time = pd.DataFrame(columns=cleaning_type_list)
result_fake_rate = pd.DataFrame(columns=cleaning_type_list)

In [None]:
# Run every cleaning algorithm on every event and record, per event:
# wall-clock time, efficiency (raw and after cleaning) and fake rate.
for event_number, event in enumerate(event_list):
    # Each entry is expected to hold two file names; an odd number of files in
    # the directory leaves a trailing one-element entry, which would otherwise
    # raise IndexError on event[1].
    if len(event) < 2:
        print(f"Skipping incomplete event entry: {event}")
        continue
    hit_file, track_file = event[0], event[1]

    tracks = get_tracks_data(f'{path}/{track_file}')
    hits = get_hits_data_for_validation(f'{path}/{hit_file}')

    print(f"###################################{hit_file}###################################")
    if not len(tracks) or not len(hits):
        print("ZERO")
        continue

    # The baseline does not depend on the cleaning algorithm, so compute it
    # once per event, before any cleaning function has seen the tracks.
    result_efficiency.at[event_number, "raw"] = get_efficiency(tracks, hits, min_length=9)

    for cleaning_type, cleaning_func in zip(cleaning_type_list, cleaning_func_list):
        # Hand each algorithm its own copies of the tracks; copying happens
        # outside the timed region so only the cleaning call is measured.
        tracks_copy = [track.copy() for track in tracks]

        started = time()
        cleaned_tracks = cleaning_func(tracks_copy)
        result_time.at[event_number, cleaning_type] = time() - started

        result_efficiency.at[event_number, cleaning_type] = get_efficiency(cleaned_tracks, hits, min_length=9)
        result_fake_rate.at[event_number, cleaning_type] = get_fake_rate(cleaned_tracks, hits)

In [5]:
# Keep only the events whose raw efficiency is not zero, then express every
# column of those rows as a ratio to the row's raw efficiency.
has_nonzero_raw = result_efficiency["raw"].ne(0)
nonzero_raw_efficiency = result_efficiency.loc[has_nonzero_raw]
temp_efficiency = nonzero_raw_efficiency.divide(nonzero_raw_efficiency["raw"], axis="index")

In [None]:
# Draw one stacked panel per metric, one line per cleaning algorithm.
matplotlib.rcParams.update({'font.size': 20})
fig, axs = plt.subplots(4, 1, figsize=(30, 30))

frames_to_plot = [result_efficiency, temp_efficiency, result_time, result_fake_rate]

for panel, title, frame in zip(axs, plot_title, frames_to_plot):
    # The "raw" baseline column is not drawn as a line of its own.
    algorithm_columns = frame.columns.difference(["raw"])
    for name in algorithm_columns:
        panel.plot(frame.index, frame[name], label=name)
    panel.set_title(title)
    panel.set_xlabel("Индекс")
    panel.set_ylabel("Значение")
    panel.legend()

plt.tight_layout()
plt.show()