In [3]:
import os
import yaml
import csv
import numpy as np
import pandas as pd
from yaml.loader import FullLoader

class SimilarityTest:
    def __init__(self, config_path):
        self.config_QID = self.load_config(config_path)
        self.files = self.get_files()
        self.nos_samples = len(self.files)
        if self.nos_samples < 1:
            print("There is only one sample and nothing to test")
            exit()
        self.main_sample = self.load_main_sample()
        self.dataframes_list = self.load_samples()
        self.Uniqueness_result = pd.DataFrame(columns=self.files)

    def load_config(self, config_path):
        with open(config_path, "rb") as file:
            return yaml.load(file, Loader=FullLoader)

    def get_files(self):
        path, dirs, files = next(os.walk(self.config_QID["Input_Path"] + "Processed_Data/Pores_" + str(self.config_QID["No_of_pores"]) + "/Similarity/"))
        files.remove(self.config_QID["Sample_name"] + ".csv")
        return files

    def load_main_sample(self):
        file_path = self.config_QID["Input_Path"] + "Processed_Data/Pores_" + str(self.config_QID["No_of_pores"]) + "/Similarity/" + self.config_QID["Sample_name"] + ".csv"
        return pd.read_csv(file_path)

    def load_samples(self):
        dataframes_list = []
        for file in self.files:
            file_path = self.config_QID["Input_Path"] + "Processed_Data/Pores_" + str(self.config_QID["No_of_pores"]) + "/Similarity/" + file
            dataframes_list.append(pd.read_csv(file_path))
        return dataframes_list

    def perform_test(self):
        main_file_path = self.config_QID["Input_Path"] + "Processed_Data/Pores_" + str(self.config_QID["No_of_pores"]) + "/Similarity/" + self.config_QID["Sample_name"] + ".csv"
        with open(main_file_path) as mainfile:
            csvReader_main = csv.reader(mainfile, delimiter=',')
            for i, row_main in enumerate(csvReader_main):
                if i == 0:
                    continue
                D, V1, V2 = map(float, row_main[2:5])
                uncertainty_dist = self.config_QID["Uncertainty_window"] * self.config_QID["Resolution_XCT"]
                r1 = np.round(((3*V1)/(4*np.pi))**(1/3))
                r2 = np.round(((3*V2)/(4*np.pi))**(1/3))
                diff_vol1_b = 4/3*np.pi*(r1 + uncertainty_dist)**3 
                diff_vol1_s = 4/3*np.pi*(r1 - uncertainty_dist)**3
                diff_vol2_b = 4/3*np.pi*(r2 + uncertainty_dist)**3 
                diff_vol2_s = 4/3*np.pi*(r2 - uncertainty_dist)**3
                Check_unique_point = self.compare_samples(D, V1, V2, diff_vol1_s, diff_vol1_b, diff_vol2_s, diff_vol2_b)
                self.Uniqueness_result.loc[len(self.Uniqueness_result)] = Check_unique_point
        self.Uniqueness_result.index += 1
        self.save_results()

    def compare_samples(self, D, V1, V2, diff_vol1_s, diff_vol1_b, diff_vol2_s, diff_vol2_b):
        Check_unique_point = []
        for file in self.files:
            Test_result = []
            file_path = self.config_QID["Input_Path"] + "Processed_Data/Pores_" + str(self.config_QID["No_of_pores"]) + "/Similarity/" + file
            with open(file_path) as otherfile:
                csvReader_other = csv.reader(otherfile, delimiter=',')
                for j, row_other in enumerate(csvReader_other):
                    if j == 0:
                        continue
                    D_t, V1_t, V2_t = map(float, row_other[2:5])
                    cond_D = np.abs(D - D_t) <= self.config_QID["Uncertainty_window"] * self.config_QID["Resolution_XCT"]
                    cond_V1 = (diff_vol1_s <= V1_t <= diff_vol1_b) or (diff_vol1_s <= V2_t <= diff_vol1_b)
                    cond_V2 = (diff_vol2_s <= V2_t <= diff_vol2_b)
                    final_cond = cond_D and cond_V1 and cond_V2
                    Test_result.append(final_cond)
            Check_unique_point.append(any(Test_result))
        return Check_unique_point

    def save_results(self):
        output_path = self.config_QID["Output_Path"] + "Pores_" + str(self.config_QID["No_of_pores"]) + "/CSV_data/" + self.config_QID["Sample_name"] + "_similarity_result.csv"
        self.Uniqueness_result.to_csv(output_path, index=False)
        self.write_similarity_results()

    def write_similarity_results(self):
        output_path = self.config_QID["Output_Path"] + "Pores_" + str(self.config_QID["No_of_pores"]) + "/Similarity_result/" + self.config_QID["Sample_name"] + "_SIMILARITY.txt"
        with open(output_path, "a") as file:
            for col in self.Uniqueness_result.columns:
                false_count = self.Uniqueness_result[col].value_counts().get(False, 0)
                truth_count = self.Uniqueness_result[col].value_counts().get(True, 0)
                unique_percent = (truth_count * 100) / (truth_count + false_count) if (truth_count + false_count) > 0 else 0
                file.write(f"{col}  {round(unique_percent, 2)}%\n")
        self.remove_duplicates(output_path)

    def remove_duplicates(self, file_path):
        with open(file_path, "r") as file:
            lines = set(file.readlines())
        with open(file_path, "w") as file:
            file.writelines(lines)

# Example usage:
similarity_test = SimilarityTest("../config/config.yaml")
similarity_test.perform_test()


StopIteration: 