# In [None]:
import os
import csv
import numpy as np
import pandas as pd
from pandas import DataFrame
from module_list import *
from functions import *

class SimilarityTest:
    """Check how many records of one sample have a counterpart in other samples.

    Every CSV in the similarity directory holds one record per data row; the
    values in columns 2, 3 and 4 are read as ``D``, ``V1`` and ``V2``.  For each
    record of the configured main sample and for each other sample, the test
    asks whether *any* record of that other sample agrees with it within the
    uncertainty distance (``Uncertainty_window * Resolution_XCT``).

    Outputs:
        * ``<Output_Path>/Pores_<n>/CSV_data/<sample>_similarity_result.csv``:
          one boolean column per compared file, one row per main record.
        * ``<Output_Path>/Pores_<n>/Similarity_result/<sample>_SIMILARITY.txt``:
          one ``"<file>  <percent>%"`` line per compared file.

    Args:
        config: mapping providing ``Input_Path``, ``Output_Path``,
            ``No_of_pores``, ``Sample_name``, ``Uncertainty_window``,
            ``Resolution_XCT`` and ``Similarity_study``.

    Raises:
        SystemExit: from the constructor when ``Similarity_study`` is falsy.
    """

    def __init__(self, config):
        self.config = config
        self.input_path = config["Input_Path"]
        self.output_path = config["Output_Path"]
        self.num_pores = config["No_of_pores"]
        self.sample_name = config["Sample_name"]
        self.uncertainty_window = config["Uncertainty_window"]
        self.resolution_xct = config["Resolution_XCT"]

        self.similarity_dir = os.path.join(self.input_path, "Processed_Data", f"Pores_{self.num_pores}", "Similarity")
        self.result_dir = os.path.join(self.output_path, f"Pores_{self.num_pores}", "Similarity_result")
        self.csv_output_dir = os.path.join(self.output_path, f"Pores_{self.num_pores}", "CSV_data")

        if not self.config["Similarity_study"]:
            # The ``exit()`` helper is injected by ``site`` for interactive use;
            # raising SystemExit directly gives the same clean exit (status 0).
            raise SystemExit(0)

    def load_samples(self):
        """Return the sorted names of all files to compare the main sample with.

        Raises:
            ValueError: if ``<Sample_name>.csv`` is not in the similarity
                directory (or the directory cannot be listed).
            SystemExit: if the main sample is the only file present.
        """
        # A missing directory makes os.walk yield nothing; fall back to an
        # empty listing so the error below names the real problem.
        _, _, found = next(os.walk(self.similarity_dir), (None, [], []))
        main_name = self.sample_name + ".csv"
        if main_name not in found:
            raise ValueError(f"{main_name} not found in {self.similarity_dir}")

        # Sorted so the result columns and report lines have a stable order.
        files = sorted(name for name in found if name != main_name)
        if not files:
            print("There is only one sample and nothing to test")
            raise SystemExit(0)
        return files

    @staticmethod
    def _read_records(csv_path):
        """Return ``(D, V1, V2)`` floats from columns 2-4 of every data row."""
        with open(csv_path, newline="") as handle:
            reader = csv.reader(handle, delimiter=',')
            next(reader, None)  # the first row is the header
            return [tuple(float(cell) for cell in row[2:5]) for row in reader if row]

    @staticmethod
    def _volume_window(volume, margin):
        """Return the ``(low, high)`` volume bounds accepted for ``volume``.

        The volume is converted to the radius of the equal-volume sphere, the
        radius is rounded to the nearest integer, and the bounds are the sphere
        volumes at ``radius - margin`` and ``radius + margin``.
        """
        radius = np.round(((3 * volume) / (4 * np.pi)) ** (1 / 3))
        low = (4 / 3) * np.pi * (radius - margin) ** 3
        high = (4 / 3) * np.pi * (radius + margin) ** 3
        return low, high

    @staticmethod
    def _record_matches(distance, window1, window2, candidate, margin):
        """Tell whether ``candidate`` agrees with a main record.

        The distance must differ by at most ``margin`` and the candidate's two
        volumes must fall into the two windows as an unordered pair: either
        (V1_t in window1 and V2_t in window2) or the swapped assignment.  Each
        candidate volume is used for exactly one window, so a single volume
        lying in both windows cannot satisfy the test on its own.
        """
        cand_distance, cand_v1, cand_v2 = candidate
        if abs(distance - cand_distance) > margin:
            return False
        low1, high1 = window1
        low2, high2 = window2
        same_order = (low1 <= cand_v1 <= high1) and (low2 <= cand_v2 <= high2)
        swapped = (low1 <= cand_v2 <= high1) and (low2 <= cand_v1 <= high2)
        return bool(same_order or swapped)

    def compute_similarity(self):
        """Run the comparison, write the boolean table as CSV and the report."""
        files = self.load_samples()
        uncertainty_dist = self.uncertainty_window * self.resolution_xct

        # Parse every file exactly once instead of once per main record.
        main_records = self._read_records(os.path.join(self.similarity_dir, f"{self.sample_name}.csv"))
        other_records = [self._read_records(os.path.join(self.similarity_dir, name)) for name in files]

        rows = []
        for distance, vol1, vol2 in main_records:
            window1 = self._volume_window(vol1, uncertainty_dist)
            window2 = self._volume_window(vol2, uncertainty_dist)
            rows.append([
                any(
                    self._record_matches(distance, window1, window2, candidate, uncertainty_dist)
                    for candidate in candidates
                )
                for candidates in other_records
            ])

        Uniqueness_result = DataFrame(rows, columns=files)
        Uniqueness_result.index = range(1, len(rows) + 1)  # 1-based record numbers

        os.makedirs(self.csv_output_dir, exist_ok=True)
        Uniqueness_result.to_csv(os.path.join(self.csv_output_dir, f"{self.sample_name}_similarity_result.csv"), index=False)
        self.write_results(Uniqueness_result, files)

    def write_results(self, Uniqueness_result, files):
        """Append one ``"<file>  <percent>%"`` line per file to the report.

        ``percent`` is the share of ``True`` entries among the ``True``/``False``
        entries of that file's column (0 when the column has neither).  The
        report is de-duplicated afterwards, so repeated runs do not pile up
        identical lines.
        """
        os.makedirs(self.result_dir, exist_ok=True)
        result_file = os.path.join(self.result_dir, f"{self.sample_name}_SIMILARITY.txt")
        with open(result_file, "a") as report:
            for name in files:
                counts = Uniqueness_result[name].value_counts()
                false_count = counts.get(False, 0)
                truth_count = counts.get(True, 0)
                total = truth_count + false_count
                unique_percent = (truth_count * 100) / total if total > 0 else 0
                report.write(f"{name}  {round(unique_percent, 2)}%\n")

        self.remove_duplicates(result_file)

    def remove_duplicates(self, file_path):
        """Rewrite ``file_path`` keeping only the first occurrence of each line.

        Line order is preserved (a ``set`` would scramble it between runs).
        """
        with open(file_path, "r") as source:
            unique_lines = list(dict.fromkeys(source))
        with open(file_path, "w") as target:
            target.writelines(unique_lines)
# Script entry point: build the test from the project configuration and run it.
# Constructing SimilarityTest exits the process when config["Similarity_study"]
# is falsy, so compute_similarity() is only reached when the study is enabled.
# NOTE(review): config_QID is not defined in this file; presumably it comes from
# the star imports of module_list / functions — confirm.
if __name__ == "__main__":
    similarity_test = SimilarityTest(config_QID)
    similarity_test.compute_similarity()


# In [None]:
"""
File: similarity.py
Author: Kanhaiya Gupta
Date: 2023-08-29
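Description: A Python script applying the authentication concept: for every
    record (D, V1, V2) of the configured sample it checks whether any record of
    each other sample agrees within the uncertainty window, and reports the
    percentage of matched records per compared sample.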

"""

from module_list import *
from functions import *

# Similarity study (flat-script version).
#
# Compares every record (D, V1, V2 = CSV columns 2, 3, 4) of the configured
# sample with the records of all other CSV files in the similarity directory,
# writes the boolean result table as CSV and appends a per-file percentage to
# the <Sample_name>_SIMILARITY.txt report.

if not config_QID["Similarity_study"]:
    # ``exit()`` is the interactive helper injected by ``site``; raising
    # SystemExit directly gives the same clean exit (status 0).
    raise SystemExit(0)


def _read_records(csv_path):
    """Return the (D, V1, V2) floats from columns 2-4 of every data row."""
    with open(csv_path, newline="") as handle:
        reader = csv.reader(handle, delimiter=',')
        next(reader, None)  # the first row is the header
        return [tuple(float(cell) for cell in row[2:5]) for row in reader if row]


def _volume_window(volume, margin):
    """Return the (low, high) volume bounds accepted for ``volume``.

    The volume is turned into the radius of the equal-volume sphere, rounded to
    the nearest integer, and the bounds are the sphere volumes at
    ``radius - margin`` and ``radius + margin``.
    """
    radius = np.round(((3*volume)/(4*np.pi))**(1/3))
    return 4/3*np.pi*(radius - margin)**3, 4/3*np.pi*(radius + margin)**3


# Input and output locations, built once instead of at every use.
pores_dir = "Pores_" + str(config_QID["No_of_pores"])
similarity_dir = os.path.join(config_QID["Input_Path"], "Processed_Data", pores_dir, "Similarity", "")
csv_output_dir = os.path.join(config_QID["Output_Path"], pores_dir, "CSV_data")
result_dir = os.path.join(config_QID["Output_Path"], pores_dir, "Similarity_result")
main_csv = os.path.join(similarity_dir, config_QID["Sample_name"] + ".csv")
result_file = os.path.join(result_dir, config_QID["Sample_name"] + "_SIMILARITY.txt")

# assign path
path, dirs, files = next(os.walk(similarity_dir))
files.remove(config_QID["Sample_name"] + ".csv")
files.sort()  # stable column / report order regardless of directory listing order

nos_samples = len(files)  # file count

if nos_samples < 1:
    print("There is only one sample and nothing to test")
    raise SystemExit(0)

# NOTE(review): main_sample and dataframes_list are not used by the comparison
# below; they are kept only so that code relying on these module-level names
# (e.g. a later notebook cell) keeps working — drop them if nothing does.
main_sample = pd.read_csv(main_csv)
dataframes_list = [pd.read_csv(os.path.join(similarity_dir, name)) for name in files]

# Loop-invariant: tolerance applied to the distance and to the sphere radii.
uncertainty_dist = config_QID["Uncertainty_window"]*(config_QID["Resolution_XCT"])

# Parse every file exactly once instead of re-opening each comparison file for
# every record of the main sample.
main_records = _read_records(main_csv)
other_records = [_read_records(os.path.join(similarity_dir, name)) for name in files]

result_rows = []
for D, V1, V2 in main_records:
    diff_vol1_s, diff_vol1_b = _volume_window(V1, uncertainty_dist)
    diff_vol2_s, diff_vol2_b = _volume_window(V2, uncertainty_dist)

    Check_unique_point = []
    for records in other_records:
        matched = False
        for D_t, V1_t, V2_t in records:
            if np.abs(D - D_t) > uncertainty_dist:
                continue
            # The two volumes must match as an unordered pair: each candidate
            # volume is assigned to exactly one window.  (Previously the
            # swapped branch tested V2_t against both windows and was skipped
            # whenever V1_t happened to lie in window 1.)
            same_order = (diff_vol1_s <= V1_t <= diff_vol1_b) and (diff_vol2_s <= V2_t <= diff_vol2_b)
            swapped = (diff_vol1_s <= V2_t <= diff_vol1_b) and (diff_vol2_s <= V1_t <= diff_vol2_b)
            if same_order or swapped:
                matched = True
                break  # one agreeing record is enough
        Check_unique_point.append(matched)

    result_rows.append(Check_unique_point)

Uniqueness_result = DataFrame(result_rows, columns=files)
Uniqueness_result.index = range(1, len(result_rows) + 1)  # 1-based record numbers

os.makedirs(csv_output_dir, exist_ok=True)
Uniqueness_result.to_csv(os.path.join(csv_output_dir, config_QID["Sample_name"] + "_similarity_result.csv"), index=False)

# Append one "<file>  <percent>%" line per compared file, where percent is the
# share of True entries in that file's column.
os.makedirs(result_dir, exist_ok=True)
with open(result_file, "a") as report:
    for column in Uniqueness_result.columns:
        counts = Uniqueness_result[column].value_counts()
        # .get() avoids the KeyError that counts[False] raised whenever a
        # column contained no False at all.
        false_count = counts.get(False, 0)
        truth_count = counts.get(True, 0)
        total = truth_count + false_count
        unique_percent = (truth_count * 100)/total if total > 0 else 0
        report.write(column + "  ")
        report.write(str(round(unique_percent, 2)) + "%" + "\n")

# Drop lines repeated by earlier runs, keeping the first occurrence of each so
# the report order stays stable (a set would scramble it).
with open(result_file, "r") as report:
    unique_lines = list(dict.fromkeys(report))

with open(result_file, "w") as report:
    report.writelines(unique_lines)

