In [13]:
import os
import logging
import pandas as pd
import numpy as np
import csv
from scipy.stats import ks_2samp

# Absolute location of the log file used by every cell in this notebook.
log_path = r'C:\Users\kanha\BAM_kgupta\radiographic_identifier\logs\similarity.log'
log_dir = os.path.dirname(log_path)

# Ensure the logs directory exists. Skip the call when the path has no
# directory component, because os.makedirs('') raises FileNotFoundError.
if log_dir:
    os.makedirs(log_dir, exist_ok=True)

# Create the log file if it doesn't exist. Append mode never truncates an
# existing log and avoids a separate existence check.
with open(log_path, 'a'):
    pass

print(f'Log file ready at: {log_path}')

# Configure logging to write to the file prepared above. The handler must use
# log_path itself: a relative name such as 'logs/similarity.log' is resolved
# against the current working directory and would point at a different file.
logging.basicConfig(
    filename=log_path, level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

Log file ready at: C:\Users\kanha\BAM_kgupta\radiographic_identifier\logs\similarity.log


In [37]:
class SimilarityAnalysis:
    """Compare one main sample against sibling samples with a two-sample KS test.

    The ``config`` mapping must provide these keys:
        Sample_name: file stem of the main sample (``<Sample_name>.csv``).
        Input_Path: root folder that contains ``Processed_Data``.
        Output_Path: root folder under which the result file is written.
        No_of_pores: value used to build the ``Pores_<n>`` folder name.

    Every CSV file is expected to contain a ``distance`` column.
    """

    def __init__(self, config):
        self.config = config
        self.samples = []        # file names of the comparison CSVs
        self.main_sample = None  # DataFrame of the main sample, set by load_data()

    def _similarity_dir(self):
        """Return the folder holding the per-sample similarity CSV files."""
        similarity_dir = os.path.join(
            self.config["Input_Path"],
            "Processed_Data",
            f"Pores_{self.config['No_of_pores']}",
            "Similarity",
        )
        # Normalise to forward slashes so a POSIX-style Input_Path joined on
        # Windows does not end up with mixed separators.
        return similarity_dir.replace("\\", "/")

    def load_data(self):
        """Load the main sample and collect the available comparison samples.

        Populates ``self.main_sample`` with the main sample's DataFrame and
        ``self.samples`` with the sorted names of every other ``.csv`` file in
        the similarity folder.

        Raises:
            ValueError: if no comparison sample exists besides the main one.
            Exception: any error from reading the folder or the CSV file is
                logged and re-raised unchanged.
        """
        try:
            base_path = self._similarity_dir()
            main_file = f"{self.config['Sample_name']}.csv"
            sample_path = os.path.join(base_path, main_file).replace("\\", "/")
            self.main_sample = pd.read_csv(sample_path)

            # Only regular .csv files are samples; stray files or sub-folders
            # in the directory must not be handed to pd.read_csv later on.
            # Sorting makes the comparison order deterministic.
            self.samples = sorted(
                entry
                for entry in os.listdir(base_path)
                if entry != main_file
                and entry.lower().endswith(".csv")
                and os.path.isfile(os.path.join(base_path, entry))
            )

            if not self.samples:
                # Raise instead of calling exit(): exit() would terminate the
                # interpreter (or notebook kernel) and bypass the error
                # handling of the caller.
                raise ValueError("Only one sample found. No comparison possible.")
            logging.info("Data loaded successfully.")
        except Exception as e:
            logging.error(f"Error loading data: {e}")
            raise

    def compare_samples(self, sample1, sample2):
        """Perform a KS-test comparison between two samples.

        Args:
            sample1: DataFrame with a ``distance`` column.
            sample2: DataFrame with a ``distance`` column.

        Returns:
            The p-value of the two-sample Kolmogorov-Smirnov test on the
            ``distance`` columns, expressed as a percentage (p-value * 100).

        Raises:
            Exception: any failure (e.g. a missing column) is logged and
                re-raised unchanged.
        """
        try:
            ks_test = ks_2samp(sample1["distance"], sample2["distance"])
            return ks_test.pvalue * 100  # p-value percentage
        except Exception as e:
            logging.error(f"Error in sample comparison: {e}")
            raise

    def compute_similarity(self):
        """Compute similarity between the main sample and every other sample.

        Returns:
            dict mapping each comparison sample's file stem to its similarity
            score (KS-test p-value in percent).

        Raises:
            Exception: any read or comparison failure is logged and re-raised.
        """
        results = {}
        try:
            base_path = self._similarity_dir()
            for file in self.samples:
                sample_df = pd.read_csv(os.path.join(base_path, file))
                similarity_score = self.compare_samples(self.main_sample, sample_df)
                # splitext removes only the trailing extension, unlike
                # str.replace which would strip ".csv" anywhere in the name.
                results[os.path.splitext(file)[0]] = similarity_score
                logging.info(f"Compared {self.config['Sample_name']} with {file}: {similarity_score:.2f}% similarity")
            return results
        except Exception as e:
            logging.error(f"Error computing similarity: {e}")
            raise

    def save_results(self, results):
        """Save similarity results to a text file.

        The file is written to
        ``<Output_Path>/Pores_<n>/Similarity_result/<Sample_name>_SIMILARITY.txt``
        with one ``<sample>: <score>%`` line per entry.

        Args:
            results: dict mapping sample names to similarity scores.

        Raises:
            Exception: any filesystem error is logged and re-raised.
        """
        try:
            # "Pores_<n>" is a single folder name, matching the input layout;
            # joining "Pores_" and the number separately would nest them.
            output_path = os.path.join(
                self.config["Output_Path"],
                f"Pores_{self.config['No_of_pores']}",
                "Similarity_result",
            )
            os.makedirs(output_path, exist_ok=True)
            output_file = os.path.join(output_path, f"{self.config['Sample_name']}_SIMILARITY.txt")

            with open(output_file, "w") as f:
                for sample, score in results.items():
                    f.write(f"{sample}: {score:.2f}%\n")
            logging.info("Similarity results saved successfully.")
        except Exception as e:
            logging.error(f"Error saving results: {e}")
            raise

    def run_analysis(self):
        """Run the full pipeline: load data, compute similarity, save results.

        Raises:
            Exception: the first failure from any stage is logged and re-raised.
        """
        try:
            self.load_data()
            results = self.compute_similarity()
            self.save_results(results)
            logging.info("Similarity analysis completed successfully.")
        except Exception as e:
            logging.error(f"Analysis pipeline failed: {e}")
            raise

if __name__ == "__main__":
    # Placeholder settings for a manual run; swap in real configuration
    # loading once it is available.
    config = dict(
        Sample_name="XCT-1",
        Input_Path="/Users/kanha/BAM_kgupta/radiographic_identifier/Data/",
        Output_Path="./Output/",
        No_of_pores=16,
    )

    # Build the analysis from the settings above and execute every stage.
    analysis = SimilarityAnalysis(config)
    analysis.run_analysis()

In [27]:
import os

# Root folder of the processed data used for this path check.
base_path = "/Users/kanha/BAM_kgupta/radiographic_identifier/Data/Processed_Data"

# Assemble the location of one sample file below the root folder.
file_path = os.path.join(
    base_path,
    "Pores_10",
    "Similarity",
    "XCT-1.csv",
)

# Swap every backslash for a forward slash so the separators are uniform.
fixed_path = "/".join(file_path.split("\\"))

# Show the normalised path.
print(fixed_path)


/Users/kanha/BAM_kgupta/radiographic_identifier/Data/Processed_Data/Pores_10/Similarity/XCT-1.csv
