# Import Libraries

In [None]:
import json
import numpy as np
import pandas as pd

# Define Functions

In [None]:
def load_weights_from_csv(file_path):
    """
    Load weights from a CSV file into a dictionary.

    The CSV is expected to have a "Parameter" column; every other column is
    treated as a disease whose cells hold that parameter's weight.

    Args:
        file_path (str): Path to the CSV file containing weights.

    Returns:
        dict: Dictionary mapping each parameter to a {disease: weight} dict.
            If a parameter appears on several rows, the last row wins.

    Raises:
        KeyError: If the CSV has data rows but no "Parameter" column.
    """
    df = pd.read_csv(file_path)
    # Pick the disease columns by name rather than by position so the mapping
    # stays correct even when "Parameter" is not the first column.
    disease_columns = [column for column in df.columns if column != "Parameter"]
    return {
        row["Parameter"]: {disease: row[disease] for disease in disease_columns}
        for _, row in df.iterrows()
    }

def map_transcript_to_prediction_format(transcript):
    """
    Flatten a transcript output dictionary into the patient-data layout used
    for disease prediction.

    Sections are merged in this order, later sections overwriting earlier
    keys of the same name: "Symptoms & Severity", "Laboratory Tests" (only
    entries that carry a "value"), "Imaging Studies". When chest-pain details
    are present they are stored under "pain_characteristics".

    Args:
        transcript (dict): JSON-like dictionary containing the transcript output.

    Returns:
        dict: Flat patient data ready for disease prediction.
    """
    # Symptoms are copied through unchanged.
    flattened = dict(transcript.get("Symptoms & Severity", {}).items())

    # Lab tests contribute only their measured value; entries without one are skipped.
    flattened.update(
        (test_name, entry["value"])
        for test_name, entry in transcript.get("Laboratory Tests", {}).items()
        if "value" in entry
    )

    # Imaging results are copied through unchanged.
    flattened.update(transcript.get("Imaging Studies", {}).items())

    # Chest-pain descriptors are optional; nothing is added when they are absent or empty.
    chest_pain = transcript.get("Pain Characteristics", {}).get("chest_pain", {})
    if not chest_pain:
        return flattened

    pain_fields = (
        "location",
        "radiation",
        "aggravating_factors",
        "alleviating_factors",
    )
    flattened["pain_characteristics"] = {
        field: chest_pain.get(field, "unknown") for field in pain_fields
    }
    return flattened

def calculate_confidence(diseases):
    """
    Compute a confidence level from the margin between the two leading diseases.

    The margin is expressed as a share of the total of all scores, so the
    result is the same whether the scores are fractions summing to 1 or
    percentages summing to 100. Without that normalisation, percentage-scale
    input would be multiplied by 100 a second time and nearly always hit the
    upper bound.

    Args:
        diseases (dict): Dictionary of disease probabilities.

    Returns:
        float: Confidence percentage, bounded between 10 and 95. The lower
            bound is returned when the dictionary is empty or the scores do
            not sum to a positive number.
    """
    lower_bound, upper_bound = 10.0, 95.0

    total = sum(diseases.values())
    if not diseases or total <= 0:
        # No distribution to compare: report the minimum confidence.
        return lower_bound

    ranked = sorted(diseases.values(), reverse=True)
    top_prob = ranked[0]
    second_prob = ranked[1] if len(ranked) > 1 else 0

    # Confidence grows with the separation between the two leading candidates.
    margin_pct = (top_prob - second_prob) / total * 100
    bounded = max(lower_bound, min(margin_pct, upper_bound))

    return round(bounded, 2)

def predict_disease(patient_data, weights):
    """
    Predict the most likely disease among the conditions listed in ``weights``
    based on patient input data.

    Every key of patient_data that is also a key of weights, and whose value
    is not the string "unknown", adds that parameter's weight to each disease.
    When the summed scores are positive they are converted to percentages
    rounded to two decimals; otherwise the raw sums are reported as-is.

    Args:
        patient_data (dict): A JSON-like dictionary containing structured patient data.
        weights (dict): Dictionary containing weights for each parameter.

    Returns:
        dict: Predicted disease with probabilities and confidence analysis
            (keys "predicted_disease", "probabilities",
            "confidence_percentage", "contributing_factors").

    Raises:
        ValueError: If weights is empty, since no disease set can be derived.
    """
    if not weights:
        raise ValueError("weights must contain at least one parameter")

    # The disease set comes from the first parameter's row; all rows are
    # assumed to list the same diseases.
    disease_names = next(iter(weights.values()))
    scores = {disease: 0 for disease in disease_names}
    factor_contributions = {disease: [] for disease in scores}

    # Accumulate the weight of every recognised, known parameter.
    for key, value in patient_data.items():
        if key in weights and value != "unknown":
            for disease, weight in weights[key].items():
                scores[disease] += weight
                factor_contributions[disease].append({"factor": key, "weight": weight})

    # Normalize probabilities
    total_weight = sum(scores.values())
    if total_weight > 0:
        fractions = {disease: score / total_weight for disease, score in scores.items()}
        probabilities = {
            disease: round(fraction * 100, 2) for disease, fraction in fractions.items()
        }
    else:
        fractions = probabilities = scores

    # calculate_confidence scales the margin by 100 itself, so it is given the
    # 0-1 fractions rather than the already-scaled percentages.
    confidence_percentage = calculate_confidence(fractions)

    # Ties (including the no-evidence case where every score is 0) resolve to
    # the first disease in iteration order.
    predicted_disease = max(probabilities, key=probabilities.get)

    return {
        "predicted_disease": predicted_disease,
        "probabilities": probabilities,
        "confidence_percentage": confidence_percentage,
        "contributing_factors": factor_contributions[predicted_disease]
    }

# Main Function

In [None]:
if __name__ == "__main__":
    # Load weights from CSV
    weights_file = "DUCG_Weight_Table.csv"  # Ensure this file is available in the working directory
    weights = load_weights_from_csv(weights_file)

    # Load example transcript output. The encoding is pinned to UTF-8 so the
    # file is decoded the same way on every platform instead of using the
    # locale default.
    with open("transcript_output.json", "r", encoding="utf-8") as f:
        transcript = json.load(f)

    # Map transcript to prediction format
    patient_data = map_transcript_to_prediction_format(transcript)

    # Predict disease and print the result as indented JSON
    prediction = predict_disease(patient_data, weights)
    print(json.dumps(prediction, indent=4))