List all the team members BITS ID ,Name along with % of contribution in this assignment:
1. 2025ae05985 Ahamed Imthias 100% 
2. 2025ae05984 CHAKRAVADHANULA VINAY KUMAR 100%
3. 2025ae05986 ROUZIF RASHEED MOOPAN 100%
4. 2025ae05987 Y GOWTHAM KUMAR REDDY 100%
5. 2025ae05983 SAKSHAM JINDAL 100%

In [7]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.preprocessing import LabelEncoder

# --- Configuration ---
CSV_FILE_PATH = "Medicalpremium.csv" # Ensure this file is in the same directory
DECISION_TREE_MAX_DEPTH = 4
PROLOG_OUTPUT_FILE = "ACI_Assignment_2_Part_B_Group_217.pl"

def generate_prolog_rules_from_csv(csv_file_path, max_depth):

    # Point 1: Data Acquisition and Preprocessing for Decision Tree
    try:
        df = pd.read_csv(csv_file_path)
        print(f"Successfully loaded data from {csv_file_path}. Shape: {df.shape}")
    except FileNotFoundError:
        print(f"Error: The file '{csv_file_path}' was not found.")
        print("Please ensure the CSV file is in the same directory as this script/notebook or provide the full path.")
        print("Exiting as the dataset is essential.")
        return "", [], {}
    except Exception as e:
        print(f"An error occurred while reading the CSV file: {e}")
        print("Exiting due to data loading error.")
        return "", [], {}

    median_premium = df['PremiumPrice'].median()
    df['PremiumCategory'] = df['PremiumPrice'].apply(lambda x: 'High' if x > median_premium else 'Low')
    print(f"Median PremiumPrice used for classification: {median_premium:.2f}")
    print(f"PremiumCategory distribution:\n{df['PremiumCategory'].value_counts()}")

    df = df.drop('PremiumPrice', axis=1)

    X = df.drop('PremiumCategory', axis=1)
    y = df['PremiumCategory']

    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    if 'High' in le.classes_ and 'Low' in le.classes_:
        if list(le.classes_)[0] == 'High':
            y_encoded = 1 - y_encoded
            target_names = ['Low', 'High']
        else:
            target_names = ['Low', 'High']
    else:
        target_names = list(le.classes_)

    print(f"Target variable mapping: {dict(zip(le.classes_, le.transform(le.classes_)))}")
    print(f"Internal target names for decision tree: {target_names}")

    # Point 1: Build Decision Tree
    dt_classifier = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
    dt_classifier.fit(X, y_encoded)
    print(f"\nDecision Tree Classifier trained with max_depth={max_depth}.")

    # Point 2: Create rules from the decision tree (text format for initial understanding)
    print("\n--- Decision Tree Rules (Text Format) ---")
    tree_rules_text = export_text(dt_classifier, feature_names=list(X.columns), class_names=target_names)
    print(tree_rules_text)

    # Point 2: Function to extract structured rules for Python and Prolog
    def extract_structured_rules(tree, feature_names, target_names):
        left      = tree.tree_.children_left
        right     = tree.tree_.children_right
        threshold = tree.tree_.threshold
        features  = [feature_names[i] for i in tree.tree_.feature]
        value     = tree.tree_.value

        rules = []

        def recurse(node, current_rule):
            if threshold[node] != -2:
                left_rule = current_rule + [(features[node], '<=', threshold[node])]
                recurse(left[node], left_rule)

                right_rule = current_rule + [(features[node], '>', threshold[node])]
                recurse(right[node], right_rule)
            else:
                class_values = value[node][0]
                predicted_class_idx = class_values.argmax()
                predicted_class_name = target_names[predicted_class_idx]
                rules.append((current_rule, predicted_class_name))

        recurse(0, [])
        return rules

    structured_python_rules = extract_structured_rules(dt_classifier, list(X.columns), target_names)

    # Point 3: Code the rules into a Prolog Knowledge base
    prolog_kb = []
    prolog_kb.append("% Prolog Knowledge Base for Medical Insurance Premium Prediction")
    prolog_kb.append(f"% Generated from Decision Tree with max_depth = {max_depth}")
    prolog_kb.append("% Features: Age, Diabetes, BloodPressureProblems, AnyTransplants, AnyChronicDiseases, Height, Weight, KnownAllergies, HistoryOfCancerInFamily, NumberOfMajorSurgeries")
    prolog_kb.append("% Target: Premium (low or high)")
    prolog_kb.append("\n")

    feature_map = {
        'Age': 'Age',
        'Diabetes': 'Diabetes',
        'BloodPressureProblems': 'BPProblems',
        'AnyTransplants': 'Transplants',
        'AnyChronicDiseases': 'ChronicDiseases',
        'Height': 'Height',
        'Weight': 'Weight',
        'KnownAllergies': 'Allergies',
        'HistoryOfCancerInFamily': 'CancerFamily',
        'NumberOfMajorSurgeries': 'MajorSurgeries'
    }

    predicate_args_vars = [feature_map[col] for col in X.columns]
    predicate_head_template = f"premium(PremiumCategory)"

    for i, (conditions, prediction) in enumerate(structured_python_rules):
        rule_conditions = []
        for feature, op, threshold in conditions:
            prolog_op = ''
            if op == '<=':
                prolog_op = '=<'
            elif op == '>':
                prolog_op = '>'

            mapped_feature_var = feature_map.get(feature, feature)
            rule_conditions.append(f"{mapped_feature_var} {prolog_op} {threshold:.1f}")

        if rule_conditions:
            prolog_rule = f"{predicate_head_template.replace('PremiumCategory', prediction.lower())} :- " + ", ".join(rule_conditions) + "."
        else:
            prolog_rule = f"{predicate_head_template.replace('PremiumCategory', prediction.lower())}."

        prolog_kb.append(prolog_rule)

    prolog_kb_str = "\n".join(prolog_kb)
    return prolog_kb_str, structured_python_rules, feature_map

# Point 4: Get account holder details as input and predict in Python
def predict_premium_from_rules(account_holder_details, structured_rules):
    for conditions, prediction in structured_rules:
        rule_matches = True
        for feature, op, threshold in conditions:
            if feature not in account_holder_details:
                print(f"Warning: Feature '{feature}' missing from account holder details. Cannot apply rule.")
                rule_matches = False
                break
            
            value = account_holder_details[feature]

            if op == '<=':
                if not (value <= threshold):
                    rule_matches = False
                    break
            elif op == '>':
                if not (value > threshold):
                    rule_matches = False
                    break

        if rule_matches:
            return prediction
    
    return "Unknown"

if __name__ == "__main__":
    prolog_output, python_rules, feature_map_dict = generate_prolog_rules_from_csv(CSV_FILE_PATH, DECISION_TREE_MAX_DEPTH)

    if prolog_output:
        print("\n--- Generated Prolog Knowledge Base ---")
        print(prolog_output)

        with open(PROLOG_OUTPUT_FILE, "w") as f:
            f.write(prolog_output)
        print(f"\nProlog knowledge base saved to '{PROLOG_OUTPUT_FILE}'")

        print("\n--- Dynamic Python-based Prediction (Point 4) ---")

        feature_prompts = {
            'Age': {'type': int, 'prompt': "Enter Age (e.g., 30): "},
            'Diabetes': {'type': int, 'prompt': "Has Diabetes? (0 for No, 1 for Yes): "},
            'BloodPressureProblems': {'type': int, 'prompt': "Has Blood Pressure Problems? (0 for No, 1 for Yes): "},
            'AnyTransplants': {'type': int, 'prompt': "Had Any Transplants? (0 for No, 1 for Yes): "},
            'AnyChronicDiseases': {'type': int, 'prompt': "Has Any Chronic Diseases? (0 for No, 1 for Yes): "},
            'Height': {'type': float, 'prompt': "Enter Height in cm (e.g., 170.5): "},
            'Weight': {'type': float, 'prompt': "Enter Weight in kg (e.g., 75.2): "},
            'KnownAllergies': {'type': int, 'prompt': "Has Known Allergies? (0 for No, 1 for Yes): "},
            'HistoryOfCancerInFamily': {'type': int, 'prompt': "Has History of Cancer in Family? (0 for No, 1 for Yes): "},
            'NumberOfMajorSurgeries': {'type': int, 'prompt': "Enter Number of Major Surgeries (e.g., 0, 1, 2): "}
        }

        while True:
            account_holder_input = {}
            print("\n--- Enter Account Holder Details for Prediction ---")
            for feature, details in feature_prompts.items():
                while True:
                    try:
                        user_input = input(details['prompt'])
                        account_holder_input[feature] = details['type'](user_input)
                        break
                    except ValueError:
                        print(f"Invalid input. Please enter a valid {details['type'].__name__}.")
            
            prediction = predict_premium_from_rules(account_holder_input, python_rules)
            print(f"\nAccount Holder Details: {account_holder_input}")
            print(f"Predicted Premium Category (Python): {prediction}")

            another_prediction = input("\nDo you want to predict for another account holder? (yes/no): ").lower()
            if another_prediction != 'yes':
                break


Successfully loaded data from Medicalpremium.csv. Shape: (986, 11)
Median PremiumPrice used for classification: 23000.00
PremiumCategory distribution:
PremiumCategory
Low     500
High    486
Name: count, dtype: int64
Target variable mapping: {'High': np.int64(0), 'Low': np.int64(1)}
Internal target names for decision tree: ['Low', 'High']

Decision Tree Classifier trained with max_depth=4.

--- Decision Tree Rules (Text Format) ---
|--- Age <= 46.50
|   |--- Age <= 38.50
|   |   |--- AnyTransplants <= 0.50
|   |   |   |--- HistoryOfCancerInFamily <= 0.50
|   |   |   |   |--- class: Low
|   |   |   |--- HistoryOfCancerInFamily >  0.50
|   |   |   |   |--- class: Low
|   |   |--- AnyTransplants >  0.50
|   |   |   |--- Age <= 29.00
|   |   |   |   |--- class: Low
|   |   |   |--- Age >  29.00
|   |   |   |   |--- class: High
|   |--- Age >  38.50
|   |   |--- AnyChronicDiseases <= 0.50
|   |   |   |--- HistoryOfCancerInFamily <= 0.50
|   |   |   |   |--- class: Low
|   |   |   |--- Histo