# In [None]:
"""
NaiveBayes_GUI.py (Auto-Fix Version)
✅ No KeyError even if CSV column names differ.
✅ Auto-renames headers based on best match.
✅ Clean, simple interface.
"""

import tkinter as tk
from tkinter import ttk, messagebox, scrolledtext
import pandas as pd
import os

# ---------- DATA PATHS ----------
# Folder that holds the CSV datasets. The default is the original author's
# location; set the NAIVEBAYES_DATASET_DIR environment variable to point the
# app at another folder without editing this file.
DATASET_DIR = os.environ.get(
    "NAIVEBAYES_DATASET_DIR",
    r"C:\Users\ideal\Downloads\Naivebayes-main (1)\Naivebayes-main\Dataset",
)

# Dataset name -> CSV file name inside DATASET_DIR.
_DATASET_FILES = {
    'Animal': "AnimalInformation.csv",
    'Loan': "LoanApprovalupdated.csv",
    'Weather': "weatherAndRoadCondition.csv",
    'Email': "EmailSpamDetectionUpdated.csv",
}

# Dataset name -> full path of its CSV file.
DATA_PATHS = {
    name: os.path.join(DATASET_DIR, filename)
    for name, filename in _DATASET_FILES.items()
}

# ---------- FALLBACK DATA ----------
# Small built-in tables, one per dataset name, used when the CSV file for a
# dataset cannot be found or read.
SAMPLE_DATA = {
    "Animal": pd.DataFrame(
        columns=["Index", "Animal", "Size", "Color", "Pettable"],
        data=[
            (0, "Dog", "Medium", "Black", "Yes"),
            (1, "Dog", "Big", "White", "No"),
            (2, "Rat", "Small", "White", "Yes"),
            (3, "Cow", "Big", "White", "Yes"),
            (4, "Cow", "Small", "Brown", "No"),
            (5, "Cow", "Big", "Black", "Yes"),
            (6, "Rat", "Big", "Brown", "No"),
            (7, "Dog", "Small", "Brown", "Yes"),
            (8, "Dog", "Medium", "Brown", "Yes"),
            (9, "Cow", "Medium", "White", "No"),
        ],
    ),
    "Loan": pd.DataFrame(
        columns=["ID", "AgeGroup", "IncomeLevel", "CreditScore", "EmploymentStatus", "LoanApproved"],
        data=[
            (1, "Young", "High", "Good", "Employed", "Yes"),
            (2, "Middle", "Low", "Poor", "Unemployed", "No"),
            (3, "Senior", "High", "Excellent", "Retired", "Yes"),
            (4, "Young", "Low", "Good", "Employed", "No"),
        ],
    ),
    "Weather": pd.DataFrame(
        columns=["Index", "Weather", "Road", "Traffic", "Engine", "Accident"],
        data=[
            (0, "Rain", "Good", "Light", "No", "Yes"),
            (1, "Snow", "Bad", "Normal", "Yes", "Yes"),
            (2, "Clear", "Average", "High", "No", "Yes"),
            (3, "Rain", "Average", "Light", "No", "No"),
        ],
    ),
    "Email": pd.DataFrame(
        columns=["ID", "Offer", "Link", "Greeting", "Known", "Spam"],
        data=[
            (101, "Yes", "Yes", "Yes", "No", "Yes"),
            (102, "No", "No", "Yes", "Yes", "No"),
            (103, "Yes", "Yes", "No", "No", "Yes"),
        ],
    ),
}

# ---------- CONFIG ----------
# Dataset name -> (label column, ordered feature columns).
DATA_CONFIG = {
    "Animal": (
        "Pettable",
        ["Animal", "Size", "Color"],
    ),
    "Loan": (
        "LoanApproved",
        ["AgeGroup", "IncomeLevel", "CreditScore", "EmploymentStatus"],
    ),
    "Weather": (
        "Accident",
        ["Weather", "Road", "Traffic", "Engine"],
    ),
    "Email": (
        "Spam",
        ["Offer", "Link", "Greeting", "Known"],
    ),
}

# ---------- UTILITY ----------
def _normalize_header(name):
    """Return *name* as a lowercase string without spaces or newlines."""
    return str(name).strip().lower().replace(" ", "").replace("\n", "")


def clean_columns(df):
    """Normalize the column names of *df* in place.

    Each header is lowercased, stripped, and has spaces and newlines removed.
    Headers that are not strings (for example integers) are converted with
    ``str`` first instead of raising ``AttributeError``.

    Args:
        df: DataFrame whose ``columns`` attribute is rewritten.

    Returns:
        The same DataFrame object, for chaining.
    """
    df.columns = [_normalize_header(c) for c in df.columns]
    return df


def auto_fix_columns(df, expected_cols):
    """Rename the columns of *df* to the names listed in *expected_cols*.

    Columns are matched by name first, so a CSV whose columns come in a
    different order is still labeled correctly:

    1. exact match of the normalized names,
    2. substring match in either direction (e.g. ``"animalname"`` matches
       ``"Animal"``),
    3. any expected name still unplaced is given to the remaining columns
       from left to right, which is the purely positional behavior used
       when no names match at all.

    Columns that receive no expected name keep their normalized name. Column
    order and data are left untouched; only the labels change, in place.

    Args:
        df: DataFrame read from a CSV file.
        expected_cols: Column names the rest of the program relies on.

    Returns:
        The same DataFrame object with renamed columns.
    """
    df = clean_columns(df)
    current = list(df.columns)
    expected = list(expected_cols)
    targets = [_normalize_header(name) for name in expected]

    assigned = {}  # column position -> expected name

    def _claim(pending, matches):
        """Give each pending expected name the first free matching column."""
        unplaced = []
        for idx in pending:
            target = targets[idx]
            pos = next(
                (
                    p for p, col in enumerate(current)
                    # Empty names would substring-match everything; skip them.
                    if p not in assigned and col and target and matches(target, col)
                ),
                None,
            )
            if pos is None:
                unplaced.append(idx)
            else:
                assigned[pos] = expected[idx]
        return unplaced

    # Exact matches go first so a short name cannot steal a better column.
    pending = _claim(range(len(expected)), lambda t, c: t == c)
    pending = _claim(pending, lambda t, c: t in c or c in t)

    # Positional fallback for names that matched nothing.
    free = [p for p in range(len(current)) if p not in assigned]
    for idx, pos in zip(pending, free):
        assigned[pos] = expected[idx]

    df.columns = [assigned.get(p, col) for p, col in enumerate(current)]
    return df

def load_dataset(name):
    """Load the dataset called *name*, falling back to the built-in sample.

    The CSV at ``DATA_PATHS[name]`` is read and its columns are renamed with
    ``auto_fix_columns``. A copy of ``SAMPLE_DATA[name]`` is returned instead
    when:

    * no path is configured or the file does not exist,
    * reading or renaming raises,
    * the file has no rows, or
    * a column the classifier needs (label or feature from ``DATA_CONFIG``)
      is still missing after renaming, which would otherwise surface later
      as a ``KeyError`` in the GUI.

    Args:
        name: Dataset key, e.g. ``'Animal'``.

    Returns:
        A DataFrame the caller owns and may modify.

    Raises:
        KeyError: if *name* has no entry in ``SAMPLE_DATA``.
    """
    sample = SAMPLE_DATA[name]
    path = DATA_PATHS.get(name)
    if not path or not os.path.exists(path):
        return sample.copy()

    expected_cols = list(sample.columns)
    try:
        df = auto_fix_columns(pd.read_csv(path), expected_cols)
    except Exception as e:
        # Deliberate best effort: any unreadable file means "use the sample".
        print(f"[Warning] Failed to load {name}: {e}")
        return sample.copy()

    config = DATA_CONFIG.get(name)
    if config is not None:
        label_col, feature_cols = config
        required = [label_col, *feature_cols]
    else:
        required = expected_cols

    missing = [col for col in required if col not in df.columns]
    if missing:
        print(f"[Warning] Failed to load {name}: missing columns {missing}")
        return sample.copy()
    if df.empty:
        print(f"[Warning] Failed to load {name}: file contains no rows")
        return sample.copy()
    return df

def _norm_str(s):
    return str(s).strip().lower()

# ---------- NAIVE BAYES ----------
def categorical_naive_bayes_exact(df, feature_columns, label_column, feature_values):
    """Classify one sample with categorical Naive Bayes and Laplace smoothing.

    Labels, feature cells and query values are all compared after
    ``str(...).strip().lower()``, so case and surrounding whitespace do not
    matter. The input DataFrame is not modified.

    For every class the score is
    ``P(class) * prod((count + 1) / (class_size + k))`` where ``count`` is
    the number of rows of that class whose feature equals the query value
    and ``k`` is the number of distinct values of that feature.

    Args:
        df: Training data containing ``label_column`` and every column in
            ``feature_columns``.
        feature_columns: Names of the feature columns, in order.
        label_column: Name of the class-label column.
        feature_values: Query value for each feature, aligned by position
            with ``feature_columns``.

    Returns:
        dict with keys ``priors`` (class -> prior probability), ``steps``
        (human-readable calculation lines), ``normalized`` (class ->
        posterior probability) and ``prediction`` (most probable class).

    Raises:
        ValueError: if ``df`` has no rows.
        KeyError: if a named column is missing from ``df``.
        IndexError: if ``feature_values`` is shorter than ``feature_columns``.
    """
    total = len(df)
    if total == 0:
        # Without this the function fails later with an opaque max() error.
        raise ValueError("Cannot classify: the training data has no rows.")

    def _normalize(value):
        return str(value).strip().lower()

    # Normalize every column once instead of once per class.
    labels = df[label_column].astype(str).map(_normalize)
    columns = {col: df[col].astype(str).map(_normalize) for col in feature_columns}
    category_counts = {col: len(series.unique()) for col, series in columns.items()}
    queries = [_normalize(v) for v in feature_values]

    classes = labels.unique().tolist()
    priors = {cls: int((labels == cls).sum()) / total for cls in classes}

    class_scores = {}
    result_steps = []
    for cls in classes:
        in_class = labels == cls
        subset_count = int(in_class.sum())
        score = priors[cls]
        result_steps.append(f"--- Class: {cls} ---")
        for i, col in enumerate(feature_columns):
            val = queries[i]
            k = category_counts[col]
            count = int((in_class & (columns[col] == val)).sum())
            # Laplace (add-one) smoothing keeps unseen values from zeroing the score.
            cond_prob = (count + 1) / (subset_count + k)
            score *= cond_prob
            result_steps.append(
                f"P({col}={val}|{cls}) = ({count}+1)/({subset_count}+{k}) = {cond_prob:.4f}"
            )
        class_scores[cls] = score
        result_steps.append(f"Posterior (unnormalized): {score:.6f}\n")

    total_score = sum(class_scores.values())
    normalized = {
        cls: (v / total_score if total_score > 0 else 0)
        for cls, v in class_scores.items()
    }
    prediction = max(normalized, key=normalized.get)
    return {
        "priors": priors,
        "steps": result_steps,
        "normalized": normalized,
        "prediction": prediction,
    }

# ---------- GUI ----------
class NaiveBayesApp(tk.Tk):
    """Main window: pick a dataset, choose feature values, see the prediction.

    Layout: a sidebar with one button per dataset, and a main area with one
    read-only combobox per feature, a Predict button, a headline label and a
    read-only text box showing the calculation steps.
    """

    def __init__(self):
        super().__init__()
        self.title("Naive Bayes Classifier (Auto-Fix)")
        self.geometry("950x600")
        self.datasets = list(DATA_CONFIG.keys())
        # Load every dataset up front so switching in the UI is instant.
        self.loaded = {n: load_dataset(n) for n in self.datasets}
        self.selected_dataset = tk.StringVar(value=self.datasets[0])
        self.dropdown_vars = []  # list of (feature column, tk.StringVar)
        self._build_ui()
        self.select_dataset(self.datasets[0])

    def _build_ui(self):
        """Create the static widgets (sidebar, form container, output area)."""
        sidebar = tk.Frame(self, bg="#e0e0e0", width=200)
        sidebar.pack(side="left", fill="y")
        tk.Label(sidebar, text="Datasets", bg="#e0e0e0", font=("Segoe UI", 12, "bold")).pack(pady=10)
        for name in self.datasets:
            # n=name binds the current value; a plain closure would see only the last name.
            ttk.Button(sidebar, text=name, command=lambda n=name: self.select_dataset(n)).pack(fill="x", padx=10, pady=4)

        main = tk.Frame(self, bg="#f9f9f9")
        main.pack(side="left", fill="both", expand=True, padx=10, pady=10)
        self.title_label = tk.Label(main, text="", font=("Segoe UI", 13, "bold"), bg="#f9f9f9")
        self.title_label.pack(anchor="nw")
        self.form_frame = tk.Frame(main, bg="#f9f9f9")
        self.form_frame.pack(anchor="nw", pady=8)

        ttk.Button(main, text="Predict", command=self.on_predict).pack(anchor="nw", pady=4)
        self.output_label = tk.Label(main, text="Prediction: -", bg="#f9f9f9", font=("Segoe UI", 12, "bold"))
        self.output_label.pack(anchor="nw", pady=4)

        self.output_box = scrolledtext.ScrolledText(main, width=100, height=22, wrap="word", font=("Consolas", 10))
        self.output_box.pack(fill="both", expand=True)
        self.output_box.config(state="disabled")

    def _set_output(self, text):
        """Replace the contents of the read-only output box with *text*."""
        self.output_box.config(state="normal")
        self.output_box.delete("1.0", "end")
        self.output_box.insert("end", text)
        self.output_box.config(state="disabled")

    def select_dataset(self, name):
        """Switch to dataset *name* and rebuild the feature dropdowns."""
        self.selected_dataset.set(name)
        df = self.loaded[name]
        label_col, feature_cols = DATA_CONFIG[name]
        self.title_label.config(text=f"Dataset: {name} | Label: {label_col}")

        for w in self.form_frame.winfo_children():
            w.destroy()

        self.dropdown_vars = []
        for col in feature_cols:
            # A column missing from the data gives an empty dropdown rather
            # than a KeyError; on_predict then asks the user for a value.
            if col in df.columns:
                vals = sorted(set(df[col].dropna().astype(str)))
            else:
                vals = []
            var = tk.StringVar(value=vals[0] if vals else "")
            self.dropdown_vars.append((col, var))
            fr = tk.Frame(self.form_frame, bg="#f9f9f9")
            fr.pack(anchor="w", pady=3)
            tk.Label(fr, text=f"{col}:", bg="#f9f9f9").pack(side="left")
            ttk.Combobox(fr, values=vals, textvariable=var, state="readonly", width=20).pack(side="left", padx=6)

        # Clear the previous dataset's result so it is not mistaken for this one's.
        self.output_label.config(text="Prediction: -")
        self._set_output("")

    def on_predict(self):
        """Run the classifier on the chosen values and display the result."""
        name = self.selected_dataset.get()
        df = self.loaded[name]
        label_col, feature_cols = DATA_CONFIG[name]
        values = [v.get() for _, v in self.dropdown_vars]
        if any(v == "" for v in values):
            messagebox.showwarning("Missing input", "Please choose all feature values.")
            return
        try:
            res = categorical_naive_bayes_exact(df, feature_cols, label_col, values)
        except (KeyError, ValueError, IndexError, ZeroDivisionError) as exc:
            # Tk would otherwise only print the traceback to the console.
            messagebox.showerror("Prediction failed", f"Could not classify: {exc}")
            return

        self.output_label.config(text=f"Prediction: {res['prediction']}")
        parts = [line + "\n" for line in res["steps"]]
        parts.append("\n=== Posterior Probabilities ===\n")
        parts.extend(f"{k}: {v:.6f}\n" for k, v in res["normalized"].items())
        parts.append(f"\n✅ Final Prediction: {res['prediction']}\n")
        self._set_output("".join(parts))

# Start the GUI only when this file is executed directly, not when imported.
if __name__ == "__main__":
    app = NaiveBayesApp()
    app.mainloop()  # run the Tk event loop


# In [None]:
import tkinter as tk
from tkinter import ttk, messagebox, scrolledtext
import pandas as pd
import os

# Folder that holds the CSV datasets. The default is the original author's
# location; set the NAIVEBAYES_DATASET_DIR environment variable to point the
# app at another folder without editing this file.
DATASET_DIR = os.environ.get(
    "NAIVEBAYES_DATASET_DIR",
    r"C:\Users\ideal\Downloads\Naivebayes-main (1)\Naivebayes-main\Dataset",
)

# Dataset name -> CSV file name inside DATASET_DIR.
_DATASET_FILES = {
    'Animal': "AnimalInformation.csv",
    'Loan': "LoanApprovalupdated.csv",
    'Weather': "weatherAndRoadCondition.csv",
    'Email': "EmailSpamDetectionUpdated.csv",
}

# Dataset name -> full path of its CSV file.
DATA_PATHS = {
    name: os.path.join(DATASET_DIR, filename)
    for name, filename in _DATASET_FILES.items()
}

# Dataset name -> column names the program expects, in file order.
EXPECTED_COLUMNS = {
    "Animal": ["Index", "Animal", "Size", "Color", "Pettable"],
    "Loan": [
        "ID",
        "AgeGroup",
        "IncomeLevel",
        "CreditScore",
        "EmploymentStatus",
        "LoanApproved",
    ],
    "Weather": ["Index", "Weather", "Road", "Traffic", "Engine", "Accident"],
    "Email": ["ID", "Offer", "Link", "Greeting", "Known", "Spam"],
}

# Built-in fallback table used when the Animal CSV cannot be loaded.
SAMPLE_DATA = {
    "Animal": pd.DataFrame(
        columns=EXPECTED_COLUMNS["Animal"],
        data=[
            (0, "Dog", "Medium", "Black", "Yes"),
            (1, "Dog", "Big", "White", "No"),
            (2, "Rat", "Small", "White", "Yes"),
            (3, "Cow", "Big", "White", "Yes"),
            (4, "Cow", "Small", "Brown", "No"),
        ],
    ),
}

# Dataset name -> (label column, ordered feature columns).
DATA_CONFIG = {
    "Animal": ("Pettable", ["Animal", "Size", "Color"]),
}

def load_dataset(name):
    """Load the CSV for *name*, or return a copy of the built-in sample.

    The columns of the CSV are relabeled positionally with
    ``EXPECTED_COLUMNS[name]``; surplus columns on the right are dropped.
    If the file is missing or cannot be read and relabeled (for example it
    has fewer columns than expected), the sample table is used instead.

    The fallback is returned as a copy so that callers modifying the result
    never alter the shared ``SAMPLE_DATA`` table.

    Args:
        name: Dataset key, e.g. ``'Animal'``.

    Returns:
        A DataFrame whose columns are exactly ``EXPECTED_COLUMNS[name]``.

    Raises:
        KeyError: if *name* is unknown to ``DATA_PATHS`` or
            ``EXPECTED_COLUMNS``, or a fallback is needed and
            ``SAMPLE_DATA`` has no entry for it.
    """
    path = DATA_PATHS[name]
    expected = EXPECTED_COLUMNS[name]
    if not os.path.exists(path):
        print(f"[Missing] {path}, using sample data.")
        return SAMPLE_DATA[name].copy()

    try:
        df = pd.read_csv(path)
        if len(df.columns) != len(expected):
            print(f"[Warning] Fixing columns for {name}")
            # .copy() so the relabeling below acts on an independent frame.
            df = df.iloc[:, :len(expected)].copy()
        # Raises ValueError when the file has too few columns -> fallback below.
        df.columns = list(expected)
        return df
    except Exception as e:
        # Deliberate best effort: any unreadable file means "use the sample".
        print(f"Failed to read {name}: {e}")
        return SAMPLE_DATA[name].copy()

def _norm_str(s):
    return str(s).strip().lower()

def categorical_naive_bayes_exact(df, features, label, values):
    """Classify one sample with categorical Naive Bayes and Laplace smoothing.

    Labels, feature cells and query values are compared after
    ``str(...).strip().lower()``. The number of categories ``k`` used in the
    smoothing denominator is counted on the same normalized values, so
    ``'Dog'`` and ``'dog '`` count as one category. The input DataFrame is
    not modified.

    Args:
        df: Training data containing ``label`` and every column in
            ``features``.
        features: Names of the feature columns, in order.
        label: Name of the class-label column.
        values: Query value for each feature, aligned by position with
            ``features``.

    Returns:
        dict with keys ``priors`` (class -> prior probability), ``steps``
        (human-readable calculation lines), ``normalized`` (class ->
        posterior probability) and ``prediction`` (most probable class).

    Raises:
        ValueError: if ``df`` has no rows.
        KeyError: if a named column is missing from ``df``.
        IndexError: if ``values`` is shorter than ``features``.
    """
    n_rows = len(df)
    if n_rows == 0:
        # Without this the function fails later with an opaque max() error.
        raise ValueError("Cannot classify: the training data has no rows.")

    def _normalize(value):
        return str(value).strip().lower()

    # Work on derived Series only; never add helper columns to the caller's frame.
    labels = df[label].astype(str).map(_normalize)
    columns = {col: df[col].astype(str).map(_normalize) for col in features}

    classes = labels.unique()
    priors = {c: int((labels == c).sum()) / n_rows for c in classes}
    scores = {}
    steps = []
    for c in classes:
        in_class = labels == c
        class_size = int(in_class.sum())
        score = priors[c]
        steps.append(f"\n--- Class: {c} ---")
        for i, col in enumerate(features):
            val = _normalize(values[i])
            count = int((in_class & (columns[col] == val)).sum())
            k = len(columns[col].unique())
            # Laplace (add-one) smoothing keeps unseen values from zeroing the score.
            cond = (count + 1) / (class_size + k)
            score *= cond
            steps.append(f"P({col}={values[i]}|{c}) = ({count}+1)/({class_size}+{k}) = {cond:.4f}")
        scores[c] = score
    total = sum(scores.values())
    normalized = {c: scores[c] / total for c in scores}
    pred = max(normalized, key=normalized.get)
    return {'priors': priors, 'steps': steps, 'normalized': normalized, 'prediction': pred}

class NaiveBayesApp(tk.Tk):
    """Single-dataset window: choose Animal feature values and predict.

    Shows one read-only combobox per feature, a Predict button and a
    read-only text box with the calculation steps.
    """

    def __init__(self):
        super().__init__()
        self.title("Naive Bayes GUI (AutoFix)")
        self.geometry("900x600")
        self.df = load_dataset('Animal')
        self.label, self.features = DATA_CONFIG['Animal']
        self._build_ui()

    def _build_ui(self):
        """Create the feature dropdowns, the Predict button and the output box."""
        tk.Label(self, text="Animal Dataset", font=('Segoe UI',13,'bold')).pack()
        self.vars = []
        form = tk.Frame(self)
        form.pack(pady=10)
        for col in self.features:
            # dropna() keeps missing cells from showing up as the choice "nan".
            vals = sorted(set(self.df[col].dropna().astype(str)))
            # An empty column yields an empty selection instead of IndexError.
            v = tk.StringVar(value=vals[0] if vals else "")
            self.vars.append(v)
            row = tk.Frame(form)
            row.pack(anchor='w')
            tk.Label(row, text=f"{col}:", width=10).pack(side='left')
            ttk.Combobox(row, values=vals, textvariable=v, state='readonly', width=20).pack(side='left', padx=5)

        ttk.Button(self, text="Predict", command=self.on_predict).pack(pady=10)
        self.output = scrolledtext.ScrolledText(self, width=100, height=20, font=('Consolas',10))
        self.output.pack()
        # Read-only from the start, matching its state after a prediction.
        self.output.config(state='disabled')

    def on_predict(self):
        """Run the classifier on the chosen values and show the steps."""
        values = [v.get() for v in self.vars]
        if any(v == "" for v in values):
            messagebox.showwarning("Missing input", "Please choose all feature values.")
            return
        res = categorical_naive_bayes_exact(self.df, self.features, self.label, values)
        self.output.config(state='normal')
        self.output.delete('1.0','end')
        for s in res['steps']:
            self.output.insert('end', s+'\n')
        self.output.insert('end', f"\nPrediction: {res['prediction']}\n")
        self.output.config(state='disabled')

# Start the GUI only when this file is executed directly, not when imported.
if __name__ == '__main__':
    app = NaiveBayesApp()
    app.mainloop()  # run the Tk event loop
