### graphical models

Create a custom scikit-learn classifier based on the template provided. Users should be able to either provide an existing Bayesian network model or omit it. If a model isn't provided, the classifier should automatically perform structure learning in `fit`, before estimating the parameters.

In [6]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_array, check_X_y, check_is_fitted
from sklearn.utils.multiclass import unique_labels


from pgmpy.models import BayesianNetwork 
from pgmpy.estimators import HillClimbSearch, MaximumLikelihoodEstimator 
from pgmpy.inference import VariableElimination

Create a custom scikit-learn classifier based on the template provided. Users should be able to either provide an existing Bayesian network model or omit it. If a model isn't provided, the classifier should automatically perform structure learning in `fit`, before estimating the parameters.

In [7]:

class BayesianNetworkClassifier(ClassifierMixin, BaseEstimator):
    """Bayesian-network classifier exposing the scikit-learn estimator API.

    Feature columns are named ``f0, f1, ...`` (by column position) and the
    label column is named ``"target"``. A user-supplied network must use
    these node names. Predictions are obtained by querying the posterior of
    ``"target"`` given the feature values of each row.

    Fitted attributes
    -----------------
    classes_ : ndarray
        Sorted unique labels seen in ``fit``.
    n_features_in_ : int
        Number of feature columns seen in ``fit``.
    model_ : BayesianNetwork
        The fitted network (a learned structure, or a copy of ``model``).
    infer_ : VariableElimination
        Inference engine built on ``model_``.
    """

    def __init__(self, model=None, structure_method="hillclimb"):
        """
        model: pgmpy.models.BayesianNetwork or None
            If provided, a copy of this model is used as the structure.
            If None, structure learning is performed in fit().
        structure_method: str
            The method for structure learning ("hillclimb" supported).
        """
        self.model = model
        self.structure_method = structure_method

    @staticmethod
    def _feature_names(n_features):
        """Return the column names used for the features: f0, f1, ..."""
        return [f"f{i}" for i in range(n_features)]

    def fit(self, X, y):
        """Learn (or copy) the network structure and estimate its parameters.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``structure_method`` is unsupported, or if a supplied model
            has no ``"target"`` node or has nodes that are not data columns.
        """
        # Validate inputs
        X, y = check_X_y(X, y)
        self.classes_ = unique_labels(y)
        self.n_features_in_ = X.shape[1]

        # Convert to DataFrame for pgmpy
        data = pd.DataFrame(X, columns=self._feature_names(X.shape[1]))
        data["target"] = y

        if self.model is None:
            # No model provided: perform structure learning
            if self.structure_method != "hillclimb":
                raise ValueError(f"Unsupported structure method: {self.structure_method}")
            hc = HillClimbSearch(data, use_cache=True)
            best_model = hc.estimate(scoring_method="k2")
            network = BayesianNetwork(best_model.edges())
            # Building from edges alone drops isolated variables; re-add every
            # column so "target" and all features are always valid nodes.
            network.add_nodes_from(data.columns)
        else:
            # Work on a copy so the constructor parameter is never mutated.
            network = self.model.copy()
            nodes = set(network.nodes())
            if "target" not in nodes:
                raise ValueError("The provided model must contain a 'target' node.")
            unknown = sorted(str(node) for node in nodes - set(data.columns))
            if unknown:
                raise ValueError(
                    f"The provided model has nodes that are not data columns: {unknown}"
                )

        # Estimate parameters using only the columns that are network nodes
        network.fit(data[list(network.nodes())], estimator=MaximumLikelihoodEstimator)
        self.model_ = network

        # Store inference engine for predictions
        self.infer_ = VariableElimination(self.model_)
        return self

    def _posteriors(self, X):
        """Return P(target | row) for every row, columns ordered as ``classes_``."""
        check_is_fitted(self)
        X = check_array(X)
        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f"X has {X.shape[1]} features, but {type(self).__name__} "
                f"is expecting {self.n_features_in_} features as input."
            )

        # Only features that are nodes of the network can be used as evidence.
        nodes = set(self.model_.nodes())
        used = [
            (j, name)
            for j, name in enumerate(self._feature_names(X.shape[1]))
            if name in nodes
        ]

        proba = np.empty((X.shape[0], len(self.classes_)), dtype=float)
        cache = {}  # identical evidence gives an identical posterior; query once
        for i, values in enumerate(X.tolist()):
            key = tuple(values[j] for j, _ in used)
            if key not in cache:
                q = self.infer_.query(
                    variables=["target"],
                    evidence={name: values[j] for j, name in used},
                    show_progress=False,
                )
                # Align probs with self.classes_ using the factor's own state order
                position = {state: k for k, state in enumerate(q.state_names["target"])}
                cache[key] = [q.values[position[c]] for c in self.classes_]
            proba[i] = cache[key]
        return proba

    def predict(self, X):
        """Return the most probable class label for each row of X.

        Returns
        -------
        ndarray of shape (n_samples,)
        """
        proba = self._posteriors(X)
        # Pick class with max probability
        return self.classes_[np.argmax(proba, axis=1)]

    def predict_proba(self, X):
        """Return class probabilities for each row of X.

        Returns
        -------
        ndarray of shape (n_samples, n_classes)
            Column ``k`` corresponds to ``self.classes_[k]``.
        """
        return self._posteriors(X)