In [1]:
# Bayesian Decision Theory Classification (Breast Cancer Dataset)

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Breast-cancer data shipped with scikit-learn: feature matrix X and targets y
data = datasets.load_breast_cancer()
X, y = data.data, data.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit one Gaussian per class: prior probability, mean vector, covariance matrix
classes = np.unique(y_train)

priors, means, covs = {}, {}, {}

for c in classes:
    # Training rows that carry label c
    Xc = X_train[y_train == c]
    # Prior = share of the training set belonging to this class
    priors[c] = Xc.shape[0] / X_train.shape[0]
    means[c] = Xc.mean(axis=0)
    # Small ridge on the diagonal keeps the covariance invertible (stability)
    covs[c] = np.cov(Xc.T) + 1e-6 * np.identity(Xc.shape[1])

# Report the fitted parameters, one section per quantity
print("=== Priors ===")
for label in classes:
    print(f"Class {label}: {priors[label]}")

print("\n=== Means ===")
for label in classes:
    print(f"Class {label} Mean:\n{means[label]}\n")

print("=== Covariance Matrices ===")
for label in classes:
    print(f"Class {label} Covariance Matrix:\n{covs[label]}\n")


# Multivariate Gaussian log-likelihood
def log_gaussian(x, mean, cov):
    """Log-density of a multivariate normal N(mean, cov) evaluated at x.

    Parameters
    ----------
    x : array-like
        One sample with d features.
    mean : array-like
        The d-dimensional mean vector.
    cov : array-like of shape (d, d)
        Covariance matrix; expected to be positive definite.

    Returns
    -------
    numpy.ndarray of shape (1, 1)
        -0.5 * (log|cov| + (x - mean)^T cov^{-1} (x - mean) + d * log(2*pi)).
        The value is NaN when the determinant of ``cov`` is not positive.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``cov`` is singular.
    """
    # Column vectors, so the quadratic form below is a plain matrix product.
    diff = np.asarray(x).reshape(-1, 1) - np.asarray(mean).reshape(-1, 1)
    d = diff.shape[0]

    # slogdet yields log|cov| directly. Computing det() first and taking its log
    # underflows to 0 (or overflows to inf) for high-dimensional or badly scaled
    # covariances, which turns the score into +/-inf.
    sign, log_abs_det = np.linalg.slogdet(cov)
    # log of a non-positive determinant is undefined; keep that visible as NaN.
    log_det = log_abs_det if sign > 0 else np.nan

    # Solve cov @ z = diff instead of forming the explicit inverse: same result,
    # better numerical accuracy.
    mahalanobis = diff.T @ np.linalg.solve(cov, diff)

    return -0.5 * (log_det + mahalanobis + d * np.log(2 * np.pi))


# Predict using MAP (Bayesian Decision Theory)
def predict(X):
    """Classify each row of X with the maximum-a-posteriori (MAP) rule.

    For every sample x the score of class c is
    ``log_gaussian(x, means[c], covs[c]) + log(priors[c])``, read from the
    notebook-level ``classes``, ``means``, ``covs`` and ``priors``.

    Parameters
    ----------
    X : iterable of 1-D feature vectors
        Samples to classify, one per row.

    Returns
    -------
    numpy.ndarray
        Predicted class label for each sample, in input order.
    """
    preds = []
    for x in X:
        scores = [
            log_gaussian(x, means[c], covs[c]) + np.log(priors[c])
            for c in classes
        ]
        # argmax gives a position within `classes`, not a label. Map it back so
        # the prediction stays correct when labels are not exactly 0..K-1.
        preds.append(classes[np.argmax(scores)])
    return np.array(preds)


# Score the held-out split: fraction of test samples labelled correctly
y_pred = predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", acc)

ModuleNotFoundError: No module named 'numpy'

In [2]:
!{sys.executable} -m pip install numpy

'{sys.executable}' is not recognized as an internal or external command,
operable program or batch file.


In [3]:
pip install numpy pandas matplotlib scikit-learn

Collecting numpy
  Downloading numpy-2.3.4-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting pandas
  Downloading pandas-2.3.3-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.7-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.3-cp313-cp313-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.60.1-cp313-cp313-win_amd64.whl.metadata (114 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.9-cp313-cp313-win_amd64.whl.metadata (6.4 kB)
Collecting pillow>=8 (from matplotlib)
  Downloading pillo


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
!pip install ipykernel
python -m ipykernel install --user --name=python313 --display-name "Python 3.13"

SyntaxError: invalid syntax (3486699168.py, line 2)

In [6]:
# Bayesian Decision Theory Classification (Breast Cancer Dataset)

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Breast-cancer data shipped with scikit-learn: feature matrix X and targets y
data = datasets.load_breast_cancer()
X, y = data.data, data.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit one Gaussian per class: prior probability, mean vector, covariance matrix
classes = np.unique(y_train)

priors, means, covs = {}, {}, {}

for c in classes:
    # Training rows that carry label c
    Xc = X_train[y_train == c]
    # Prior = share of the training set belonging to this class
    priors[c] = Xc.shape[0] / X_train.shape[0]
    means[c] = Xc.mean(axis=0)
    # Small ridge on the diagonal keeps the covariance invertible (stability)
    covs[c] = np.cov(Xc.T) + 1e-6 * np.identity(Xc.shape[1])

# Report the fitted parameters, one section per quantity
print("=== Priors ===")
for label in classes:
    print(f"Class {label}: {priors[label]}")

print("\n=== Means ===")
for label in classes:
    print(f"Class {label} Mean:\n{means[label]}\n")

print("=== Covariance Matrices ===")
for label in classes:
    print(f"Class {label} Covariance Matrix:\n{covs[label]}\n")


# Multivariate Gaussian log-likelihood
def log_gaussian(x, mean, cov):
    """Log-density of a multivariate normal N(mean, cov) evaluated at x.

    Parameters
    ----------
    x : array-like
        One sample with d features.
    mean : array-like
        The d-dimensional mean vector.
    cov : array-like of shape (d, d)
        Covariance matrix; expected to be positive definite.

    Returns
    -------
    numpy.ndarray of shape (1, 1)
        -0.5 * (log|cov| + (x - mean)^T cov^{-1} (x - mean) + d * log(2*pi)).
        The value is NaN when the determinant of ``cov`` is not positive.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``cov`` is singular.
    """
    # Column vectors, so the quadratic form below is a plain matrix product.
    diff = np.asarray(x).reshape(-1, 1) - np.asarray(mean).reshape(-1, 1)
    d = diff.shape[0]

    # slogdet yields log|cov| directly. Computing det() first and taking its log
    # underflows to 0 (or overflows to inf) for high-dimensional or badly scaled
    # covariances, which turns the score into +/-inf.
    sign, log_abs_det = np.linalg.slogdet(cov)
    # log of a non-positive determinant is undefined; keep that visible as NaN.
    log_det = log_abs_det if sign > 0 else np.nan

    # Solve cov @ z = diff instead of forming the explicit inverse: same result,
    # better numerical accuracy.
    mahalanobis = diff.T @ np.linalg.solve(cov, diff)

    return -0.5 * (log_det + mahalanobis + d * np.log(2 * np.pi))


# Predict using MAP (Bayesian Decision Theory)
def predict(X):
    """Classify each row of X with the maximum-a-posteriori (MAP) rule.

    For every sample x the score of class c is
    ``log_gaussian(x, means[c], covs[c]) + log(priors[c])``, read from the
    notebook-level ``classes``, ``means``, ``covs`` and ``priors``.

    Parameters
    ----------
    X : iterable of 1-D feature vectors
        Samples to classify, one per row.

    Returns
    -------
    numpy.ndarray
        Predicted class label for each sample, in input order.
    """
    preds = []
    for x in X:
        scores = [
            log_gaussian(x, means[c], covs[c]) + np.log(priors[c])
            for c in classes
        ]
        # argmax gives a position within `classes`, not a label. Map it back so
        # the prediction stays correct when labels are not exactly 0..K-1.
        preds.append(classes[np.argmax(scores)])
    return np.array(preds)


# Score the held-out split: fraction of test samples labelled correctly
y_pred = predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", acc)

=== Priors ===
Class 0: 0.3743718592964824
Class 1: 0.6256281407035176

=== Means ===
Class 0 Mean:
[1.74306040e+01 2.13687919e+01 1.15044765e+02 9.78583221e+02
 1.02309933e-01 1.41809597e-01 1.57551678e-01 8.64344295e-02
 1.92489262e-01 6.23636913e-02 6.09218121e-01 1.21797114e+00
 4.31924161e+00 7.29159060e+01 6.85580537e-03 3.16668993e-02
 4.19473154e-02 1.51030470e-02 2.07178456e-02 3.99662416e-03
 2.10074497e+01 2.90446980e+01 1.40291678e+02 1.40890268e+03
 1.43840201e-01 3.60683557e-01 4.39328054e-01 1.79090000e-01
 3.23202685e-01 9.01081208e-02]

Class 1 Mean:
[1.22285904e+01 1.78369478e+01 7.86198795e+01 4.68995582e+02
 9.21630924e-02 8.02612048e-02 4.64938514e-02 2.57038876e-02
 1.73019277e-01 6.28435743e-02 2.82459438e-01 1.19504337e+00
 2.01125823e+00 2.12581004e+01 7.17241365e-03 2.15797751e-02
 2.64623048e-02 1.00246667e-02 2.01655823e-02 3.66777229e-03
 1.34628273e+01 2.33877510e+01 8.76600402e+01 5.66462249e+02
 1.24550522e-01 1.82306747e-01 1.66273369e-01 7.47929357e-02