# Naive Bayes Classifier

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the Iris measurements (features) and species labels (targets)
data = load_iris()
X = data.data
y = data.target

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build and fit the Gaussian Naive Bayes model in one step
# (fit() hands back the fitted estimator)
gnb = GaussianNB().fit(X_train, y_train)

# Label the held-out rows
y_pred = gnb.predict(X_test)

# Score the predictions against the true held-out labels
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:")
print(report)

# Metropolis-Hastings Algorithm

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def target_distribution(x):
    """Evaluate the standard normal density at ``x``.

    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    normalizer = np.sqrt(2 * np.pi)
    kernel = np.exp(-0.5 * x**2)
    return kernel / normalizer

def proposal_distribution(x, sigma):
    """Draw one candidate from a normal distribution centred at ``x``.

    ``sigma`` is the standard deviation of the jump; the draw comes from
    NumPy's global random state.
    """
    candidate = np.random.normal(loc=x, scale=sigma)
    return candidate

def metropolis_hastings(target, proposal, initial_value, num_samples, sigma):
    """Run a random-walk Metropolis chain targeting ``target``.

    The acceptance rule uses only the ratio of target densities, so it is
    valid for symmetric proposals (such as a normal jump centred on the
    current value); ``target`` may be unnormalised.

    :param target: Callable returning the (possibly unnormalised) density at a point
    :param proposal: Callable ``proposal(current_value, sigma)`` returning a candidate
    :param initial_value: Starting point of the chain
    :param num_samples: Number of iterations to run
    :param sigma: Scale parameter forwarded to ``proposal``
    :return: Array of length ``num_samples + 1``; the first entry is ``initial_value``
    """
    samples = [initial_value]
    current_value = initial_value
    # Cache the density of the current state: it only changes on acceptance,
    # so re-evaluating it every iteration is wasted work.
    current_density = target(current_value)

    for _ in range(num_samples):
        proposed_value = proposal(current_value, sigma)
        proposed_density = target(proposed_value)
        # Always draw the uniform so the random stream is consumed the same
        # way regardless of which branch decides acceptance.
        u = np.random.rand()

        if current_density == 0:
            # The chain sits outside the target's support (e.g. a bad
            # starting point). The ratio is undefined there, so move
            # unconditionally instead of dividing by zero.
            accept = True
        else:
            accept = u < proposed_density / current_density

        if accept:
            current_value = proposed_value
            current_density = proposed_density
        samples.append(current_value)

    return np.array(samples)

# Parameters
initial_value = 0  # starting point of the chain
num_samples = 10000  # number of Metropolis-Hastings iterations
sigma = 1.0  # scale handed to proposal_distribution for each jump

# Run Metropolis-Hastings algorithm
# The returned array also contains the initial value as its first entry.
samples = metropolis_hastings(target_distribution, proposal_distribution, initial_value, num_samples, sigma)

# Plot results
# Compare a normalised histogram of the chain with the exact target density.
plt.figure(figsize=(12, 6))
# density=True rescales the histogram so it is comparable with the density curve
plt.hist(samples, bins=50, density=True, alpha=0.6, color='g', label='Samples')
# Grid on which the target density is evaluated for the overlay
x = np.linspace(-4, 4, 1000)
plt.plot(x, target_distribution(x), 'r', lw=2, label='Target Distribution')
plt.title('Metropolis-Hastings Sampling')
plt.xlabel('x')
plt.ylabel('Density')
plt.legend()
plt.show()

# Rejection ABC Algorithm

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Ground-truth mean and standard deviation; the ABC routine never reads these,
# they are only used to generate the data and to annotate the final plot
true_mu = 2.0
true_sigma = 1.0

# The "observed" sample the algorithm gets to see: 100 normal draws
observed_data = np.random.normal(loc=true_mu, scale=true_sigma, size=100)

# Draw a synthetic normal sample for a candidate (mu, sigma) pair
def simulate_data(mu, sigma, size=100):
    """Return ``size`` draws from a normal with mean ``mu`` and std ``sigma``."""
    draws = np.random.normal(loc=mu, scale=sigma, size=size)
    return draws

# Reduce a sample to the two summary statistics used by ABC: mean and standard deviation
def summary_statistics(data):
    """Return ``(mean, std)`` of ``data`` as a tuple."""
    centre = np.mean(data)
    spread = np.std(data)
    return centre, spread

# Euclidean distance between two (mean, std) summary pairs
def compute_distance(obs_stat, sim_stat):
    """Return the Euclidean distance between observed and simulated summaries.

    Both arguments are ``(mean, std)`` pairs.
    """
    mean_gap = obs_stat[0] - sim_stat[0]
    std_gap = obs_stat[1] - sim_stat[1]
    squared_norm = mean_gap**2 + std_gap**2
    return np.sqrt(squared_norm)

# Rejection ABC algorithm
def rejection_abc(observed_data, epsilon, num_samples):
    """Approximate the posterior of (mu, sigma) by rejection ABC.

    Candidates are drawn from uniform priors (mu in [0, 5), sigma in [0, 3)),
    a dataset is simulated for each candidate, and the candidate is kept when
    the distance between simulated and observed summary statistics is below
    ``epsilon``.

    :param observed_data: Observed sample (array-like, non-empty)
    :param epsilon: Acceptance threshold on the summary-statistic distance; must be > 0
    :param num_samples: Number of accepted parameter pairs to collect
    :return: Array of accepted ``(mu, sigma)`` pairs, one per row
    :raises ValueError: If ``epsilon`` is not positive or ``observed_data`` is empty
    """
    # A non-positive (or NaN) threshold can never be satisfied by
    # `distance < epsilon`, which would make the loop below spin forever.
    if not epsilon > 0:
        raise ValueError("epsilon must be a positive number")

    n_obs = np.size(observed_data)
    if n_obs == 0:
        raise ValueError("observed_data must not be empty")

    # The observed summary does not change between iterations: compute it once.
    obs_stats = summary_statistics(observed_data)

    samples = []
    while len(samples) < num_samples:
        # Generate parameters from prior distributions (uniform here for simplicity)
        mu = np.random.uniform(0, 5)
        sigma = np.random.uniform(0, 3)

        # Simulate a dataset of the same size as the observed one so that the
        # summary statistics of both are directly comparable
        simulated_data = simulate_data(mu, sigma, size=n_obs)

        # Calculate summary statistics of the simulated data
        sim_stats = summary_statistics(simulated_data)

        # Accept the parameter set if the distance is less than epsilon
        if compute_distance(obs_stats, sim_stats) < epsilon:
            samples.append((mu, sigma))

    return np.array(samples)

# Parameters
epsilon = 0.5  # Acceptance threshold
num_samples = 1000  # Number of samples to generate

# Run Rejection ABC algorithm
# Each row of `samples` is one accepted (mu, sigma) pair.
samples = rejection_abc(observed_data, epsilon, num_samples)

# Plot results
plt.figure(figsize=(10, 6))
plt.scatter(samples[:, 0], samples[:, 1], alpha=0.5, label='Samples')
# The LaTeX labels are raw strings: "\m" and "\s" are not valid Python escape
# sequences and trigger a warning in ordinary string literals.
plt.axvline(true_mu, color='r', linestyle='--', label=r'True $\mu$')
plt.axhline(true_sigma, color='b', linestyle='--', label=r'True $\sigma$')
plt.xlabel(r'$\mu$')
plt.ylabel(r'$\sigma$')
plt.title('Rejection ABC Sampling')
plt.legend()
plt.grid(True)
plt.show()

# Probabilistic Inference in Bayesian Networks

In [None]:
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Network structure: A and B are both parents of C, and C is the parent of D
edges = [('A', 'C'), ('B', 'C'), ('C', 'D')]
bayesian_network = BayesianNetwork(edges)

# Conditional probability tables, one per node.
# Root nodes A and B carry plain marginals.
cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.7], [0.3]])
cpd_b = TabularCPD(variable='B', variable_card=2, values=[[0.8], [0.2]])

# C depends on (A, B): one column per joint parent configuration, one row per state of C
cpd_c_values = [
    [0.9, 0.8, 0.5, 0.4],
    [0.1, 0.2, 0.5, 0.6],
]
cpd_c = TabularCPD(
    variable='C',
    variable_card=2,
    values=cpd_c_values,
    evidence=['A', 'B'],
    evidence_card=[2, 2],
)

# D depends on C only: one column per state of C
cpd_d_values = [
    [0.3, 0.2],
    [0.7, 0.8],
]
cpd_d = TabularCPD(
    variable='D',
    variable_card=2,
    values=cpd_d_values,
    evidence=['C'],
    evidence_card=[2],
)

# Attach all tables to the network, then let pgmpy validate the model
bayesian_network.add_cpds(cpd_a, cpd_b, cpd_c, cpd_d)
model_is_valid = bayesian_network.check_model()
print("CPDs valid:", model_is_valid)

# Exact inference by variable elimination
inference = VariableElimination(bayesian_network)

# Distribution of D once A=1 and B=0 have been observed
observed_evidence = {'A': 1, 'B': 0}
query = inference.query(variables=['D'], evidence=observed_evidence)
print("P(D | A=1, B=0):")
print(query)

# Other variables / evidence combinations can be queried the same way

# Metropolis-Hastings Algorithm for Bayesian Linear Regression

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Build a reproducible synthetic regression dataset: y = slope * x + intercept + noise
np.random.seed(0)
num_samples = 100
true_slope = 2.0
true_intercept = 1.0
noise_std = 1.0

# Inputs are drawn first, then the Gaussian noise, so the random stream order is fixed
x = np.random.uniform(-5, 5, num_samples)
noise = np.random.normal(0, noise_std, num_samples)
y = true_slope * x + true_intercept + noise

# Bayesian Linear Regression using Metropolis-Hastings Algorithm
def metropolis_hastings_bayesian_linear_regression(x, y, num_samples=10000, burn_in=1000,
                                                   noise_std=1.0, proposal_std=0.1):
    """Sample the posterior of (slope, intercept) with random-walk Metropolis-Hastings.

    The model is ``y = beta[0] * x + beta[1] + noise`` with Gaussian noise of
    known standard deviation and an independent standard-normal prior on both
    coefficients.

    :param x: 1-D array-like of inputs
    :param y: 1-D array-like of responses, same length as ``x``
    :param num_samples: Number of draws to keep after burn-in
    :param burn_in: Number of initial iterations to discard
    :param noise_std: Known standard deviation of the observation noise
        (an explicit argument, so the function no longer depends on a
        module-level variable of the same name)
    :param proposal_std: Standard deviation of the Gaussian random-walk step
    :return: Array of shape ``(num_samples, 2)``; column 0 is the slope,
        column 1 the intercept
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Define prior parameters (Normal distribution)
    prior_mean = np.zeros(2)  # Mean of the prior
    prior_cov = np.diag([1.0, 1.0])  # Covariance matrix of the prior
    # The prior is fixed, so invert its covariance once instead of on every
    # log-prior evaluation.
    prior_precision = np.linalg.inv(prior_cov)

    # Initialize parameters
    current_beta = np.zeros(2)  # Initial guess for beta (slope and intercept)
    beta_samples = []

    # Function to calculate log likelihood (up to an additive constant)
    def log_likelihood(beta):
        residuals = y - (beta[0] * x + beta[1])
        return -0.5 * np.sum(residuals ** 2) / noise_std**2

    # Function to calculate log prior (up to an additive constant)
    def log_prior(beta):
        centered = beta - prior_mean
        return -0.5 * centered @ prior_precision @ centered

    # Initial log posterior
    current_log_posterior = log_likelihood(current_beta) + log_prior(current_beta)

    # Metropolis-Hastings sampling
    for iteration in range(num_samples + burn_in):
        # Generate proposal from a normal distribution centered at current_beta
        proposal = current_beta + np.random.normal(0, proposal_std, size=2)

        # Calculate log posterior of the proposed beta
        proposal_log_posterior = log_likelihood(proposal) + log_prior(proposal)

        # Accept or reject the proposal. Uphill moves are accepted outright,
        # which also keeps np.exp away from large positive arguments that
        # would overflow; the uniform is still drawn every iteration so the
        # random stream is unchanged.
        log_ratio = proposal_log_posterior - current_log_posterior
        u = np.random.uniform()
        if log_ratio >= 0 or u < np.exp(log_ratio):
            current_beta = proposal
            current_log_posterior = proposal_log_posterior

        # Save samples after burn-in
        if iteration >= burn_in:
            beta_samples.append(current_beta)

    return np.array(beta_samples)

# Run Metropolis-Hastings algorithm for Bayesian Linear Regression
# Uses the function's default num_samples and burn_in; each row of `samples`
# is one retained draw.
samples = metropolis_hastings_bayesian_linear_regression(x, y)

# Plotting results
# Two stacked trace plots, each with the data-generating value as a dashed reference line.
plt.figure(figsize=(10, 6))
plt.subplot(2, 1, 1)
# Column 0 is the coefficient that multiplies x in the sampler, i.e. the slope
plt.plot(samples[:, 0], label='slope (beta1)')
plt.axhline(true_slope, color='r', linestyle='--', label='True slope')
plt.title('Samples of Slope (beta1)')
plt.legend()

plt.subplot(2, 1, 2)
# Column 1 is the additive term in the sampler, i.e. the intercept
plt.plot(samples[:, 1], label='intercept (beta0)')
plt.axhline(true_intercept, color='r', linestyle='--', label='True intercept')
plt.title('Samples of Intercept (beta0)')
plt.legend()

plt.tight_layout()
plt.show()

# Bayesian Spam Filtering Algorithm

In [None]:
import re
from collections import defaultdict
import numpy as np

class BayesianSpamFilter:
    """Naive Bayes spam filter based on word-presence frequencies.

    For every word the filter tracks in how many spam and ham emails it
    appeared. Those document counts are turned into smoothed estimates of
    P(word | spam) and P(word | ham), which are combined in log space to
    classify new emails.
    """

    def __init__(self, spam_prior=0.5, word_threshold=5):
        """
        Create an untrained filter.

        :param spam_prior: Prior probability of an email being spam (strictly between 0 and 1)
        :param word_threshold: Minimum number of training emails (spam + ham) a word
                               must appear in to be used as a feature
        :raises ValueError: If spam_prior is not strictly between 0 and 1
        """
        # A prior of exactly 0 or 1 would put log(0) into predict()
        if not 0.0 < spam_prior < 1.0:
            raise ValueError("spam_prior must be strictly between 0 and 1")
        self.spam_word_counts = defaultdict(int)  # word -> number of spam emails containing it
        self.ham_word_counts = defaultdict(int)  # word -> number of ham emails containing it
        self.spam_total_count = 0  # number of spam emails seen
        self.ham_total_count = 0  # number of ham emails seen
        self.spam_prior = spam_prior  # Prior probability of an email being spam
        self.word_threshold = word_threshold  # Threshold count to consider a word as a feature

    def train(self, emails, labels):
        """
        Train the Bayesian Spam Filter with a set of emails and their corresponding labels.

        Calling train() repeatedly accumulates counts.

        :param emails: List of email texts
        :param labels: List of labels (1 for spam, 0 for ham)
        """
        for email, label in zip(emails, labels):
            # Count each word at most once per email. The estimates divide by
            # the number of emails, so counting repeated occurrences could
            # push a "probability" above 1.
            unique_words = set(self.extract_words(email))
            if label == 1:
                self.spam_total_count += 1
                word_counts = self.spam_word_counts
            else:
                self.ham_total_count += 1
                word_counts = self.ham_word_counts
            for word in unique_words:
                word_counts[word] += 1

    def extract_words(self, email):
        """
        Extract words from an email after cleaning and normalization.

        The text is lower-cased and split into runs of word characters.

        :param email: Email text
        :return: List of words (features)
        """
        return re.findall(r'\b\w+\b', email.lower())

    def calculate_word_probabilities(self):
        """
        Calculate probabilities of each word being spam or ham using Bayesian estimation.

        Only words seen in at least ``word_threshold`` training emails are
        included. Estimates use Laplace (add-one) smoothing over the two
        outcomes "word present" / "word absent".

        :return: Dictionary mapping word -> {'spam_probability', 'ham_probability'}
        """
        smoothing_factor = 1  # Laplace smoothing factor
        word_probabilities = {}
        vocabulary = self.spam_word_counts.keys() | self.ham_word_counts.keys()
        for word in vocabulary:
            # .get() avoids inserting zero entries into the defaultdicts
            spam_count = self.spam_word_counts.get(word, 0)
            ham_count = self.ham_word_counts.get(word, 0)

            if spam_count + ham_count < self.word_threshold:
                continue

            word_probabilities[word] = {
                'spam_probability': (spam_count + smoothing_factor)
                                    / float(self.spam_total_count + 2 * smoothing_factor),
                'ham_probability': (ham_count + smoothing_factor)
                                   / float(self.ham_total_count + 2 * smoothing_factor),
            }

        return word_probabilities

    def predict(self, email):
        """
        Predict whether an email is spam or ham based on its features (words).

        Words without a probability estimate are ignored. Ties are resolved
        as ham.

        :param email: Email text
        :return: Predicted label (1 for spam, 0 for ham)
        """
        words = self.extract_words(email)
        word_probabilities = self.calculate_word_probabilities()

        # Work in log space so that many small factors do not underflow
        log_spam_probability = np.log(self.spam_prior)
        log_ham_probability = np.log(1 - self.spam_prior)

        for word in words:
            estimate = word_probabilities.get(word)
            if estimate is not None:
                log_spam_probability += np.log(estimate['spam_probability'])
                log_ham_probability += np.log(estimate['ham_probability'])

        if log_spam_probability > log_ham_probability:
            return 1  # Spam
        return 0  # Ham

# Example usage:
if __name__ == "__main__":
    # Example data (emails and labels)
    emails = [
        "Buy cheap Viagra now!!!",
        "Hello, how are you?",
        "Get free money!",
        "Dear friend, please send the documents."
    ]
    labels = [1, 0, 1, 0]  # 1 for spam, 0 for ham

    # Initialize and train the Bayesian Spam Filter.
    # The instance is deliberately not called `filter`, which would shadow the builtin.
    spam_filter = BayesianSpamFilter()
    # With only four training emails no word can reach the default threshold
    # of 5, so every prediction would fall through to the tie-break ("ham").
    # Lower the threshold so the learned word statistics are actually used.
    spam_filter.word_threshold = 1
    spam_filter.train(emails, labels)

    # Test the filter with new emails
    test_emails = [
        "Buy now, limited offer!",
        "Hi, just checking in.",
        "Send me your bank details.",
        "Congratulations! You've won a prize."
    ]

    for email in test_emails:
        prediction = spam_filter.predict(email)
        if prediction == 1:
            print(f"'{email}' is predicted as spam.")
        else:
            print(f"'{email}' is predicted as ham.")

# Hierarchical Bayesian Model

In [None]:
import numpy as np
import pymc3 as pm

# Simulate grouped regression data: every group follows y = 2 * x + unit Gaussian noise
np.random.seed(42)
num_groups = 3
group_sizes = np.random.randint(10, 20, num_groups)

# Collect (x, y, group index) triples group by group
data = []
for group_id, n_obs in enumerate(group_sizes):
    features = np.random.randn(n_obs)
    responses = 2 * features + np.random.randn(n_obs)
    labels = [group_id] * n_obs
    data.extend(zip(features, responses, labels))

# Transpose the triples into three parallel arrays
x, y, groups = (np.array(column) for column in zip(*data))

# Hierarchical linear regression model:
# every group gets its own intercept and slope, drawn from shared population-level priors
with pm.Model() as hierarchical_model:
    # Hyperpriors (population-level mean and spread of intercepts and slopes)
    mu_alpha = pm.Normal('mu_alpha', mu=0, sd=10)
    sigma_alpha = pm.HalfNormal('sigma_alpha', sd=10)
    mu_beta = pm.Normal('mu_beta', mu=0, sd=10)
    sigma_beta = pm.HalfNormal('sigma_beta', sd=10)

    # Group-level parameters (one intercept and one slope per group)
    alpha = pm.Normal('alpha', mu=mu_alpha, sd=sigma_alpha, shape=num_groups)
    beta = pm.Normal('beta', mu=mu_beta, sd=sigma_beta, shape=num_groups)

    # Likelihood: `groups` indexes each observation's own intercept and slope
    mu = alpha[groups] + beta[groups] * x
    sigma = pm.HalfNormal('sigma', sd=1)
    y_obs = pm.Normal('y_obs', mu=mu, sd=sigma, observed=y)

# Sampling
with hierarchical_model:
    trace = pm.sample(2000, tune=2000)

# Posterior analysis
# A bare `pm.summary(trace)` that is not the last expression of the cell is
# silently discarded, so print the table explicitly.
print(pm.summary(trace))
pm.traceplot(trace)

# Implementation of Bayesian Non-parametrics

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.mixture import BayesianGaussianMixture

# Generate simulated data: three well-separated unit-variance Gaussian clusters
np.random.seed(42)
data = np.concatenate([
    np.random.normal(-5, 1, 300),
    np.random.normal(0, 1, 300),
    np.random.normal(5, 1, 400)
])

# Fit Dirichlet Process Mixture Model
# n_components is the maximum number of components made available to the model.
dpgmm = BayesianGaussianMixture(n_components=10, covariance_type='full')
dpgmm.fit(data.reshape(-1, 1))  # the estimator expects a 2-D (n_samples, n_features) array

# Plot results
x = np.linspace(-10, 10, 1000)
plt.hist(data, bins=30, density=True, alpha=0.5, label='Histogram of data')
for i in range(dpgmm.n_components):
    # weights_ holds the mixing proportions themselves (not log-weights), so
    # each component's contribution to the density is weight * pdf.
    component_std = np.sqrt(dpgmm.covariances_[i, 0, 0])
    y = dpgmm.weights_[i] * norm.pdf(x, dpgmm.means_[i, 0], component_std)
    plt.plot(x, y, label=f'Component {i}')
plt.title('Bayesian Gaussian Mixture Model')
plt.legend()
plt.show()

# Implementation of Bayesian Decision Theory

In [None]:
import numpy as np

# Possible states of the world and the actions to choose between
States = ['sunny', 'cloudy', 'rainy']
actions = ['go_outside', 'stay_inside']

# Prior belief over the states (same order as `States`)
prior_probs = np.array([0.4, 0.3, 0.3])

# Per-state action probabilities: one row per state, one column per action
action_probs = np.array([
    [0.9, 0.1],
    [0.5, 0.5],
    [0.2, 0.8],
])

# Utility of each (state, action) pair, laid out like `action_probs`
utility = np.array([
    [1, 0],
    [0, 1],
    [0, -1],
])

# Weight each utility by its action probability, then average over the prior
weighted_utility = action_probs * utility
expected_utilities = np.dot(prior_probs, weighted_utility)

# Choose the action with the largest expected utility
decision_index = np.argmax(expected_utilities)
decision = actions[decision_index]

# Output results
print("Expected Utilities:", expected_utilities)
print("Decision:", decision)