# **CSP:**

### Basic CSP Solver

In [None]:
from ortools.sat.python import cp_model 

model = cp_model.CpModel()

num_vals = 3 
x = model.new_int_var(0, num_vals - 1, "x")
y = model.new_int_var(0, num_vals - 1, "y")
z = model.new_int_var(0, num_vals - 1, "z")

model.add(x != y)

solver = cp_model.CpSolver()
status = solver.solve(model)

if status == cp_model.OPTIMAL or status == cp_model.FEASIBLE:
    print(f"x = {solver.value(x)}")
    print(f"y = {solver.value(y)}")
    print(f"z = {solver.value(z)}")
else:
    print("No solution found.")

### Enumerates all solutions

In [None]:
from ortools.sat.python import cp_model

# Custom solution printer to print all solutions
class VarArraySolutionPrinter(cp_model.CpSolverSolutionCallback):
    """Print intermediate solutions."""

    def __init__(self, variables: list[cp_model.IntVar]):
        cp_model.CpSolverSolutionCallback.__init__(self)
        self.__variables = variables
        self.__solution_count = 0

    def on_solution_callback(self) -> None:
        self.__solution_count += 1
        for v in self.__variables:
            print(f"{v}={self.value(v)}", end=" ")
        print()

    @property
    def solution_count(self) -> int:
        return self.__solution_count

# Create the model
model = cp_model.CpModel()

# Define variables (values from 0 to 2)
num_vals = 3
x = model.new_int_var(0, num_vals - 1, "x")
y = model.new_int_var(0, num_vals - 1, "y")
z = model.new_int_var(0, num_vals - 1, "z")

# Add constraint
model.add(x != y)

# Set up solver and solution printer
solver = cp_model.CpSolver()
solution_printer = VarArraySolutionPrinter([x, y, z])

# Request enumeration of all solutions
solver.parameters.enumerate_all_solutions = True

# Solve the model
status = solver.solve(model, solution_printer)

# Print status and solution count
print(f"Status = {solver.status_name(status)}")
print(f"Number of solutions found: {solution_printer.solution_count}")


### Linear Programming problem with constraints and objective maximization.

In [None]:
from ortools.sat.python import cp_model

def main() -> None:
    """Minimal CP-SAT example to showcase calling the solver."""
    # Creates the model.
    model = cp_model.CpModel()

    # Creates the variables.
    var_upper_bound = max(50, 45, 37)
    x = model.new_int_var(0, var_upper_bound, "x")
    y = model.new_int_var(0, var_upper_bound, "y")
    z = model.new_int_var(0, var_upper_bound, "z")

    # Creates the constraints.
    model.add(2 * x + 7 * y + 3 * z <= 50)
    model.add(3 * x - 5 * y + 7 * z <= 45)
    model.add(5 * x + 2 * y - 6 * z <= 37)

    model.maximize(2 * x + 2 * y + 3 * z)

    # Creates a solver and solves the model.
    solver = cp_model.CpSolver()
    status = solver.solve(model)

    if status == cp_model.OPTIMAL or status == cp_model.FEASIBLE:
        print(f"Maximum of objective function: {solver.objective_value}\n")
        print(f"x = {solver.value(x)}")
        print(f"y = {solver.value(y)}")
        print(f"z = {solver.value(z)}")
    else:
        print("No solution found.")

    # Statistics.
    print("\nStatistics")
    print(f"  status   : {solver.status_name(status)}")
    print(f"  conflicts: {solver.num_conflicts}")
    print(f"  branches : {solver.num_branches}")
    print(f"  wall time: {solver.wall_time} s")


main()

### Job Scheduling

In [13]:
from ortools.sat.python import cp_model

# Create the model
model = cp_model.CpModel()

# Job durations and number of jobs
job_durations = [3, 2, 2]
num_jobs = len(job_durations)
horizon = sum(job_durations)  # Maximum total time needed

# Create start time variables for each job
start_0 = model.new_int_var(0, horizon, "start_0")
start_1 = model.new_int_var(0, horizon, "start_1")
start_2 = model.new_int_var(0, horizon, "start_2")

# Create interval variables (for no-overlap constraint)
interval_0 = model.new_interval_var(start_0, job_durations[0], start_0 + job_durations[0], "interval_0")
interval_1 = model.new_interval_var(start_1, job_durations[1], start_1 + job_durations[1], "interval_1")
interval_2 = model.new_interval_var(start_2, job_durations[2], start_2 + job_durations[2], "interval_2")

# Ensure no jobs overlap
model.add_no_overlap([interval_0, interval_1, interval_2])

# Optional: Minimize makespan (when the last job finishes)
makespan = model.new_int_var(0, horizon, "makespan")
model.add(makespan >= start_0 + job_durations[0])
model.add(makespan >= start_1 + job_durations[1])
model.add(makespan >= start_2 + job_durations[2])
model.minimize(makespan)

# Solve the model
solver = cp_model.CpSolver()
status = solver.solve(model)

# Output the result
if status == cp_model.OPTIMAL or status == cp_model.FEASIBLE:
    print("Job Schedule:")
    print(f"Job 1 starts at time {solver.value(start_0)}")
    print(f"Job 2 starts at time {solver.value(start_1)}")
    print(f"Job 3 starts at time {solver.value(start_2)}")
    print(f"Total time (makespan): {solver.value(makespan)}")
else:
    print("No solution found.")


Job Schedule:
Job 1 starts at time 0
Job 2 starts at time 5
Job 3 starts at time 3
Total time (makespan): 7


### Map Coloring Problem:

In [None]:
from ortools.sat.python import cp_model

def map_coloring():
    model = cp_model.CpModel()

    colors = ['Red', 'Green', 'Blue']
    color_ids = range(len(colors))

    regions = ['WA', 'NT', 'SA', 'Q', 'NSW', 'V', 'T']
    variables = {r: model.new_int_var(0, len(colors)-1, r) for r in regions}

    # Adjacency constraints
    neighbors = [
        ('WA', 'NT'), ('WA', 'SA'),
        ('NT', 'SA'), ('NT', 'Q'),
        ('SA', 'Q'), ('SA', 'NSW'), ('SA', 'V'),
        ('Q', 'NSW'), ('NSW', 'V')
    ]
    for r1, r2 in neighbors:
        model.add(variables[r1] != variables[r2])

    solver = cp_model.CpSolver()
    status = solver.solve(model)

    if status in (cp_model.FEASIBLE, cp_model.OPTIMAL):
        for r in regions:
            print(f"{r}: {colors[solver.value(variables[r])]}")
    else:
        print("No solution found.")

map_coloring()


### Sudoko Solver:

In [2]:
from ortools.sat.python import cp_model

def sudoku_4x4():
    model = cp_model.CpModel()
    cells = {}

    for i in range(4):
        for j in range(4):
            cells[(i,j)] = model.new_int_var(1, 4, f'cell_{i}_{j}')

    # Row and column constraints
    for i in range(4):
        model.add_all_different([cells[(i,j)] for j in range(4)])
        model.add_all_different([cells[(j,i)] for j in range(4)])

    # 2x2 box constraints
    for box_row in range(2):
        for box_col in range(2):
            box = []
            for i in range(2):
                for j in range(2):
                    box.append(cells[(box_row*2 + i, box_col*2 + j)])
            model.add_all_different(box)

    # Partial clues
    model.add(cells[(0,1)] == 3)
    model.add(cells[(1,3)] == 4)
    model.add(cells[(2,0)] == 1)
    model.add(cells[(3,2)] == 2)

    solver = cp_model.CpSolver()
    status = solver.solve(model)

    if status in (cp_model.FEASIBLE, cp_model.OPTIMAL):
        for i in range(4):
            print([solver.value(cells[(i,j)]) for j in range(4)])
    else:
        print("No solution found.")

sudoku_4x4()


[4, 3, 1, 2]
[2, 1, 3, 4]
[1, 2, 4, 3]
[3, 4, 2, 1]


### Send + More = Money (Cryptarithmetic)

In [None]:
from ortools.sat.python import cp_model

def send_more_money():
    model = cp_model.CpModel()
    letters = 'SENDMORY'
    vars = {l: model.new_int_var(0, 9, l) for l in letters}

    model.add_all_different(vars.values())
    model.add(vars['S'] != 0)
    model.add(vars['M'] != 0)

    send = 1000*vars['S'] + 100*vars['E'] + 10*vars['N'] + vars['D']
    more = 1000*vars['M'] + 100*vars['O'] + 10*vars['R'] + vars['E']
    money = 10000*vars['M'] + 1000*vars['O'] + 100*vars['N'] + 10*vars['E'] + vars['Y']

    model.add(send + more == money)

    solver = cp_model.CpSolver()
    status = solver.solve(model)

    if status in (cp_model.FEASIBLE, cp_model.OPTIMAL):
        for l in letters:
            print(f"{l} = {solver.value(vars[l])}")
    else:
        print("No solution found.")

send_more_money()


### N-Queens Problem:

In [None]:
from ortools.sat.python import cp_model

def n_queens(n=4):
    model = cp_model.CpModel()
    queens = [model.new_int_var(0, n-1, f'q{i}') for i in range(n)]

    # Row and diagonal constraints
    model.add_all_different(queens)  # Columns
    model.add_all_different([queens[i] + i for i in range(n)])  # Major diagonal
    model.add_all_different([queens[i] - i for i in range(n)])  # Minor diagonal

    solver = cp_model.CpSolver()
    status = solver.solve(model)

    if status in (cp_model.FEASIBLE, cp_model.OPTIMAL):
        for row in range(n):
            line = ['Q' if solver.value(queens[row]) == col else '.' for col in range(n)]
            print(' '.join(line))
    else:
        print("No solution found.")

n_queens()

# Simple Beam Search:

In [3]:
import heapq

# Define the graph as an adjacency list with edge costs
graph = {
    'S': [('A', 3), ('B', 6), ('C', 5)],
    'A': [('D', 9), ('E', 8)],
    'B': [('F', 12), ('G', 14)],
    'C': [('H', 7)],
    'H': [('I', 5), ('J', 6)],
    'I': [('K', 1), ('L', 10), ('M', 2)],
    'D': [], 'E': [], 'F': [], 'G': [], 'J': [],
    'K': [], 'L': [], 'M': []  # Leaf nodes
}

# Beam Search function
def beam_search(start, goal, beam_width=2):
    # Initialize the beam with the start state
    beam = [(0, [start])]  # (cumulative cost, path)

    while beam:
        candidates = []

        # Expand each path in the beam
        for cost, path in beam:
            current_node = path[-1]
            if current_node == goal:
                return path, cost  # Return the path and cost if goal is reached

            # Generate successors
            for neighbor, edge_cost in graph.get(current_node, []):
                new_cost = cost + edge_cost
                new_path = path + [neighbor]
                # print(current_node, new_path)
                candidates.append((new_cost, new_path))

        # Select top-k paths based on the lowest cumulative cost
        beam = heapq.nsmallest(beam_width, candidates, key=lambda x: x[0])
        # print(beam)
        # print("can: ", candidates)
    return None, float('inf')  # Return None if no path is found

# Run Beam Search
start_node = 'S'
goal_node = 'L'
beam_width = 3
path, cost = beam_search(start=start_node, goal=goal_node, beam_width=beam_width)

# Print results
if path:
    print(f"Path found: {' → '.join(path)} with total cost: {cost}")
else:
    print("No path found.")


Path found: S → C → H → I → L with total cost: 27


# Hill Climbing:

### Simple Code:

In [4]:
def f(x):
    return -x**2 + 10*x  # The function we want to maximize

def hill_climb(start, step_size=1, max_iterations=100):
    current_x = start
    current_value = f(current_x)

    for _ in range(max_iterations):
        # Check neighbors
        next_x1 = current_x + step_size
        next_x2 = current_x - step_size

        next_value1 = f(next_x1)
        next_value2 = f(next_x2)

        # Choose the best neighbor
        if next_value1 > current_value:
            current_x, current_value = next_x1, next_value1
        elif next_value2 > current_value:
            current_x, current_value = next_x2, next_value2
        else:
            # No better neighbor found, stop
            break

    return current_x, current_value

# Run hill climbing
solution, value = hill_climb(start=0)
print(f"Best solution found: x = {solution}, f(x) = {value}")

Best solution found: x = 5, f(x) = 25


### TSP using Hill-Climbing:

In [5]:
import random
import math

# Distance between cities (coordinates)
cities = [(0, 0), (2, 4), (5, 2), (6, 6), (8, 3)]

def distance(a, b):
    return math.hypot(a[0] - b[0], a[1] - b[1])

def total_distance(route):
    return sum(distance(route[i], route[(i + 1) % len(route)]) for i in range(len(route)))

def get_neighbor(route):
    a, b = random.sample(range(len(route)), 2)
    neighbor = route[:]
    neighbor[a], neighbor[b] = neighbor[b], neighbor[a]
    return neighbor

def hill_climb_tsp():
    current = cities[:]
    random.shuffle(current)
    current_cost = total_distance(current)

    for _ in range(1000):
        neighbor = get_neighbor(current)
        cost = total_distance(neighbor)
        if cost < current_cost:
            current, current_cost = neighbor, cost
    return current, current_cost

best_route, best_cost = hill_climb_tsp()
print(f"Best route: {best_route}\nCost: {best_cost}")


Best route: [(0, 0), (2, 4), (6, 6), (8, 3), (5, 2)]
Cost: 21.097265652766033


### 8-Queens Problem using simple hill climbing:

In [6]:
import random

# Heuristic function: Counts the number of pairs of attacking queens
def calculate_conflicts(state):

    conflicts = 0
    n = len(state)
    for i in range(n):
        for j in range(i + 1, n):
            # Check same column or diagonal
            if state[i] == state[j] or abs(state[i] - state[j]) == abs(i - j):
                conflicts += 1
    return conflicts

# Generate neighbors by moving one queen at a time
def get_neighbors(state):
    neighbors = []
    n = len(state)
    for row in range(n):
        for col in range(n):
            if col != state[row]:
                new_state = list(state)
                new_state[row] = col
                neighbors.append(new_state)
    return neighbors

# Simple Hill Climbing function
def simple_hill_climbing(n):
    # Random initial state
    current_state = [random.randint(0, n - 1) for _ in range(n)]
    current_conflicts = calculate_conflicts(current_state)

    while True:
        neighbors = get_neighbors(current_state)
        next_state = None
        next_conflicts = current_conflicts

        # Find the first better neighbor
        for neighbor in neighbors:
            neighbor_conflicts = calculate_conflicts(neighbor)
            if neighbor_conflicts < next_conflicts:
                next_state = neighbor
                next_conflicts = neighbor_conflicts
                break  # Move to the first better neighbor

        # If no better neighbor is found, return the current state
        if next_conflicts >= current_conflicts:
            break

        # Move to the better neighbor
        current_state = next_state
        current_conflicts = next_conflicts

    return current_state, current_conflicts

# Run Simple Hill Climbing for N-Queens
n = 8  # Change N here for different sizes
solution, conflicts = simple_hill_climbing(8)

# Print results
if conflicts == 0:
    print(f"Solution found for {n}-Queens problem:")
    print(solution)
else:
    print(f"Could not find a solution. Stuck at state with {conflicts} conflicts:")
    print(solution)

Solution found for 8-Queens problem:
[6, 3, 1, 7, 5, 0, 2, 4]


### 8 Queens Problem using Best-choice hill-climbing:

In [11]:
import random

def calculate_attacks(state):
    attacks = 0
    n = len(state)
    for i in range(n):
        for j in range(i + 1, n):
            if state[i] == state[j] or abs(state[i] - state[j]) == abs(i - j):
                attacks += 1
    return attacks

def get_best_neighbor(state):
    n = len(state)
    best_state = list(state)
    min_attacks = calculate_attacks(state)

    for row in range(n):
        for col in range(n):
            if col == state[row]:
                continue
            new_state = list(state)
            new_state[row] = col
            attacks = calculate_attacks(new_state)
            if attacks < min_attacks:
                best_state = new_state
                min_attacks = attacks
    return best_state, min_attacks

def hill_climbing_queens():
    state = [random.randint(0, 7) for _ in range(8)]
    attacks = calculate_attacks(state)
    while True:
        neighbor, neighbor_attacks = get_best_neighbor(state)
        if neighbor_attacks >= attacks:
            return state, attacks
        state, attacks = neighbor, neighbor_attacks

solution, conflicts = hill_climbing_queens()
print(f"Solution: {solution}, Conflicts: {conflicts}")

Solution: [2, 0, 7, 4, 1, 3, 0, 7], Conflicts: 2


# Game Theory:

### Simple MinMax Pseudocode:

function minimax(node, depth, maximizingPlayer) is
    if depth == 0 or node is a terminal node then
        return static evaluation of node

    if MaximizingPlayer then // for Maximizer Player
        maxEva = -infinity
        for each child of node do
            eva = minimax(child, depth - 1, false)
            maxEva = max(maxEva, eva) // Gives the maximum of the values
        return maxEva
    else // for Minimizer Player
        minEva = +infinity
        for each child of node do
            eva = minimax(child, depth - 1, true)
            minEva = min(minEva, eva) // Gives the minimum of the values
        return minEva

In [13]:
import math
class Node:
    def __init__(self, value=None):
        self.value = value
        self.children = []
        self.minmax_value = None
    
class MinMaxAgent:
    def __init__(self, depth):
        self.depth = depth
    def formulate_goal(self, node):
        return "Goal Reached" if node.minmax_value is not None else "Searchin"
    def act(self, node, environment):
        goal_status = self.formulate_goal(node)
        if goal_status == "Goal Reached":
            return f"Minmax valye for root node: {node.minmax_value}"
        else:
            return environment.compute_minmax(node, self.depth)

class Environment:
    def __init__(self, tree):
        self.tree = tree
        self.computed_nodes = []
    def get_percept(self, node):
        return node
    def compute_minmax(self, node, depth, maximing_player=True):
        if depth == 0 or not node.children:
            self.computed_nodes.append(node.value)
            return node.value
        if maximing_player:
            value = -math.inf
            for child in node.children:
                child_value = self.compute_minmax(child, depth -1, False)
                value = max(value, child_value)
            node.minmax_value = value
            self.computed_nodes.append(node.value)
            return value
        else:
            value = math.inf
            for child in node.children:
                child_value = self.compute_minmax(child, depth - 1, True)
                value = min(value, child_value)
            node.minmax_value = value
            self.computed_nodes.append(node.value)
            return value

def run_agent(agent, environment, start_node):
    percept = environment.get_percept(start_node)
    agent.act(percept, environment)


root = Node('A')
n1 = Node('B')
n2 = Node('C')
root.children = [n1, n2]
n3 = Node('D')
n4 = Node('E')
n5 = Node('F')
n6 = Node('G')
n1.children = [n3, n4]
n2.children = [n5, n6]
n7 = Node(2)
n8 = Node(3)
n9 = Node(5)
n10 = Node(9)
n3.children = [n7, n8]
n4.children = [n9, n10]

n11 = Node(0)
n12 = Node(1)
n13 = Node(7)
n14 = Node(5)
n5.children = [n11, n12]
n6.children = [n13, n14]
# define depth for Minimax
depth = 3
agent = MinMaxAgent(depth)
environment = Environment(root)
run_agent(agent, environment, root)
print("Computed Nodes: ", environment.computed_nodes)

print("MinMax Values: ")
print("A: ", root.minmax_value)
print("B: ", n1.minmax_value)
print("C: ", n2.minmax_value)
print("D: ", n3.minmax_value)
print("E: ", n4.minmax_value)
print("F: ", n5.minmax_value)
print("G: ", n6.minmax_value)


Computed Nodes:  [2, 3, 'D', 5, 9, 'E', 'B', 0, 1, 'F', 7, 5, 'G', 'C', 'A']
MinMax Values: 
A:  3
B:  3
C:  1
D:  3
E:  9
F:  1
G:  7


### Simple MinMax with Alpha-Beta Pruning:

In [14]:
import math

class Node:
    def __init__(self, value=None):
        self.value = value
        self.children = []
        self.minmax_value = None

class MinimaxAgent:
    def __init__(self, depth):
        self.depth = depth

    def formulate_goal(self, node):
        return "Goal reached" if node.minmax_value is not None else "Searching"

    def act(self, node, environment):
        goal_status = self.formulate_goal(node)
        if goal_status == "Goal reached":
            return f"Minimax value for root node: {node.minmax_value}"
        else:
            return environment.alpha_beta_search(node, self.depth, -math.inf, math.inf, True)

class Environment:
    def __init__(self, tree):
        self.tree = tree
        self.computed_nodes = []

    def get_percept(self, node):
        return node

    def alpha_beta_search(self, node, depth, alpha, beta, maximizing_player=True):
        self.computed_nodes.append(node.value)
        if depth == 0 or not node.children:
            return node.value
        if maximizing_player:
            value = -math.inf
            for child in node.children:
                value = max(value, self.alpha_beta_search(child, depth - 1, alpha, beta, False))
                alpha = max(alpha, value)
                if beta <= alpha:
                    print("Pruned node:", child.value)
                    break
            node.minmax_value = value
            return value
        else:
            value = math.inf
            for child in node.children:
                value = min(value, self.alpha_beta_search(child, depth - 1, alpha, beta, True))
                beta = min(beta, value)
                if beta <= alpha:
                    print("Pruned node:", child.value)
                    break
            node.minmax_value = value
            return value

def run_agent(agent, environment, start_node):
    percept = environment.get_percept(start_node)
    agent.act(percept, environment)

# Constructing the tree
root = Node('A')
n1 = Node('B')
n2 = Node('C')
root.children = [n1, n2]

n3 = Node('D')
n4 = Node('E')
n5 = Node('F')
n6 = Node('G')
n1.children = [n3, n4]
n2.children = [n5, n6]

n7 = Node(2)
n8 = Node(3)
n9 = Node(5)
n10 = Node(9)
n3.children = [n7, n8]
n4.children = [n9, n10]

n11 = Node(0)
n12 = Node(1)
n13 = Node(7)
n14 = Node(5)
n5.children = [n11, n12]
n6.children = [n13, n14]

# Define depth for Alpha-Beta pruning
depth = 3
agent = MinimaxAgent(depth)
environment = Environment(root)
run_agent(agent, environment, root)

print("Computed Nodes:", environment.computed_nodes)
print("Minimax values:")
print(f"A: {root.minmax_value}")
print(f"B: {n1.minmax_value}")
print(f"C: {n2.minmax_value}")
print(f"D: {n3.minmax_value}")
print(f"E: {n4.minmax_value}")
print(f"F: {n5.minmax_value}")
print(f"G: {n6.minmax_value}")

Pruned node: 5
Pruned node: F
Computed Nodes: ['A', 'B', 'D', 2, 3, 'E', 5, 'C', 'F', 0, 1]
Minimax values:
A: 3
B: 3
C: 1
D: 3
E: 5
F: 1
G: None


# Bayes' Network

### Burglary-Alarm System:

In [17]:
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Step 1: Define the structure of the Bayesian Network
model = DiscreteBayesianNetwork([
    ('Burglary', 'Alarm'),
    ('Earthquake', 'Alarm'),
    ('Alarm', 'JohnCalls'),
    ('Alarm', 'MaryCalls')
])

# Step 2: Define the CPDs (Conditional Probability Distributions)
cpd_burglary = TabularCPD(variable='Burglary', variable_card=2, values=[[0.999], [0.001]])
cpd_earthquake = TabularCPD(variable='Earthquake', variable_card=2, values=[[0.998], [0.002]])
cpd_alarm = TabularCPD(
    variable='Alarm',
    variable_card=2,
    values=[
        [0.999, 0.71, 0.06, 0.05],  # Alarm = False
        [0.001, 0.29, 0.94, 0.95]   # Alarm = True
    ],
    evidence=['Burglary', 'Earthquake'],
    evidence_card=[2, 2]
)
cpd_john = TabularCPD(
    variable='JohnCalls',
    variable_card=2,
    values=[
        [0.3, 0.9],  # JohnCalls = False
        [0.7, 0.1]   # JohnCalls = True
    ],
    evidence=['Alarm'],
    evidence_card=[2]
)
cpd_mary = TabularCPD(
    variable='MaryCalls',
    variable_card=2,
    values=[
        [0.2, 0.99],  # MaryCalls = False
        [0.8, 0.01]   # MaryCalls = True
    ],
    evidence=['Alarm'],
    evidence_card=[2]
)

# Step 3: Add CPDs to the model
model.add_cpds(cpd_burglary, cpd_earthquake, cpd_alarm, cpd_john, cpd_mary)

# Step 4: Verify the model
assert model.check_model(), "Model is incorrect"

# Step 5: Perform inference
inference = VariableElimination(model)
result = inference.query(variables=['Burglary'], evidence={'JohnCalls': 1, 'MaryCalls': 1})
print(result)


  from .autonotebook import tqdm as notebook_tqdm


+-------------+-----------------+
| Burglary    |   phi(Burglary) |
| Burglary(0) |          0.9999 |
+-------------+-----------------+
| Burglary(1) |          0.0001 |
+-------------+-----------------+


### Medical Diagnosis (Cancer Detection)

In [18]:
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

model = DiscreteBayesianNetwork([('Pollution', 'Cancer'), ('Smoker', 'Cancer'), ('Cancer', 'XRay'), ('Cancer', 'Dyspnoea')])

cpd_pollution = TabularCPD('Pollution', 2, [[0.9], [0.1]])
cpd_smoker = TabularCPD('Smoker', 2, [[0.7], [0.3]])
cpd_cancer = TabularCPD('Cancer', 2,
    values=[[0.99, 0.9, 0.97, 0.05],
            [0.01, 0.1, 0.03, 0.95]],
    evidence=['Pollution', 'Smoker'],
    evidence_card=[2, 2])
cpd_xray = TabularCPD('XRay', 2, [[0.2, 0.9], [0.8, 0.1]], evidence=['Cancer'], evidence_card=[2])
cpd_dyspnoea = TabularCPD('Dyspnoea', 2, [[0.65, 0.3], [0.35, 0.7]], evidence=['Cancer'], evidence_card=[2])

model.add_cpds(cpd_pollution, cpd_smoker, cpd_cancer, cpd_xray, cpd_dyspnoea)
assert model.check_model()

inference = VariableElimination(model)
result = inference.query(variables=['Cancer'], evidence={'Smoker': 1, 'Pollution': 1})
print(result)

+-----------+---------------+
| Cancer    |   phi(Cancer) |
| Cancer(0) |        0.0500 |
+-----------+---------------+
| Cancer(1) |        0.9500 |
+-----------+---------------+


### Spam Email Detection:

In [19]:
model = DiscreteBayesianNetwork([('HasOffer', 'Spam'), ('ContainsLink', 'Spam')])

cpd_offer = TabularCPD('HasOffer', 2, [[0.6], [0.4]])
cpd_link = TabularCPD('ContainsLink', 2, [[0.7], [0.3]])
cpd_spam = TabularCPD('Spam', 2,
    values=[[0.95, 0.8, 0.85, 0.3],
            [0.05, 0.2, 0.15, 0.7]],
    evidence=['HasOffer', 'ContainsLink'],
    evidence_card=[2, 2])

model.add_cpds(cpd_offer, cpd_link, cpd_spam)
assert model.check_model()

inference = VariableElimination(model)
result = inference.query(variables=['Spam'], evidence={'HasOffer': 1, 'ContainsLink': 1})
print(result)


+---------+-------------+
| Spam    |   phi(Spam) |
| Spam(0) |      0.3000 |
+---------+-------------+
| Spam(1) |      0.7000 |
+---------+-------------+


### Student Exam Performance: 

In [20]:
model = DiscreteBayesianNetwork([('Study', 'Grade'), ('Sleep', 'Grade')])

cpd_study = TabularCPD('Study', 2, [[0.3], [0.7]])
cpd_sleep = TabularCPD('Sleep', 2, [[0.4], [0.6]])
cpd_grade = TabularCPD('Grade', 2,
    values=[[0.9, 0.6, 0.7, 0.1],
            [0.1, 0.4, 0.3, 0.9]],
    evidence=['Study', 'Sleep'],
    evidence_card=[2, 2])

model.add_cpds(cpd_study, cpd_sleep, cpd_grade)
assert model.check_model()

inference = VariableElimination(model)
result = inference.query(variables=['Grade'], evidence={'Study': 1, 'Sleep': 0})
print(result)


+----------+--------------+
| Grade    |   phi(Grade) |
| Grade(0) |       0.7000 |
+----------+--------------+
| Grade(1) |       0.3000 |
+----------+--------------+


## EDA + Model Training + Evaluation:

In [None]:
# Importing necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ---------------------------------------
# 1. Load and Prepare the Data
# ---------------------------------------

# Load Iris dataset
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['species'] = iris.target
df['species'] = df['species'].apply(lambda x: iris.target_names[x])  # Convert to labels

# Show first few rows
print("Sample data:")
print(df.head())

# ---------------------------------------
# 2. Exploratory Data Analysis (EDA)
# ---------------------------------------

# Check for missing values
print("\nMissing values:")
print(df.isnull().sum())

# Summary statistics
print("\nSummary statistics:")
print(df.describe())

# Pairplot for feature relationships
sns.pairplot(df, hue='species')
plt.suptitle("Pairwise Feature Relationships", y=1.02)
plt.show()

# Correlation heatmap
plt.figure(figsize=(8, 5))
sns.heatmap(df.drop('species', axis=1).corr(), annot=True, cmap='coolwarm')
plt.title("Feature Correlation Heatmap")
plt.show()

# ---------------------------------------
# 3. Model Training
# ---------------------------------------

# Encode species as integers
df['species'] = df['species'].astype('category').cat.codes

# Separate features and target
X = df.drop('species', axis=1)
y = df['species']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the logistic regression model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# ---------------------------------------
# 4. Model Evaluation
# ---------------------------------------

# Predict on test set
y_pred = model.predict(X_test)

# Accuracy score
acc = accuracy_score(y_test, y_pred)
print(f"\nAccuracy on test set: {acc:.2f}")

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=iris.target_names,
            yticklabels=iris.target_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=iris.target_names))


#### Reading and Writing Data:

In [None]:
import pandas as pd
df = pd.read_csv("Data.csv") # Reading a CSV file
df.to_csv("output.csv", index=False) # Writing to a CSV file
print(df.head()) 

#### Handling missing values:

In [None]:
# Check for missing values
print(df.isnull().sum())
# Drop missing values
df_cleaned = df.dropna()
print("Dataset after dropping missing values:", df_cleaned.shape)
# Handle missing values
df['age'] = df['age'].fillna(df['age'].mean()) # Fill age with mean
df['embarked'] = df['embarked'].fillna(df['embarked'].mode()[0]) # Fill embarked with mode
# Convert deck to string and replace NaN
df['deck'] = df['deck'].astype(str).fillna('Unknown')
# Drop remaining NaN values if needed
df = df.dropna()
print(df.isnull().sum()) # Confirm no missing values

Handling missing data

Label encoding and one-hot encoding

Data visualization

Train-test split

K-Fold cross-validation

Leave-One-Out cross-validation

Confusion matrix

ROC curve

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score, KFold, LeaveOneOut
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

# Load a sample dataset (Breast Cancer dataset)
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

# Introduce some missing values for demonstration
df.iloc[0:10, 0] = np.nan

# Handling missing data using SimpleImputer
imputer = SimpleImputer(strategy='mean')
df[df.columns[:-1]] = imputer.fit_transform(df[df.columns[:-1]])

# Label encoding (not needed here since target is already numeric)
# Just a demonstration
label_encoder = LabelEncoder()
df['target'] = label_encoder.fit_transform(df['target'])

# One-Hot Encoding (if we had categorical features)
# Here we create a dummy feature to simulate
df['dummy_cat'] = np.random.choice(['A', 'B', 'C'], size=len(df))
df = pd.get_dummies(df, columns=['dummy_cat'], drop_first=True)

# Data visualization
sns.countplot(x='target', data=df)
plt.title("Target Class Distribution")
plt.show()

# Correlation heatmap
plt.figure(figsize=(12, 10))
sns.heatmap(df.corr(), cmap='coolwarm', annot=False)
plt.title("Feature Correlation Heatmap")
plt.show()

# Train-test split
X = df.drop('target', axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model training using Logistic Regression
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict and evaluate on test set
y_pred = model.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# ROC Curve
y_prob = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='blue', label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='red', linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()

# K-Fold Cross Validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
kfold_scores = cross_val_score(model, X, y, cv=kfold)
print("K-Fold CV Scores:", kfold_scores)
print("Average K-Fold Score:", np.mean(kfold_scores))

# Leave-One-Out Cross Validation
loo = LeaveOneOut()
loo_scores = cross_val_score(model, X, y, cv=loo)
print("Leave-One-Out CV Accuracy (mean):", np.mean(loo_scores))

### Support Vector Machine:

In [24]:
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target
y = (y == 0).astype(int) # Convert to binary classification problem
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
random_state=42)
# Train SVM model with RBF kernel
svm = SVC(kernel='rbf', C=1, gamma='scale')
svm.fit(X_train, y_train)
# Make predictions
y_pred = svm.predict(X_test)
# Evaluate the model
print("SVM Accuracy:", accuracy_score(y_test, y_pred))

SVM Accuracy: 1.0


### Linear Regression

In [26]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load a sample dataset
data = fetch_california_housing()
x = pd.DataFrame(data.data, columns=data.feature_names)  # Features
y = pd.Series(data.target)  # Target (house values)

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Create and train the Linear Regression model
LR = LinearRegression()
ModelLR = LR.fit(x_train, y_train)

# Predict on the test data
PredictionLR = ModelLR.predict(x_test)

# Print the predictions
print("Predictions on test data:\n", PredictionLR)

# Evaluate the model
mse = mean_squared_error(y_test, PredictionLR)
r2 = r2_score(y_test, PredictionLR)

print(f"\nMean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")


INFO:sklearn.datasets._california_housing:Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to C:\Users\yesau\scikit_learn_data


Predictions on test data:
 [0.71912284 1.76401657 2.70965883 ... 4.46877017 1.18751119 2.00940251]

Mean Squared Error: 0.56
R² Score: 0.58


### Decision tree classifier:

In [27]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load sample dataset
data = load_iris()
x = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split the dataset
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Initialize the DecisionTreeClassifier
DT = DecisionTreeClassifier()

# Train the model
ModelDT = DT.fit(x_train, y_train)

# Predict on test data
PredictionDT = DT.predict(x_test)
print("Predictions:", PredictionDT)

# Model Training Accuracy
print('==================== DT Training Accuracy ====================')
tracDT = DT.score(x_train, y_train)  # The score method gives accuracy directly
TrainingAccDT = tracDT * 100
print(f"Training Accuracy: {TrainingAccDT:.2f}%")

# Model Testing Accuracy
print('==================== DT Testing Accuracy =====================')
teacDT = accuracy_score(y_test, PredictionDT)
testingAccDT = teacDT * 100
print(f"Testing Accuracy: {testingAccDT:.2f}%")


Predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
Training Accuracy: 100.00%
Testing Accuracy: 100.00%


### K-Means clustering:

In [None]:
# Importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Importing the dataset
df = pd.read_csv('Mall_Customers.csv')

# Extracting relevant features: Annual Income and Spending Score
x = df.iloc[:, [3, 4]].values

# Finding optimal number of clusters using the Elbow Method
wcss_list = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(x)
    wcss_list.append(kmeans.inertia_)

# Plotting the Elbow Graph
plt.plot(range(1, 11), wcss_list)
plt.title('The Elbow Method Graph')
plt.xlabel('Number of clusters (k)')
plt.ylabel('WCSS')
plt.show()

# Normalize features for better clustering
scaler = StandardScaler()
X_scaled = scaler.fit_transform(x)

# Training the KMeans model on the dataset
kmeans = KMeans(n_clusters=5, init='k-means++', random_state=42)
y_predict = kmeans.fit_predict(X_scaled)

# Visualizing the clusters
plt.scatter(x[y_predict == 0, 0], x[y_predict == 0, 1], s=100, c='blue', label='Cluster 1')
plt.scatter(x[y_predict == 1, 0], x[y_predict == 1, 1], s=100, c='green', label='Cluster 2')
plt.scatter(x[y_predict == 2, 0], x[y_predict == 2, 1], s=100, c='red', label='Cluster 3')
plt.scatter(x[y_predict == 3, 0], x[y_predict == 3, 1], s=100, c='black', label='Cluster 4')
plt.scatter(x[y_predict == 4, 0], x[y_predict == 4, 1], s=100, c='purple', label='Cluster 5')

# Plotting Centroids
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], 
            s=300, c='yellow', label='Centroid')

plt.title('Clusters of Customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()


### Decision Tree:

In [12]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load dataset
iris = datasets.load_iris()
x = iris.data
y = iris.target
y = (y == 0).astype(int)
# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
random_state=42)
# Create and train the Linear Regression model
LR = LinearRegression()
ModelLR = LR.fit(x_train, y_train)
# Predict on the test data
PredictionLR = ModelLR.predict(x_test)
# Print the predictions
print("Predictions:", PredictionLR)
from sklearn.metrics import r2_score
print('===================LR Testing Accuracy================')
teachLR = r2_score(y_test, PredictionLR)
testingAccLR = teachLR * 100
print(testingAccLR)

from sklearn.tree import DecisionTreeClassifier

DT = DecisionTreeClassifier()
ModelDt = DT.fit(x_train, y_train)

PredictionDT = DT.predict(x_test)
print("Predictions: ", PredictionDT)

print('====================DT Training Accuracy===============')
tracDT = DT.score(x_train, y_train)
TrainingAccDT = tracDT * 100
print(f"Training Accuracy: {TrainingAccDT:.2f}%")

# Model Testing Accuracy
print('=====================DT Testing Accuracy=================')
teacDT = accuracy_score(y_test, PredictionDT)
testingAccDT = teacDT * 100
print(f"Testing Accuracy: {testingAccDT:.2f}%")

Predictions: [ 0.06858754  1.00867146 -0.44710026  0.11520442  0.06738308  0.9268164
  0.30149058  0.03285725 -0.05930489  0.19989713  0.05860855  0.83360058
  1.01238455  0.84301552  1.02396928  0.1851498  -0.15568455  0.14419308
  0.08710758 -0.16992588  0.83271001  0.04251129  0.88704775 -0.16397104
  0.00190482 -0.02397524 -0.25389267 -0.11680648  0.82169091  0.81105763]
88.73227699320645
Predictions:  [0 1 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1]
Training Accuracy: 100.00%
Testing Accuracy: 100.00%
