<a href="https://colab.research.google.com/github/pgordin/GraphsSN2024_1/blob/main/Hungarian_algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from scipy.optimize import linear_sum_assignment

def _cover_zeros(matrix):
    """Greedily cover every zero of *matrix* with row and column lines.

    At each step the row or column that still holds the most uncovered zeros
    is covered (rows win ties, and the lowest index wins among equals).

    Returns a pair of boolean masks ``(covered_rows, covered_columns)``.
    The greedy choice is a heuristic: it always covers all zeros, but it may
    use more lines than the true minimum cover.
    """
    zeros = matrix == 0
    covered_rows = np.zeros(matrix.shape[0], dtype=bool)
    covered_columns = np.zeros(matrix.shape[1], dtype=bool)
    while zeros.any():
        # count the zeros that are still uncovered in each row and column
        row_counts = zeros.sum(axis=1)
        col_counts = zeros.sum(axis=0)
        if row_counts.max() >= col_counts.max():
            row = row_counts.argmax()
            covered_rows[row] = True
            zeros[row, :] = False  # these zeros are now covered
        else:
            col = col_counts.argmax()
            covered_columns[col] = True
            zeros[:, col] = False  # these zeros are now covered
    return covered_rows, covered_columns


def hungarian_algorithm(cost_matrix):
    """Solve the assignment problem for *cost_matrix* (minimum total cost).

    The matrix is reduced with the classic Hungarian steps (row reduction,
    column reduction, then repeated "cover the zeros / adjust" rounds) and the
    final assignment is read off the reduced matrix with
    ``scipy.optimize.linear_sum_assignment``.

    Parameters:
        cost_matrix: 2-D array-like; entry ``[r][c]`` is the cost of assigning
            row ``r`` to column ``c``. To maximize instead, pass negated
            values. Rectangular input is accepted: it is padded with
            zero-cost dummy rows/columns, and pairs involving a dummy are
            dropped from the result, so ``min(n, m)`` pairs are returned.

    Returns:
        ``(optimal_assignment, minimum_cost)`` where ``optimal_assignment`` is
        a list of ``(row, column)`` index pairs sorted by row and
        ``minimum_cost`` is the sum of the original costs of those pairs.

    Raises:
        ValueError: if *cost_matrix* is not two-dimensional.
    """
    original_matrix = np.array(cost_matrix)  # untouched copy, used for the final cost
    if original_matrix.ndim != 2:
        raise ValueError("cost_matrix must be two-dimensional")
    n, m = original_matrix.shape
    if n == 0 or m == 0:
        return [], 0  # nothing to assign

    # Row/column reductions only preserve the optimum when every row and
    # every column is used, i.e. on a square matrix. Padding with constant
    # (zero) dummy rows/columns does not change which real pairs are optimal.
    size = max(n, m)
    work = np.zeros((size, size), dtype=original_matrix.dtype)
    work[:n, :m] = original_matrix

    # subtract the row minimum, then the column minimum, so that every row
    # and every column contains at least one zero
    work -= work.min(axis=1, keepdims=True)
    work -= work.min(axis=0, keepdims=True)

    while True:
        covered_rows, covered_columns = _cover_zeros(work)
        # stop once the lines covering all zeros number at least the matrix size
        if covered_rows.sum() + covered_columns.sum() >= size:
            break

        # adjust the matrix: subtract the smallest uncovered value from all
        # uncovered cells and add it to the cells covered by two lines
        uncovered = ~covered_rows[:, None] & ~covered_columns[None, :]
        doubly_covered = covered_rows[:, None] & covered_columns[None, :]
        min_uncovered = work[uncovered].min()
        work[uncovered] -= min_uncovered
        work[doubly_covered] += min_uncovered

    # The reductions above never change which assignments are optimal, so
    # solving the reduced matrix yields an optimum of the original problem
    # even if the greedy cover made the loop stop early.
    row_ind, col_ind = linear_sum_assignment(work)
    optimal_assignment = [
        (int(r), int(c))
        for r, c in zip(row_ind, col_ind)
        if r < n and c < m  # drop pairs that involve a padding row/column
    ]

    # calculating the minimum cost from the original (unreduced) values
    minimum_cost = sum(original_matrix[r, c] for r, c in optimal_assignment)
    return optimal_assignment, minimum_cost


# Example: the entries are negated, so minimizing the total cost selects the
# assignment with the largest total of the underlying (positive) values.
original_matrix = [
    [-4, -1, -6, -2, -3],
    [-5, 0, -3, -7, -6],
    [-2, -3, -4, -5, -8],
    [-3, -4, -6, -3, -4],
    [-4, -6, -5, -8, -6],
]

# run the solver and report the chosen (row, column) pairs and their total cost
assignment, cost = hungarian_algorithm(original_matrix)
print(f"Optimal Assignment: {assignment}")
print(f"Minimum Cost: {cost}")


Optimal Assignment: [(0, 2), (1, 0), (2, 4), (3, 1), (4, 3)]
Minimum Cost: -31


CODE'S CONNECTION WITH GRAPH THEORY:

The assignment problem can be modeled as a weighted bipartite graph, where one set of nodes represents workers (rows) and the other set represents tasks (columns).
The edges between these sets have weights representing the cost of assigning a worker to a task (values in the cost matrix).

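Our goal is to find a perfect matching in this bipartite graph with the minimum total cost (or the maximum total value, as in this example, where the values are negated so that minimizing the cost maximizes the original total).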
A perfect matching is a subset of edges such that each node is incident to exactly one edge of the subset (every worker is assigned to exactly one task, and every task to exactly one worker).