In [5]:
# Matrix multiplication expressed as a MapReduce job (map -> shuffle -> reduce)
from typing import Iterator, Tuple, List
from collections import defaultdict
import random
from typing import List

Key = Tuple[int, int]
Value = int
MappedItem = Tuple[Key, Value]


def generate_square_matrix(size: int) -> List[List[int]]:
    """Build a ``size`` x ``size`` matrix of random integers.

    Each entry is drawn independently with ``random.randint(0, 20)``, so every
    value lies in the closed range [0, 20]. The module-level ``random`` state
    is used, so results are reproducible only if the caller seeds it first.

    Args:
        size: Number of rows and of columns.

    Returns:
        A list of ``size`` rows, each a list of ``size`` ints. A ``size`` of
        zero or less produces an empty list.
    """
    matrix: List[List[int]] = []
    for _row_index in range(size):
        current_row: List[int] = []
        for _col_index in range(size):
            current_row.append(random.randint(0, 20))
        matrix.append(current_row)
    return matrix


def map_matrix_multiply(A: List[List[int]], B: List[List[int]]) -> Iterator[MappedItem]:
    """Map phase of the matrix product: emit every partial product.

    For ``A`` with n rows and m columns and ``B`` with m rows and k columns,
    yields ``((i, j), A[i][r] * B[r][j])`` for every row ``i``, inner index
    ``r`` and column ``j``. Summing all values that share the key ``(i, j)``
    gives entry ``C[i][j]`` of the product.

    The shape is taken from the first row of each matrix (``len(A[0])`` and
    ``len(B[0])``); rows are not checked for equal length.

    Args:
        A: Left operand as a list of rows.
        B: Right operand as a list of rows.

    Yields:
        ``((i, j), partial_product)`` tuples, ordered by ``i``, then ``r``,
        then ``j``.

    Raises:
        ValueError: If the number of columns of ``A`` differs from the number
            of rows of ``B``. Because this is a generator, the error surfaces
            when iteration starts, not when the function is called.
    """
    if not A:
        # No rows on the left means there is nothing to emit; also avoids A[0].
        return

    inner = len(A[0])
    if inner != len(B):
        # Without this check extra rows of B would be silently ignored and
        # missing rows would fail mid-iteration with an IndexError.
        raise ValueError(
            f"incompatible shapes: A has {inner} columns but B has {len(B)} rows"
        )

    # B is empty only when inner == 0; there are then no columns to produce.
    cols = len(B[0]) if B else 0

    for i in range(len(A)):
        for r in range(inner):
            for j in range(cols):
                yield (i, j), A[i][r] * B[r][j]


def shuffle(mapped_data: Iterator[MappedItem]) -> dict[Key, List[Value]]:
    """Shuffle phase: bucket the mapped values by their key.

    Consumes ``mapped_data`` completely and, for each distinct key, collects
    the values emitted for it in the order they arrived.

    Args:
        mapped_data: Iterable of ``(key, value)`` pairs.

    Returns:
        A ``defaultdict(list)`` mapping every key seen to its list of values.
    """
    buckets = defaultdict(list)
    for pair in mapped_data:
        cell, partial = pair
        buckets[cell].append(partial)
    return buckets


def reduce_matrix_multiply(grouped_data: dict[Key, List[Value]]) -> Iterator[MappedItem]:
    """Reduce phase: collapse each key's list of values into their sum.

    Args:
        grouped_data: Mapping from a key to the list of values collected for
            it.

    Yields:
        ``(key, sum(values))`` for every entry, in the mapping's iteration
        order.
    """
    entries = grouped_data.items()
    yield from ((cell, sum(partials)) for cell, partials in entries)


def matrix_multiply_mapreduce(A: List[List[int]], B: List[List[int]]) -> List[List[int]]:
    """Multiply two matrices by running map -> shuffle -> reduce.

    The map step emits every partial product keyed by its output cell, the
    shuffle step groups them per cell, and the reduce step sums each group.
    The sums are then written into a dense result matrix.

    The shape is taken from the first row of each matrix (``len(A[0])`` and
    ``len(B[0])``); rows are not checked for equal length.

    Args:
        A: Left operand as a list of rows.
        B: Right operand as a list of rows.

    Returns:
        The product as a list of ``len(A)`` rows with ``len(B[0])`` entries
        each. An empty ``A`` gives ``[]``; when the shared inner dimension is
        zero (``B`` has no rows) each result row is empty.

    Raises:
        ValueError: If the number of columns of ``A`` differs from the number
            of rows of ``B``.
    """
    if not A:
        # Nothing to multiply; also avoids indexing A[0] further down.
        return []

    inner = len(A[0])
    if inner != len(B):
        raise ValueError(
            f"incompatible shapes: A has {inner} columns but B has {len(B)} rows"
        )

    if not B:
        # Inner dimension is zero and B has no row to read a width from,
        # so the result has len(A) rows and no columns.
        return [[] for _ in A]

    n = len(A)
    k = len(B[0])

    mapped = map_matrix_multiply(A, B)
    shuffled = shuffle(mapped)
    reduced = reduce_matrix_multiply(shuffled)

    # Start from zeros so any cell without a reduced value stays 0.
    C = [[0 for _ in range(k)] for _ in range(n)]
    for (i, j), value in reduced:
        C[i][j] = value

    return C


if __name__ == "__main__":
    # Two independent 300x300 operands filled with random integers.
    A, B = (generate_square_matrix(300) for _ in range(2))

    # Product computed through the map -> shuffle -> reduce pipeline.
    C = matrix_multiply_mapreduce(A, B)

    # Header line, then one matrix row per output line.
    print("Результат:")
    for row in C:
        print(row)

Результат:
[31138, 30878, 32144, 32586, 30730, 31152, 28247, 30348, 31881, 31547, 32742, 31444, 29329, 29807, 30258, 29858, 32379, 31267, 29747, 33030, 30535, 29578, 30934, 30346, 30160, 31499, 30599, 30006, 29075, 31715, 30663, 31065, 30584, 28316, 30415, 30698, 29272, 30385, 29665, 31337, 29152, 29072, 29530, 29175, 31057, 31233, 29485, 31193, 31442, 28353, 29554, 30742, 30991, 29067, 31436, 30558, 31250, 31532, 30757, 30459, 29754, 30484, 31294, 32218, 30617, 28762, 30555, 29443, 29978, 29630, 32582, 30200, 30863, 30790, 32304, 31345, 29663, 28000, 31043, 31101, 31046, 29106, 31456, 27519, 28798, 30393, 31448, 30210, 30256, 30625, 31222, 29449, 29790, 28844, 30523, 31785, 28784, 31327, 29594, 31147, 28592, 30479, 30922, 29773, 30196, 32613, 31607, 31172, 31278, 30329, 30027, 28689, 31334, 31310, 31022, 30705, 29348, 33380, 29122, 30519, 29099, 30614, 29843, 30850, 32979, 29935, 32044, 30208, 31100, 31731, 27958, 30297, 30360, 29221, 29929, 27329, 30344, 31366, 29390, 31685, 28924, 3

In [4]:
# Simple linear regression (least squares) expressed as a MapReduce job
from typing import Iterator, Tuple, List
from collections import defaultdict


MappedItem = Tuple[str, float]


def map_linear_regression(data: List[Tuple[float, float]]) -> Iterator[MappedItem]:
    """Map phase: emit the per-point terms needed for a least-squares fit.

    For every ``(x, y)`` point, five keyed values are produced, in this order:
    ``"sum_x"`` -> x, ``"sum_y"`` -> y, ``"sum_xy"`` -> x * y,
    ``"sum_x2"`` -> x * x and ``"count"`` -> 1.0.

    Args:
        data: Iterable of ``(x, y)`` pairs.

    Yields:
        ``(name, value)`` tuples, five per input point.
    """
    for point in data:
        x_val, y_val = point
        yield "sum_x", x_val
        yield "sum_y", y_val
        yield "sum_xy", x_val * y_val
        yield "sum_x2", x_val * x_val
        yield "count", 1.0


def shuffle(mapped_data: Iterator[MappedItem]) -> dict[str, List[float]]:
    """Shuffle phase: gather the mapped values under their statistic name.

    Reads ``mapped_data`` to exhaustion and appends each value to the list
    kept for its key, preserving arrival order.

    Args:
        mapped_data: Iterable of ``(name, value)`` pairs.

    Returns:
        A ``defaultdict(list)`` mapping each name seen to its list of values.
    """
    by_name = defaultdict(list)
    for name, term in mapped_data:
        bucket = by_name[name]
        bucket.append(term)
    return by_name


def reduce_linear_regression(grouped_data: dict[str, List[float]]) -> Iterator[MappedItem]:
    """Reduce phase: total the values collected for each statistic.

    Args:
        grouped_data: Mapping from a statistic name to the list of values
            gathered for it.

    Yields:
        ``(name, sum(values))`` for every entry, in the mapping's iteration
        order.
    """
    for name in grouped_data:
        yield name, sum(grouped_data[name])


def linear_regression_mapreduce(data: List[Tuple[float, float]]) -> Tuple[float, float]:
    """Fit ``y = a * x + b`` by least squares using map -> shuffle -> reduce.

    The pipeline accumulates the count and the sums of x, y, x*y and x*x over
    all points, then applies the closed-form formulas

        a = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x ** 2)
        b = (sum_y - a * sum_x) / n

    Args:
        data: The ``(x, y)`` observations.

    Returns:
        ``(a, b)``: slope and intercept of the fitted line.

    Raises:
        ValueError: If ``data`` is empty.
        ZeroDivisionError: If the slope denominator is zero, which happens
            when all x values are equal (including a single point).
    """
    if not data:
        # Without this guard the lookup of "count" below fails with an
        # uninformative KeyError, because the reducer produced no keys.
        raise ValueError("cannot fit a regression line to empty data")

    mapped = map_linear_regression(data)
    shuffled = shuffle(mapped)
    reduced = dict(reduce_linear_regression(shuffled))

    n = reduced["count"]
    sum_x = reduced["sum_x"]
    sum_y = reduced["sum_y"]
    sum_xy = reduced["sum_xy"]
    sum_x2 = reduced["sum_x2"]

    a = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x ** 2)
    b = (sum_y - a * sum_x) / n

    return a, b


if __name__ == "__main__":
    # Sample observations, paired up as (x, y) tuples.
    data = list(zip((10, 20, 30, 40, 50), (20, 30, 50, 40, 60)))

    # Slope and intercept of the fitted line.
    a, b = linear_regression_mapreduce(data)

    # Header line, then the fitted equation with three decimals.
    print("Линейная регрессия:")
    print(f"y = {a:.3f}x + {b:.3f}")

Линейная регрессия:
y = 0.900x + 13.000
