In [7]:
from dataclasses import dataclass
from random import Random
from typing import List, Tuple, Dict

def transpose(M: List[List[float]]) -> List[List[float]]:
    """Return the transpose of a matrix stored as a list of rows.

    The number of columns is taken from the first row, so every row must
    have at least that many entries (a shorter row raises ``IndexError``).

    Args:
        M: Matrix as a list of rows.

    Returns:
        A new list of rows in which element ``[j][i]`` equals ``M[i][j]``.
        An empty input yields an empty list.
    """
    # An empty matrix has no first row to measure; return early instead of
    # failing on M[0].
    if not M:
        return []
    cols = len(M[0])
    return [[row[j] for row in M] for j in range(cols)]

# Needed to fit the bias term w_0
def add_intercept(X: List[List[float]]) -> List[List[float]]:
    """Return a copy of ``X`` with a leading column of ones.

    The width is read from the first row; each output row consists of
    ``1.0`` followed by that many entries of the matching input row.
    """
    width = len(X[0])
    augmented = []
    for source_row in X:
        new_row = [1.0]
        new_row.extend(source_row[j] for j in range(width))
        augmented.append(new_row)
    return augmented

def to_column_matrix(v: List[float]) -> List[List[float]]:
    """Wrap each entry of ``v`` in its own single-element row."""
    column = []
    for entry in v:
        column.append([entry])
    return column

def from_column_matrix(M: List[List[float]]) -> List[float]:
    """Flatten a single-column matrix into a plain list.

    Only the first row's width is checked (via ``assert``); the first
    entry of every row is then collected in order.
    """
    first_row_width = len(M[0])
    assert first_row_width == 1, "Ожидается матрица-столбец (p×1)"
    flat = []
    for row in M:
        flat.append(row[0])
    return flat

def multiply_map_reduce(A: List[List[float]], B: List[List[float]]) -> List[List[float]]:
    """Multiply two matrices using explicit map, shuffle and reduce phases.

    The map phase emits one ``((i, j), A[i][k] * B[k][j])`` pair per partial
    product, the shuffle phase groups the values by output cell ``(i, j)``,
    and the reduce phase sums each group into the result matrix.

    Args:
        A: Left operand as a list of rows; its width is read from ``A[0]``.
        B: Right operand as a list of rows; its width is read from ``B[0]``.

    Returns:
        The product as a ``len(A)`` by ``len(B[0])`` list of rows.

    Raises:
        ValueError: If the number of columns of ``A`` differs from the
            number of rows of ``B``.
    """
    rows_a = len(A)
    cols_a = len(A[0])

    # Without this check a B with extra rows would be silently truncated
    # (giving a wrong product) and a B with too few rows would fail deep
    # inside the map phase with a bare IndexError.
    if len(B) != cols_a:
        raise ValueError(
            f"Inner dimensions do not match: A has {cols_a} columns, "
            f"B has {len(B)} rows"
        )

    cols_b = len(B[0])

    def map_phase() -> List[Tuple[Tuple[int, int], float]]:
        # One record per partial product, keyed by its output cell.
        mapped = []
        for i in range(rows_a):
            for j in range(cols_b):
                for k in range(cols_a):
                    mapped.append(((i, j), A[i][k] * B[k][j]))
        return mapped

    def shuffle_phase(mapped: List[Tuple[Tuple[int, int], float]]) -> Dict[Tuple[int, int], List[float]]:
        # Group partial products by output cell, keeping emission order so
        # the summation order in the reduce phase is deterministic.
        grouped: Dict[Tuple[int, int], List[float]] = {}
        for key, value in mapped:
            grouped.setdefault(key, []).append(value)
        return grouped

    def reduce_phase(grouped: Dict[Tuple[int, int], List[float]]) -> List[List[float]]:
        # Cells with no records (only possible when cols_a == 0) stay 0.0.
        result = [[0.0] * cols_b for _ in range(rows_a)]
        for (i, j), values in grouped.items():
            result[i][j] = sum(values)
        return result

    return reduce_phase(shuffle_phase(map_phase()))

# Gaussian elimination
def solve_linear_system(A: List[List[float]], b: List[float]) -> List[float]:
    """Solve ``A x = b`` by Gaussian elimination with partial pivoting.

    The inputs are copied into an augmented matrix, so ``A`` and ``b`` are
    not modified. Shape checks and the zero-pivot check are done with
    ``assert``; the pivot check fires only when the largest candidate
    pivot in a column is exactly zero.

    Args:
        A: Square coefficient matrix as a list of ``n`` rows of length ``n``.
        b: Right-hand side of length ``n``.

    Returns:
        The solution vector as a list of ``n`` floats.
    """
    n = len(A)
    assert all(len(row) == n for row in A), "A должна быть квадратной n×n"
    assert len(b) == n, "Размер b должен быть n"

    # Augmented matrix [A | b]
    aug = []
    for i in range(n):
        aug_row = [A[i][j] for j in range(n)]
        aug_row.append(b[i])
        aug.append(aug_row)

    for col in range(n):
        # Partial pivoting: the first row at or below the diagonal holding
        # the largest magnitude in this column.
        pivot = max(range(col, n), key=lambda r: abs(aug[r][col]))
        best = abs(aug[pivot][col])

        assert best != 0.0, "Матрица вырождена или близка к сингулярной"

        if pivot != col:
            aug[col], aug[pivot] = aug[pivot], aug[col]

        # Scale the pivot row so that its leading entry becomes 1
        pivot_row = aug[col]
        lead = pivot_row[col]
        pivot_row[col:] = [entry / lead for entry in pivot_row[col:]]

        # Clear the entries below the pivot
        for r in range(col + 1, n):
            target = aug[r]
            factor = target[col]
            if factor != 0.0:
                for j in range(col, n + 1):
                    target[j] -= factor * pivot_row[j]

    # Back substitution; the diagonal is already normalised to 1.
    x = [0.0] * n
    for r in reversed(range(n)):
        acc = aug[r][n]
        for j in range(r + 1, n):
            acc -= aug[r][j] * x[j]
        x[r] = acc

    return x

@dataclass
class LinearRegressionResult:
    """Coefficients of a fitted linear model.

    Attributes:
        weights: Coefficient vector. When produced by
            ``linear_regression_normal_eq`` with ``intercept=True`` the
            first entry is the bias term.
        intercept: Whether the model was fitted with an intercept column.
    """

    weights: List[float]
    intercept: bool

    def __eq__(self, other):
        """Compare by ``intercept`` flag and ``weights`` contents."""
        # Return NotImplemented (not False) for foreign types so Python can
        # try the other operand's reflected comparison; `==` still ends up
        # False when neither side knows how to compare.
        if not isinstance(other, LinearRegressionResult):
            return NotImplemented
        return self.intercept == other.intercept and self.weights == other.weights

    def __hash__(self):
        """Hash consistently with ``__eq__``.

        The hash is derived from the current contents of ``weights``, so
        mutating the list after the object has been used as a dict key or
        set member changes its hash.
        """
        return hash((tuple(self.weights), self.intercept))


def linear_regression_normal_eq(X_raw: List[List[float]], y: List[float], intercept: bool = True) -> LinearRegressionResult:
    """Fit a linear model by solving the normal equations.

    Builds the design matrix (optionally with a leading column of ones),
    forms ``XᵀX`` and ``Xᵀy`` with ``multiply_map_reduce`` and solves
    ``XᵀX w = Xᵀy`` with ``solve_linear_system``.

    Args:
        X_raw: Feature matrix, one row per observation.
        y: Target values, one per row of ``X_raw``.
        intercept: When true, a column of ones is prepended so that the
            first returned weight is the bias term.

    Returns:
        A ``LinearRegressionResult`` holding the weights and the
        ``intercept`` flag.
    """
    assert len(X_raw) == len(y), \
        f"Число строк X ({len(X_raw)}) должно равняться длине y ({len(y)})"

    if intercept:
        design = add_intercept(X_raw)
    else:
        design = X_raw

    design_t = transpose(design)

    # Left-hand side XᵀX and right-hand side Xᵀy of the normal equations.
    gram = multiply_map_reduce(design_t, design)
    moment = from_column_matrix(
        multiply_map_reduce(design_t, to_column_matrix(y))
    )

    coefficients = solve_linear_system(gram, moment)
    return LinearRegressionResult(weights=coefficients, intercept=intercept)

def generate_matrix(rows: int, cols: int, min: float=0.0, max: float=1.0, seed: int=42):
    """Build a ``rows`` x ``cols`` matrix of seeded pseudo-random floats.

    Values are drawn row by row with ``Random(seed).uniform(min, max)``,
    so the same arguments always give the same matrix.
    """
    rng = Random(seed)
    matrix = []
    for _ in range(rows):
        row = []
        for _ in range(cols):
            row.append(rng.uniform(min, max))
        matrix.append(row)
    return matrix

def print_matrix(matrix: List[List[float]], title: str = ""):
    """Print a matrix to stdout, one comma-separated row per line.

    Args:
        matrix: Rows to print; each entry is rendered with its default
            string formatting.
        title: Optional heading printed before the rows. An empty (or
            ``None``) title prints no heading.
    """
    # Truthiness instead of len(): also accepts title=None without raising.
    if title:
        print(title)
    for row in matrix:
        print(", ".join(f"{x}" for x in row))

def generate_y(X: List[List[float]], seed: int=42) -> List[float]:
    """Generate noisy targets from a seeded random linear model.

    The bias is fixed at 1.5 and one slope per feature is drawn uniformly
    from [-2.0, 2.0]. Each target is the bias plus the weighted sum of the
    row's features plus uniform noise from [-0.05, 0.05]. The feature
    count is taken from the first row of ``X``.
    """
    rng = Random(seed)
    p = len(X[0])

    bias = 1.5
    # All slopes are drawn before any noise, which fixes the random stream.
    slopes = [rng.uniform(-2.0, 2.0) for _ in range(p)]

    targets = []
    for row in X:
        acc = bias
        for j, coef in enumerate(slopes):
            acc += coef * row[j]
        targets.append(acc + rng.uniform(-0.05, 0.05))

    return targets

if __name__ == "__main__":
    # Problem size: n observations, p features.
    n, p = 3000, 30

    # Synthetic feature matrix (default seed and range), echoed in full.
    X = generate_matrix(n, p)
    print_matrix(X, "Матрица X:")

    # Targets from a seeded random linear model, then the fit itself.
    y = generate_y(X, seed=473132)
    result = linear_regression_normal_eq(X, y, intercept=True)

    print(f"Количество весов = {len(result.weights)} (w0 + {p})")
    print("Веса:", [f"{wi:.6f}" for wi in result.weights])

    # Spot check: predict the first row by hand (bias + dot product) and
    # show it next to the generated target.
    x0 = X[0]
    y_hat0 = result.weights[0] + sum(
        coef * feature for coef, feature in zip(result.weights[1:], x0)
    )

    print(f"Предсказание для строки 0: ŷ = {y_hat0:.6f}, реальное значение y = {y[0]:.6f}")

Матрица X:
0.6394267984578837, 0.025010755222666936, 0.27502931836911926, 0.22321073814882275, 0.7364712141640124, 0.6766994874229113, 0.8921795677048454, 0.08693883262941615, 0.4219218196852704, 0.029797219438070344, 0.21863797480360336, 0.5053552881033624, 0.026535969683863625, 0.1988376506866485, 0.6498844377795232, 0.5449414806032167, 0.2204406220406967, 0.5892656838759087, 0.8094304566778266, 0.006498759678061017, 0.8058192518328079, 0.6981393949882269, 0.3402505165179919, 0.15547949981178155, 0.9572130722067812, 0.33659454511262676, 0.09274584338014791, 0.09671637683346401, 0.8474943663474598, 0.6037260313668911
0.8071282732743802, 0.7297317866938179, 0.5362280914547007, 0.9731157639793706, 0.3785343772083535, 0.552040631273227, 0.8294046642529949, 0.6185197523642461, 0.8617069003107772, 0.577352145256762, 0.7045718362149235, 0.045824383655662215, 0.22789827565154686, 0.28938796360210717, 0.0797919769236275, 0.23279088636103018, 0.10100142940972912, 0.2779736031100921, 0.63568444