In [1]:
from typing import Any, Dict, Tuple

import numpy as np
from numpy import floating

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [2]:

def mean_squared_error(actual: np.ndarray, predicted: np.ndarray) -> floating[Any]:
    """Compute the mean squared error (MSE) between targets and predictions.

    Both inputs are converted to float arrays before any arithmetic, so plain
    Python sequences are accepted and integer arrays (in particular unsigned
    or narrow dtypes) cannot wrap around when subtracted or squared.

    Note: this definition reuses the name ``mean_squared_error`` that the
    import cell brings in from ``sklearn.metrics`` and replaces it for every
    cell executed afterwards.

    Args:
        actual: Ground-truth values (array or array-like).
        predicted: Predicted values, broadcastable against ``actual``.

    Returns:
        Mean of the element-wise squared differences.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    return np.mean((actual - predicted) ** 2)


def root_mean_squared_error(actual: np.ndarray, predicted: np.ndarray) -> floating[Any]:
    """Compute the root mean squared error (RMSE) between targets and predictions.

    Both inputs are converted to float arrays before any arithmetic, so plain
    Python sequences are accepted and integer arrays (in particular unsigned
    or narrow dtypes) cannot wrap around when subtracted or squared.

    Args:
        actual: Ground-truth values (array or array-like).
        predicted: Predicted values, broadcastable against ``actual``.

    Returns:
        Square root of the mean of the element-wise squared differences.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    squared_errors = (actual - predicted) ** 2
    return np.sqrt(np.mean(squared_errors))


def mean_absolute_error(actual: np.ndarray, predicted: np.ndarray) -> floating[Any]:
    """Compute the mean absolute error (MAE) between targets and predictions.

    Both inputs are converted to float arrays before any arithmetic, so plain
    Python sequences are accepted and unsigned integer arrays cannot wrap
    around when a larger value is subtracted from a smaller one.

    Note: this definition reuses the name ``mean_absolute_error`` that the
    import cell brings in from ``sklearn.metrics`` and replaces it for every
    cell executed afterwards.

    Args:
        actual: Ground-truth values (array or array-like).
        predicted: Predicted values, broadcastable against ``actual``.

    Returns:
        Mean of the element-wise absolute differences.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    return np.mean(np.abs(actual - predicted))


def mean_absolute_percentage_error(actual: np.ndarray, predicted: np.ndarray) -> floating[Any]:
    """Compute the mean absolute percentage error (MAPE), in percent.

    Both inputs are converted to float arrays before any arithmetic, so plain
    Python sequences are accepted and unsigned integer arrays cannot wrap
    around when a larger value is subtracted from a smaller one.

    Each error is divided by the corresponding entry of ``actual``; entries
    equal to zero therefore make the result non-finite (``inf`` or ``nan``).

    Args:
        actual: Ground-truth values (array or array-like).
        predicted: Predicted values, broadcastable against ``actual``.

    Returns:
        Mean of ``100 * |actual - predicted| / |actual|``.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    relative_errors = np.abs((actual - predicted) / actual)
    return np.mean(100 * relative_errors)

def r_squared(actual: np.ndarray, predicted: np.ndarray) -> floating[Any]:
    """Compute the coefficient of determination (R^2).

    Both inputs are converted to float arrays before any arithmetic, so plain
    Python sequences are accepted and integer arrays (in particular unsigned
    or narrow dtypes) cannot wrap around when subtracted or squared.

    The residual sum of squares is divided by the total sum of squares of
    ``actual`` around its mean; when ``actual`` is constant that denominator
    is zero and the result is not finite.

    Args:
        actual: Ground-truth values (array or array-like).
        predicted: Predicted values, broadcastable against ``actual``.

    Returns:
        ``1 - SS_res / SS_tot``.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    residual_ss = np.sum((actual - predicted) ** 2)
    total_ss = np.sum((actual - np.mean(actual)) ** 2)
    return 1 - (residual_ss / total_ss)

In [7]:
def test():
    """Check each metric function against its reference value on a four-point sample.

    Raises ``AssertionError`` on the first metric whose result is not close
    to the expected number; prints a confirmation line otherwise.
    """
    y_true = np.array([3, -0.5, 2, 7])
    y_hat = np.array([2.5, 0.0, 2, 8])

    # (metric function, expected value) pairs, checked in order.
    expectations = (
        (mean_squared_error, 0.375),
        (root_mean_squared_error, 0.6123724356957945),
        (mean_absolute_error, 0.5),
        (mean_absolute_percentage_error, 32.73809523809524),
        (r_squared, 0.9486081370449679),
    )
    for metric, expected in expectations:
        assert np.allclose(metric(y_true, y_hat), expected)

    print("All tests passed.")


# Run the metric self-checks when this code is executed as the main module.
if __name__ == "__main__":
    test()

All tests passed.


In [8]:
# Sample targets and predictions reused by the following scratch cells.
actual, predicted = (
    np.array([3, -0.5, 2, 7]),
    np.array([2.5, 0.0, 2, 8]),
)

In [9]:
# Spot check: the `mean_squared_error` currently bound in the namespace gives 0.375 on the sample arrays.
assert np.allclose(mean_squared_error(actual, predicted), 0.375)

In [10]:
# Bare expression: shows R^2 for the sample arrays as the cell output.
r_squared(actual, predicted)

0.9486081370449679

In [None]:


def prepare_data():
    """Generate the synthetic regression dataset used by ``solution``.

    Calls ``make_regression`` with 1000 samples, 10 features, noise 0.2 and a
    fixed ``random_state`` of 42, so repeated calls use the same settings.

    Returns:
        The ``(X, y)`` pair produced by ``make_regression``.
    """
    features, targets = make_regression(
        n_samples=1000,
        n_features=10,
        noise=0.2,
        random_state=42,
    )
    return features, targets


def solution(data: Tuple[np.ndarray, np.ndarray]) -> Dict[str, Any]:
    """
    Train a linear regression on a hold-out split and report test-set metrics.

    The data is split 80/20 with a fixed seed (``random_state=42``), a
    ``LinearRegression`` is fitted on the training part, and all metrics are
    computed on the held-out part.

    Args:
        data: ``(X, y)`` pair of feature matrix and target vector.

    Returns:
        dict: Dictionary with the following keys:
            - ``"y_pred"``: predictions for the test split,
            - ``"y_test"``: true targets of the test split,
            - ``"mse"``: mean squared error,
            - ``"mae"``: mean absolute error,
            - ``"r2"``: coefficient of determination.

    Examples:
        >>> metrics = solution(prepare_data())
        >>> sorted(metrics)
        ['mae', 'mse', 'r2', 'y_pred', 'y_test']
    """
    # Imported locally under private aliases: the notebook redefines
    # `mean_squared_error` and `mean_absolute_error` at module level, so the
    # bare names would resolve to whichever definition ran last. The aliases
    # guarantee that all three metrics come from scikit-learn.
    from sklearn.metrics import (
        mean_absolute_error as _sk_mae,
        mean_squared_error as _sk_mse,
        r2_score as _sk_r2,
    )

    X, y = data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    line_reg = LinearRegression()
    line_reg.fit(X_train, y_train)

    y_pred = line_reg.predict(X_test)

    mse = _sk_mse(y_test, y_pred)
    r2 = _sk_r2(y_test, y_pred)
    mae = _sk_mae(y_test, y_pred)

    return {
        "y_pred": y_pred,
        "y_test": y_test,
        "mse": mse,
        "mae": mae,
        "r2": r2,
    }


# Entry point: build the dataset, run the training/evaluation pipeline and
# print the three scalar metrics. `data` and `metrics` stay bound at module
# level afterwards.
if __name__ == "__main__":
    data = prepare_data()
    metrics = solution(data)
    print("MSE:", metrics["mse"])
    print("MAE:", metrics["mae"])
    print("R2:", metrics["r2"])
