# MLOps. Задание 5

## 1. Импорт библиотек

In [143]:
import numpy as np
import pickle

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

## 2. Генерация данных

In [144]:
def add_anomalies(arr, anomaly_percent, anomaly_value):
    """Return a copy of ``arr`` with extra uniform noise on a random subset.

    ``int(len(arr) * anomaly_percent)`` randomly chosen positions each receive
    an additional offset drawn uniformly from
    ``[-anomaly_value, anomaly_value)``. All other positions are left as is.

    The input is never modified: the result is always a new float array, so
    the function can be re-run safely on the same data and also accepts
    integer arrays or plain sequences.

    Parameters
    ----------
    arr : array-like
        One-dimensional numeric values to perturb.
    anomaly_percent : float
        Fraction of elements to perturb, expected in ``[0, 1]``. Values
        outside that range (including NaN) leave the data unperturbed.
    anomaly_value : float
        Maximum absolute size of the added offset.

    Returns
    -------
    numpy.ndarray
        Float array of the same length as ``arr``.
    """
    # Work on a float copy: an in-place `+=` would silently change the
    # caller's array and fails outright for integer dtypes.
    result = np.array(arr, dtype=float)

    # Positive range check so that NaN is rejected too (every comparison
    # with NaN is False, which would slip past `x > 1 or x < 0`).
    if not (0 <= anomaly_percent <= 1):
        return result

    anomaly_count = int(len(result) * anomaly_percent)

    # Boolean mask with exactly `anomaly_count` True entries at random places.
    mask = np.zeros(len(result), dtype=bool)
    mask[:anomaly_count] = True
    np.random.shuffle(mask)

    # A full-length noise vector is drawn (same RNG consumption as before),
    # but only the masked positions are actually applied.
    noise = (np.random.random(len(result)) * 2 - 1) * anomaly_value
    result[mask] += noise[mask]

    return result

def generate_data(min_value, max_value, count, random_value, anomaly_percent=None, anomaly_value=None, seed=None):
    """Generate a noisy ``y = x`` dataset, optionally with anomalies.

    Parameters
    ----------
    min_value, max_value : float
        Bounds of the evenly spaced ``x`` grid (both included).
    count : int
        Number of samples.
    random_value : float
        Amplitude of the uniform noise added to ``y``; each target deviates
        from ``x`` by an offset in ``[-random_value, random_value)``.
    anomaly_percent : float, optional
        Fraction of targets that additionally receive anomaly noise.
    anomaly_value : float, optional
        Amplitude of the anomaly noise. Anomalies are only added when both
        ``anomaly_percent`` and ``anomaly_value`` are given.
    seed : int, optional
        When given, NumPy's global random generator is re-seeded with this
        value before any numbers are drawn, making the result reproducible.
        Note that this affects the global ``np.random`` state. When omitted
        the current global state is used, as before.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)`` where ``x`` has shape ``(count, 1)`` (a feature matrix)
        and ``y`` has shape ``(count,)``.
    """
    if seed is not None:
        # Global seeding (rather than a local generator) so that the noise
        # drawn inside add_anomalies is covered as well.
        np.random.seed(seed)

    x = np.linspace(min_value, max_value, count)
    # Map uniform [0, 1) samples to [-1, 1) and scale by the noise amplitude.
    y = x + (np.random.random(count) * 2 - 1) * random_value

    if anomaly_percent is not None and anomaly_value is not None:
        y = add_anomalies(y, anomaly_percent, anomaly_value)

    return x.reshape(-1, 1), y

In [145]:
# Seed NumPy's global generator so that Restart & Run All regenerates the
# same datasets, and therefore the same model and test results, every time.
np.random.seed(42)

train_data = generate_data(0, 1, 250, 0.2)
other_data1 = generate_data(0, 1, 250, 0.2)
other_data2 = generate_data(0, 1, 250, 0.2)
# Same base noise, but 30% of the targets get extra noise of amplitude 2.
anomaly_data = generate_data(0, 1, 250, 0.2, 0.3, 2)

# Persist every dataset as <name>_x.npy / <name>_y.npy (np.save appends the
# .npy extension); test.py loads the files back by these names.
datasets = {
    'train': train_data,
    'other1': other_data1,
    'other2': other_data2,
    'anomaly': anomaly_data,
}
for name, (data_x, data_y) in datasets.items():
    np.save(f'{name}_x', data_x)
    np.save(f'{name}_y', data_y)


## 3. Обучение модели

In [146]:
# Fit a LinearRegression on the training dataset (features first, targets
# second) and pickle the fitted estimator to model.pkl for test.py to load.
model = LinearRegression()
model.fit(train_data[0], train_data[1])

with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

## 4. Тестирование

In [147]:
%%writefile "test.py"

import pickle
import numpy as np
import pytest


from sklearn.metrics import r2_score, mean_squared_error

@pytest.mark.parametrize("data_name", ['train', 'other1', 'other2', 'anomaly'])
def test_model(data_name):
    """Check the pickled model's quality on the dataset called ``data_name``.

    Loads the estimator from ``model.pkl`` and the arrays from
    ``<data_name>_x.npy`` / ``<data_name>_y.npy``, then requires the
    predictions to reach R2 above 0.8 and mean squared error below 0.05.
    """
    # NOTE: pickle.load executes arbitrary code from the file; only run this
    # against a model.pkl you produced yourself.
    with open('model.pkl', 'rb') as model_file:
        regressor = pickle.load(model_file)

    features = np.load(f'{data_name}_x.npy')
    targets = np.load(f'{data_name}_y.npy')

    predictions = regressor.predict(features)

    # Both metrics are computed up front, before either threshold is checked.
    determination = r2_score(targets, predictions)
    squared_error = mean_squared_error(targets, predictions)

    assert determination > 0.8
    assert squared_error < 0.05

    print(f'Dataset: {data_name}. Success!')

Overwriting test.py


In [148]:
# Run test.py with pytest in verbose mode, so each parametrized dataset case
# is reported on its own line.
! pytest -v test.py

platform win32 -- Python 3.10.11, pytest-7.4.3, pluggy-1.3.0 -- E:\UrfuMaga\VENV\Scripts\python.exe
cachedir: .pytest_cache
rootdir: E:\UrfuMaga\MlOps\Task5
plugins: anyio-3.7.1
[1mcollecting ... [0mcollected 4 items

test.py::test_model[train] [32mPASSED[0m[32m                                        [ 25%][0m
test.py::test_model[other1] [32mPASSED[0m[32m                                       [ 50%][0m
test.py::test_model[other2] [32mPASSED[0m[32m                                       [ 75%][0m
test.py::test_model[anomaly] [31mFAILED[0m[31m                                      [100%][0m

[31m[1m_____________________________ test_model[anomaly] _____________________________[0m

data_name = 'anomaly'

    [37m@pytest[39;49;00m.mark.parametrize([33m"[39;49;00m[33mdata_name[39;49;00m[33m"[39;49;00m, [[33m'[39;49;00m[33mtrain[39;49;00m[33m'[39;49;00m, [33m'[39;49;00m[33mother1[39;49;00m[33m'[39;49;00m, [33m'[39;49;00m[33mother2[39;49;00m[33m'[3