# Предсказание цен на дома

## Библиотеки

In [1]:
import math
import keras
import numpy as np
from keras import layers
from keras.datasets import boston_housing

## Загрузка данных

In [2]:
(x_train, y_train), (x_test, y_test) = boston_housing.load_data()

## Подготовка данных

### Нормализация

In [3]:
def normalization(train_array: np.array, test_array: np.array):
    std = train_array.std(axis=0)
    mean = train_array.mean(axis=0)
    
    train_array_normalize = (train_array - mean) / std
    test_array_normalize = (test_array - mean) / std
    
    return (train_array_normalize, test_array_normalize)

In [4]:
x_train, x_test = normalization(x_train, x_train)

## Конструирование сети

In [5]:
x_train.shape

(404, 13)

In [6]:
def create_model():
    model = keras.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(13,)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    
    return model

## Оценка решения методом перекрестной проверки

In [7]:
k = 5 ## Количество обучений
train_size = math.ceil(x_train.shape[0] / k)
mse_history = []
mae_history = []

for i in range(k):
    current_x_train = x_train[train_size * i:train_size*(i + 1)]
    current_y_target = y_train[train_size * i:train_size*(i + 1)]                        

    current_x_test = np.concatenate([x_train[:train_size * i], x_train[train_size*(i + 1):]])
    current_y_test = np.concatenate([y_train[:train_size * i], y_train[train_size*(i + 1):]])
    
    model = create_model()
    model.fit(
        current_x_train,
        current_y_target,
        epochs=100,
        batch_size=64,
        verbose=0
    )
    mse, mae = model.evaluate(current_x_test, current_y_test)
    mse_history.append(mse)
    mae_history.append(mae)                    

2023-01-21 21:23:14.058724: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.




In [8]:
np.mean(mse_history)

22.498163604736327

In [9]:
np.mean(mae_history)

3.217554140090942

## Выводы

Перекрестная проверка позволяет использовать небольшие наборы данных для обучения и проверки качества кода

[Продолжение](./%D0%9C%D0%B5%D1%82%D0%BE%D0%B4%D1%8B%20%D0%B1%D0%BE%D1%80%D1%8C%D0%B1%D1%8B%20%D1%81%20%D0%BF%D0%B5%D1%80%D0%B5%D0%BE%D0%B1%D1%83%D1%87%D0%B5%D0%BD%D0%B8%D0%B5%D0%BC.ipynb)