## Подключение библиотек

In [23]:
import numpy as np
import tensorflow as tf
import tensorflow_decision_forests as tfdf
import pandas as pd
import matplotlib.pyplot as plt

## Загрузка базы для обучения

In [133]:
train_file_path = "test_data/test (4).xlsx"
dataset = pd.read_excel(train_file_path)
label = 'price'
dataset.head()

dataset.drop('url', axis=1, inplace=True)
dataset.drop('deal_type', axis=1, inplace=True)
dataset.drop('price_per_month', axis=1, inplace=True)
dataset.drop('commissions', axis=1, inplace=True)
dataset.drop('house_number', axis=1, inplace=True)
dataset.drop('author_type', axis=1, inplace=True)
dataset.drop(columns=dataset.columns[0], axis=1, inplace=True)

dataset.head()

Unnamed: 0,author,location,accommodation_type,floor,floors_count,rooms_count,total_meters,price,district,street,underground,residential_complex,date
0,КСМ,Киров,flat,,,,51.8,4366740,Первомайский,бульвар Прибрежный,,Ривер Парк,
1,Нововятск Недвижимость,Киров,flat,4.0,9.0,3.0,62.0,6890000,Первомайский,Володарского,,,
2,КСМ,Киров,flat,,,,57.5,5922500,Ленинский,,,Чистые Пруды,
3,Центр Плюс,Киров,flat,2.0,5.0,3.0,82.3,7390000,Первомайский,Ленина,,,
4,КСМ,Киров,flat,,,,67.7,7252024,Ленинский,Энтузиастов,,LIFE,


In [119]:
#Оценка стоимости квартиры в стоимости золота
gold_course_path = "test_data/gold.xlsx"
gold = pd.read_excel(gold_course_path)
gold.head()

Unnamed: 0,Дата,Значение
0,2024-03-30,6575.81
1,2024-03-29,6504.24
2,2024-03-28,6489.04
3,2024-03-27,6478.59
4,2024-03-26,6477.49


In [134]:
#Перевод из рублей в золото
def rubles_to_gold(row):
    actual_date = ""
    if pd.isna(row['date']):
        actual_date = "2023-09-01"
    else:
        actual_date = row['date']
    gold_cost = gold.loc[gold['Дата']==actual_date]['Значение']
    return row['price'] / gold_cost

dataset['price'] = dataset.apply(rubles_to_gold, axis=1)
dataset.head()

Unnamed: 0,author,location,accommodation_type,floor,floors_count,rooms_count,total_meters,price,district,street,underground,residential_complex,date
0,КСМ,Киров,flat,,,,51.8,723.930245,Первомайский,бульвар Прибрежный,,Ривер Парк,
1,Нововятск Недвижимость,Киров,flat,4.0,9.0,3.0,62.0,1142.243273,Первомайский,Володарского,,,
2,КСМ,Киров,flat,,,,57.5,981.848445,Ленинский,,,Чистые Пруды,
3,Центр Плюс,Киров,flat,2.0,5.0,3.0,82.3,1225.134657,Первомайский,Ленина,,,
4,КСМ,Киров,flat,,,,67.7,1202.260614,Ленинский,Энтузиастов,,LIFE,


In [135]:
def split_dataset(dataset, test_ratio=0.30):
    test_indices = np.random.rand(len(dataset)) < test_ratio
    return dataset[~test_indices], dataset[test_indices]


train_ds_pd, test_ds_pd = split_dataset(dataset)
print("{} примеров для тренировки, {} примеров для теста.".format(len(train_ds_pd), len(test_ds_pd)))

296 примеров для тренировки, 128 примеров для теста.


## Создание и обучение модели

In [139]:
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_ds_pd, label=label, task=tfdf.keras.Task.REGRESSION)
test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(test_ds_pd, label=label, task=tfdf.keras.Task.REGRESSION)

In [140]:
tuner = tfdf.tuner.RandomSearch(num_trials=100, use_predefined_hps=True)
model = tfdf.keras.GradientBoostedTreesModel(task = tfdf.keras.Task.REGRESSION)
model.compile(metrics=["mse"])
model.fit(x=train_ds, verbose=1, tuner=tuner)

Use /tmp/tmpznbjcr2s as temporary training directory
Reading training dataset...




Training dataset read in 0:00:00.310945. Found 296 examples.
Training model...
Model trained in 0:00:00.150522
Compiling model...


[INFO 24-03-30 16:40:49.2189 UTC kernel.cc:1233] Loading model from path /tmp/tmpznbjcr2s/model/ with prefix 001ec44b18b44a58
[INFO 24-03-30 16:40:49.2217 UTC abstract_model.cc:1344] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 24-03-30 16:40:49.2217 UTC kernel.cc:1061] Use fast generic engine


Model compiled.


<tf_keras.src.callbacks.History at 0x7a9e2fecd870>

##  Оценка и сохранение модели

In [143]:
evaluation = model.evaluate(test_ds, return_dict=True)
print('rmse:', evaluation['mse'] ** 0.5)

rmse: 410.6083520217289


In [132]:
tfdf.model_plotter.plot_model_in_colab(model, tree_idx=0, max_depth=3)

In [144]:
model.save('models/model1')