In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import pickle


df = pd.read_csv('df_train.csv')

df_clean = df.dropna()

label_encoder = LabelEncoder()
df_clean['city_encoded'] = label_encoder.fit_transform(df_clean['city'])

q1, q2 = df_clean['price'].quantile([0.33, 0.66])
def price_to_class(p):
    if p <= q1:
        return 0
    if p <= q2:
        return 1
    return 2

df_clean['price_class'] = df_clean['price'].apply(price_to_class)

feature_columns = ['area', 'rooms_num', 'latitude', 'longitude', 'city_encoded', 'has_balcony']
X = df_clean[feature_columns].values
y = df_clean['price_class'].values


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42, stratify=y
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(3, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("\nНачинаю обучение нейросети...")
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=16,
    verbose=1,
    validation_split=0.1
)

loss, acc = model.evaluate(X_test, y_test, verbose=0)
print(f'\nТочность на тестовых данных: {acc:.2%}')

model.save('house_price_model_real.h5')
with open('house_scaler_real.pkl', 'wb') as f:
    pickle.dump(scaler, f)
print('\nМодель и scaler сохранены в файлы с суффиксом "_real"!')

print('\n' + '='*50)
print('ПРИМЕР ПРЕДСКАЗАНИЯ ДЛЯ 3 НОВЫХ КВАРТИР')
print('='*50)

new_houses = np.array([
    [50.0,  2, 56.30, 43.95, 2, 1],
    [65.0,  3, 53.20, 50.15, 1, 0],
    [90.0,  4, 45.04, 38.98, 0, 1],
])

new_houses_scaled = scaler.transform(new_houses)
predictions = model.predict(new_houses_scaled, verbose=0)
predicted_classes = np.argmax(predictions, axis=1)
class_names = {0: 'дёшево', 1: 'средне', 2: 'дорого'}

print('\nРезультаты предсказаний:')
print('-'*50)
city_names = {0: 'Краснодар', 1: 'Самара', 2: 'Нижний Новгород'}

for i, (house, pred_class, probs) in enumerate(zip(new_houses, predicted_classes, predictions), 1):
    print(f'\nКвартира {i}:')
    print(f'  Город: {city_names.get(int(house[4]), "Unknown")}')
    print(f'  Площадь: {house[0]:.1f} м²')
    print(f'  Комнат: {int(house[1])}')
    print(f'  Балкон: {"да" if house[5] == 1 else "нет"}')
    print(f'  Предсказанный класс: {pred_class} ({class_names[pred_class]})')
    print(f'  Вероятности:')
    for j, prob in enumerate(probs):
        print(f'    - {class_names[j]}: {prob:.2%}')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['city_encoded'] = label_encoder.fit_transform(df_clean['city'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['price_class'] = df_clean['price'].apply(price_to_class)



Начинаю обучение нейросети...
Epoch 1/20
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.3931 - loss: 1.0545 - val_accuracy: 0.6690 - val_loss: 0.7954
Epoch 2/20
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.6676 - loss: 0.7955 - val_accuracy: 0.6736 - val_loss: 0.6981
Epoch 3/20
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6739 - loss: 0.7178 - val_accuracy: 0.6851 - val_loss: 0.6716
Epoch 4/20
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6843 - loss: 0.6936 - val_accuracy: 0.6782 - val_loss: 0.6574
Epoch 5/20
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6890 - loss: 0.6854 - val_accuracy: 0.6966 - val_loss: 0.6499
Epoch 6/20
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6928 - loss: 0.6755 - val_accuracy: 0.6943 - val_loss: 0.64




Точность на тестовых данных: 70.99%

Модель и scaler сохранены в файлы с суффиксом "_real"!

ПРИМЕР ПРЕДСКАЗАНИЯ ДЛЯ 3 НОВЫХ КВАРТИР

Результаты предсказаний:
--------------------------------------------------

Квартира 1:
  Город: Нижний Новгород
  Площадь: 50.0 м²
  Комнат: 2
  Балкон: да
  Предсказанный класс: 1 (средне)
  Вероятности:
    - дёшево: 12.36%
    - средне: 59.04%
    - дорого: 28.60%

Квартира 2:
  Город: Самара
  Площадь: 65.0 м²
  Комнат: 3
  Балкон: нет
  Предсказанный класс: 0 (дёшево)
  Вероятности:
    - дёшево: 76.88%
    - средне: 16.38%
    - дорого: 6.73%

Квартира 3:
  Город: Краснодар
  Площадь: 90.0 м²
  Комнат: 4
  Балкон: да
  Предсказанный класс: 2 (дорого)
  Вероятности:
    - дёшево: 0.07%
    - средне: 3.35%
    - дорого: 96.58%
