In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Activation, Flatten
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.activations import linear, relu
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from tensorflow.keras.utils import plot_model
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler

%matplotlib inline

In [16]:
# Load the cleaned listings table (path is relative to the working directory).
df = pd.read_excel('dataset/cleaned_data/cleaned_building_v2.xlsx')

# Trim price outliers: the 200 lowest-priced and the 1000 highest-priced rows.
df = df.drop(df.sort_values(by='price', ascending=True).head(200).index.values.tolist())
df = df.drop(df.sort_values(by='price', ascending=False).head(1000).index.values.tolist())

# Drop the 10 rows with the largest 'Потолки' values.
df = df.drop(df.sort_values(by='Потолки', ascending=False).head(10).index.values.tolist())

# Trim 'Общая площадь, м²' outliers: the 160 smallest and the 40 largest rows.
df = df.drop(df.sort_values(by='Общая площадь, м²', ascending=True).head(160).index.values.tolist())
df = df.drop(df.sort_values(by='Общая площадь, м²', ascending=False).head(40).index.values.tolist())

# Remove rows whose 'статус' is 0 or -1.
df = df.drop(df[df['статус']== 0].index.values.tolist())
df = df.drop(df[df['статус']== -1].index.values.tolist())

# Drop the columns that are not used as features, then cast everything to float.
df = df.drop(['решетки на окнах', 'Год постройки (сдачи в эксплуатацию)','region', 'nan', 'В залоге', 'Кол-во телефонных линий', 'через TV кабель', 'ADSL', 'проводной', 'регион', 'статус'], axis=1)
df = df.astype(float)

# Train on a log scale: price -> log(1 + price). Index is renumbered 0..n-1.
df['price'] = np.log1p(df['price'])
df = df.reset_index(drop=True)

X = df.drop(['price'], axis=1)
y = df['price']

Scaler = StandardScaler()
mse = MeanSquaredError()
mae = MeanAbsoluteError()

# Hand-written probe listing: one value per feature column of X, in column order.
probe = pd.DataFrame(
    [[214., 1., 1., 2., 2.7, 0., 0., 0., 0., 0., 1.]],
    columns=X.columns,
)

# Fit the scaler on the real rows only and push the probe through the fitted
# scaler afterwards. Writing the probe into X before fit_transform (as this
# cell used to do) lets the probe influence the mean/variance it is scaled by.
X_scaled = Scaler.fit_transform(X)
probe_scaled = Scaler.transform(probe)

# Keep the scaled probe as the last row so that X[-1] still addresses it.
X = np.vstack([X_scaled, probe_scaled])

In [18]:
# Shape of the last row of the scaled matrix X (the probe row that was added
# as the final row in the previous cell).
X[-1].shape

(11,)

In [None]:
# Load the cleaned listings table from an Excel workbook; the path is relative
# to the notebook's working directory.
df = pd.read_excel('dataset/cleaned_data/cleaned_building_v2.xlsx')

In [None]:
# Outlier trimming, applied in sequence: every step ranks the rows that survived
# the previous step by one column and discards a fixed number from one end.
df = (
    df
    # 200 lowest prices
    .pipe(lambda d: d.drop(d.sort_values(by='price', ascending=True).head(200).index.values.tolist()))
    # 1000 highest prices
    .pipe(lambda d: d.drop(d.sort_values(by='price', ascending=False).head(1000).index.values.tolist()))
    # 10 largest 'Потолки' values
    .pipe(lambda d: d.drop(d.sort_values(by='Потолки', ascending=False).head(10).index.values.tolist()))
    # 160 smallest 'Общая площадь, м²' values
    .pipe(lambda d: d.drop(d.sort_values(by='Общая площадь, м²', ascending=True).head(160).index.values.tolist()))
    # 40 largest 'Общая площадь, м²' values
    .pipe(lambda d: d.drop(d.sort_values(by='Общая площадь, м²', ascending=False).head(40).index.values.tolist()))
)

In [None]:
# Discard every row whose 'статус' value is 0 or -1 (dropped by index label).
df = df.drop(df[df['статус'].isin([0, -1])].index.values.tolist())

In [None]:
# Remove the columns that are not used as model inputs, then cast every
# remaining column to float.
df = (
    df
    .drop(
        columns=[
            'решетки на окнах',
            'Год постройки (сдачи в эксплуатацию)',
            'region',
            'nan',
            'В залоге',
            'Кол-во телефонных линий',
            'через TV кабель',
            'ADSL',
            'проводной',
            'регион',
            'статус',
        ]
    )
    .astype(float)
)

In [None]:
# Replace price with log(1 + price) and renumber the rows 0..n-1.
# Re-running this cell applies log1p to the already-transformed column again.
df = (
    df
    .assign(price=np.log1p(df['price']))
    .reset_index(drop=True)
)
df

In [None]:
# Target: the (log-transformed) price column. Features: every other column.
y = df['price']
X = df.drop(columns='price')

In [None]:
# Keras loss objects reused further down as stand-alone metric functions.
mse = MeanSquaredError()
mae = MeanAbsoluteError()

# Feature standardiser; fitted in the next cell.
Scaler = StandardScaler()

In [None]:
# Standardise all feature columns; X becomes the array returned by the scaler.
# NOTE(review): the scaler is fitted on the full feature matrix before the
# train/validation/test split in the next cell, so the held-out rows contribute
# to the mean and variance used for scaling.
X = Scaler.fit_transform(X)

In [None]:
# 90% of the rows go to training; the remaining 10% is halved into test and
# validation (5% each).
# random_state pins both shuffles. Without it the split draws from NumPy's
# global RNG, which this notebook only seeds in a later cell, so the membership
# of each subset (and every reported metric) changed from run to run.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, test_size=0.1, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_holdout, y_holdout, test_size=0.5, random_state=42)

print(X_train.shape, X_val.shape, X_test.shape)

In [None]:
# Stop training once the validation loss has gone 5 consecutive epochs without
# improving. The fit() call below passes validation_data, so 'val_loss' is
# available; monitoring the training loss instead would never react to the
# model starting to overfit.
callback_patience = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

In [None]:
class myModel(tf.keras.Model):
    """Fully connected regression network with two skip-and-square stages.

    Eight hidden Dense layers feed a single-unit output layer that has no
    activation. Twice in the forward pass the output of a pair of Dense layers
    is added to the output of the first layer of that pair, and the sum is then
    multiplied element-wise by itself before the next Dense layer.

    Args:
        units: Width of hidden layers 1-7; hidden layer 8 uses ``units * 2``.
        activation: Activation passed to every hidden Dense layer.
        **kwargs: Forwarded unchanged to ``tf.keras.Model.__init__``.
    """

    def __init__(self, units=32, activation='relu', **kwargs):
        super().__init__(**kwargs)

        # First stage: hidden1 -> hidden2, with hidden1 reused as the skip branch.
        self.hidden1 = Dense(units, activation=activation)
        self.hidden2 = Dense(units, activation=activation)

        # Plain stack between the two stages.
        self.hidden3 = Dense(units, activation=activation)
        self.hidden4 = Dense(units, activation=activation)

        # Second stage: hidden5 -> hidden6, with hidden5 reused as the skip branch.
        self.hidden5 = Dense(units, activation=activation)
        self.hidden6 = Dense(units, activation=activation)

        # Head; the last hidden layer is twice as wide as the others.
        self.hidden7 = Dense(units, activation=activation)
        self.hidden8 = Dense(units * 2, activation=activation)

        # Helper layers. The single Dropout instance and the Add / Multiply
        # layers are each applied at two points of the forward pass.
        self.batch_normalization = BatchNormalization()
        self.dropout = Dropout(0.1)
        self.add = tf.keras.layers.Add()
        self.multiply = tf.keras.layers.Multiply()

        # One output unit, no activation.
        self.main_output = Dense(1)

    def call(self, inputs):
        """Run the forward pass and return the single-unit output tensor."""
        # Stage 1: Dense pair with dropout, skip connection, element-wise square.
        skip = self.hidden1(inputs)
        h = self.dropout(self.hidden2(skip))
        h = self.add([h, skip])
        h = self.multiply([h, h])

        h = self.hidden4(self.hidden3(h))

        # Stage 2: as stage 1, plus batch normalisation ahead of the dropout.
        skip = self.hidden5(h)
        h = self.hidden6(skip)
        h = self.dropout(self.batch_normalization(h))
        h = self.add([h, skip])
        h = self.multiply([h, h])

        h = self.hidden8(self.hidden7(h))

        return self.main_output(h)


In [None]:
# Seed NumPy's global RNG and TensorFlow's global RNG with the same value.
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Build the network with the class defaults (units=32, activation='relu').
model = myModel()

In [None]:
# RMSprop with its default settings; mean absolute error as the training loss.
model.compile(
    loss='mae',
    optimizer=tf.keras.optimizers.RMSprop(),
)

# Train for at most 300 epochs in mini-batches of 16. The early-stopping
# callback may end the run sooner; validation metrics are logged every epoch.
history = model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=300,
    validation_data=(X_val, y_val),
    callbacks=[callback_patience],
)

In [None]:
# Compiled loss (MAE on the log1p-price scale) on the test split.
model.evaluate(X_test, y_test)

In [None]:
# Compiled loss (MAE on the log1p-price scale) on the training split.
model.evaluate(X_train, y_train)

In [None]:
# Predictions for the training and test splits, on the log1p-price scale.
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

# sqrt(MSE) is the root mean squared error, so it is labelled as such.
print(f"Root Mean Squared Error - train: {tf.math.sqrt(mse(y_train, y_pred_train))}")
print(f"Root Mean Squared Error - test: {tf.math.sqrt(mse(y_test, y_pred_test))}\n")

# MAE on the log scale. Each label now matches the split it is computed on
# (the train/test labels used to be swapped).
print(f"Mean Absolute Error - train: {mae(y_train, y_pred_train)}")
print(f"Mean Absolute Error - test: {mae(y_test, y_pred_test)}\n")

# MAE after undoing the log transform. The target was built with log1p, but the
# "-1" of its exact inverse cancels inside |exp(a) - exp(b)|, so exp() gives the
# same absolute error as expm1() would.
print(f"Mean Absolute Error - test: {mae(tf.math.exp(y_test), tf.math.exp(y_pred_test))}")
print(f"Mean Absolute Error - train: {mae(tf.math.exp(y_train), tf.math.exp(y_pred_train))}")

In [None]:
# MAE after mapping targets and predictions back from the log scale with exp()
# (same two figures as the last two lines printed by the previous cell).
print(f"Mean Absolute Error - test: {mae(tf.math.exp(y_test), tf.math.exp(y_pred_test))}")
print(f"Mean Absolute Error - train: {mae(tf.math.exp(y_train), tf.math.exp(y_pred_train))}")

In [None]:
# One curve per quantity recorded by fit(), plotted against the epoch index.
history_df = pd.DataFrame(history.history)
ax = history_df.plot(figsize=(5,5))
ax.set_xlabel('epochs')
ax.set_ylabel('loss')
ax.legend()

In [None]:
# Actual vs. predicted log1p(price) for every row, plotted against the
# 'Общая площадь, м²' column. The legend typo ('trainin_data') is fixed and the
# axes are labelled so the figure can be read on its own.
fig, ax = plt.subplots()
ax.scatter(df['Общая площадь, м²'], y, c='b', label='training_data')
ax.scatter(df['Общая площадь, м²'], model.predict(X), c='r', label='predicted_data')
ax.set_xlabel('Общая площадь, м²')
ax.set_ylabel('log1p(price)')
ax.legend()

In [None]:
# MAE over the whole dataset after mapping targets and predictions back from
# the log scale with exp().
y_true = tf.math.exp(y)
y_pred = tf.math.exp(model.predict(X))

mae(y_true, y_pred)

In [None]:
# Prediction for the first training row next to its target.
# After train_test_split, y_train still carries the shuffled original index
# labels, so the target must be taken by position (.iloc[0]). y_train[0] looks
# up the row labelled 0, which is a different sample or missing altogether.
model.predict(np.expand_dims(X_train[0], 0)), y_train.iloc[0]

In [None]:
# Persist the trained model to disk.
# NOTE(review): absolute, machine-specific Windows path; the load_model cell
# further down reads the same location, so change both together.
model.save(r'C:\Users\karib\Desktop\diploma\models\model_v2')

In [None]:
import tensorflow as tf

In [None]:
# Reload the model from the location the save cell wrote to.
# NOTE(review): absolute, machine-specific Windows path.
nn = tf.keras.models.load_model(r'C:\Users\karib\Desktop\diploma\models\model_v2')

In [None]:
# Same check with the reloaded model: prediction for the first training row
# next to its target. The target is taken by position (.iloc[0]) because
# y_train keeps the shuffled original index labels, so y_train[0] would return
# a different sample or raise KeyError.
nn.predict(np.expand_dims(X_train[0], 0)), y_train.iloc[0]

In [None]:
# Shape of one training row after adding a leading axis, i.e. the input that is
# passed to predict() above.
np.expand_dims(X_train[0], 0).shape

In [None]:
# Display the first row of the scaled training matrix.
X_train[0]

In [None]:
# Hard-coded 11-element vector; presumably pasted from the X_train[0] output
# above (verify before relying on it).
a = np.array(
    [
        -0.55226414, -0.00756485, 0.06108979, 0.71995724,
        -1.09164684, 0.74222731, -0.75557363, -0.57774504,
        -0.83818919, -0.8229088, -0.29829498,
    ]
)

In [None]:
# NOTE(review): no notebook-level variable named `x` is assigned in the visible
# cells (`x` only exists as a local inside myModel.call), so this cell
# presumably raises NameError on a fresh kernel; confirm and remove.
x

In [None]:
# Add a leading axis to `a`, turning the 1-D vector into a single-row 2-D array.
np.expand_dims(a, 0)

In [None]:
# Shape of a plain 11-element 1-D array.
np.array([1,1,1,1,1,1,1,1,1,1,1]).shape

In [None]:
# Row labelled 0 of df as a NumPy array (includes the price column).
np.array(df.loc[0])

In [None]:
# Eleven unscaled values; presumably one per feature column of the model input
# (verify the order against the feature frame).
a = np.array(
    [
        214.0, 1.0, 1.0, 2.0, 2.7,
        0.0, 0.0, 0.0, 0.0, 0.0,
        1.0,
    ]
)

In [None]:
# Shape of `a` after adding a trailing axis: the 1-D vector becomes a column.
np.expand_dims(a, 1).shape

In [None]:
# Unscaled feature frame: df without the price column.
XX = df.drop(columns='price')

In [None]:
# Scale row 0 with the statistics the scaler has already learned.
# The double brackets keep the row as a one-row DataFrame (one sample, all
# feature columns). The previous form reshaped the row to (n_features, 1) and
# called fit_transform, which refitted the shared Scaler on that single row and
# returned the row standardised against its own mean and variance.
Scaler.transform(XX.loc[[0]])

In [None]:
# Standardise the rows labelled 0 through 4 of the unscaled feature frame.
# NOTE(review): fit_transform refits the shared Scaler on just these rows,
# replacing whatever it was fitted on before; Scaler.transform would reuse the
# existing fit instead.
Scaler.fit_transform(XX.loc[:4])

In [None]:
# Standardise `a` after reshaping it into a column (trailing axis added).
# NOTE(review): in this layout the scaler sees each element of `a` as a separate
# sample of a single feature, and fit_transform refits the shared Scaler on
# them, replacing any earlier fit.
Scaler.fit_transform(np.expand_dims(a, 1))

In [19]:
# Display the row labelled 0 of df (feature columns plus the price column).
df.loc[0]

Общая площадь, м²        214.000000
Количество уровней         1.000000
Тип строения               1.000000
Состояние                  2.000000
Потолки                    2.700000
price                     19.519293
Парковка                   0.000000
пожарная сигнализация      0.000000
круглосуточная охрана      0.000000
видеонаблюдение            0.000000
сигнализация               0.000000
оптика                     1.000000
Name: 0, dtype: float64

In [20]:
# Display the first row of the scaled feature matrix X.
X[0]

array([-0.87847107, -1.20229431,  0.06107931, -0.19441336, -1.09123809,
       -1.34664826, -0.75537082, -0.57761336, -0.83794535, -0.82267298,
        3.34539484])