# Parsing flat

## Парсінг сторінки.
### Список отриманої інформації: ціна на квартиру, к-сть кімнат, площа квартири, район, адреса.

In [None]:
import requests
from bs4 import BeautifulSoup

In [None]:
# Parallel result lists: every find_* helper appends one value per listing card.
flat_price = []
flat_room_count = [] # number of rooms
flat_area = [] # apartment area
flat_district = [] # district
flat_street = []  # address



def work_with_all_page(first_page=1, last_page=299, timeout=30):
    """Download rieltor.ua rental catalogue pages and parse every listing on them.

    Pages ``first_page`` .. ``last_page`` (inclusive) are fetched one by one and
    passed to ``work_with_one_page``. The defaults reproduce the page range
    that used to be hard-coded as ``range(1, 300)``.

    Args:
        first_page: number of the first catalogue page to request.
        last_page: number of the last catalogue page to request (inclusive).
        timeout: seconds ``requests`` waits for the server before raising;
            without a timeout a single stalled connection blocks the crawl
            indefinitely.
    """
    for page in range(first_page, last_page + 1):
        url = f'https://rieltor.ua/flats-rent/?page={page}'
        r = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(r.text, 'lxml')
        print(url)  # progress indicator
        work_with_one_page(soup)


def work_with_one_page(soup):
    """Run every field extractor on each listing card found in ``soup``.

    A card is a ``div`` with class ``catalog-card-content``. For each card the
    extractors are called in the order price, room count, area, district, street.
    """
    for card in soup.findAll('div', class_="catalog-card-content"):
        for extract in (find_price, find_room_count, find_area,
                        find_district, find_street):
            extract(card)


def find_price(item):
    """Read the price text of one listing card and append the parsed value to ``flat_price``.

    The text is taken from the ``strong`` tag inside the card's
    ``catalog-card-price`` div and normalised by ``make_standart_price``.
    """
    price_box = item.find('div', class_="catalog-card-price")
    raw_price = price_box.find('strong').text
    flat_price.append(make_standart_price(raw_price))

def make_standart_price(price, usd_rate=37):
    """Convert a price label into an amount in UAH per month.

    The last whitespace-separated token selects the currency: ``"грн/міс"``
    is taken as is, ``"$/міс"`` is multiplied by ``usd_rate``. All digit
    characters of the label are concatenated to form the number.

    Args:
        price: label text, e.g. ``"15 000 грн/міс"`` or ``"500 $/міс"``.
        usd_rate: UAH per one USD (default 37, the rate that was previously
            hard-coded).

    Returns:
        The monthly price in UAH, or ``None`` when the label is empty, has an
        unknown currency suffix, or contains no digits.
    """
    tokens = price.split()
    if not tokens:
        # Empty or whitespace-only label: there is no suffix to inspect.
        return None

    multiplier = {"грн/міс": 1, "$/міс": usd_rate}.get(tokens[-1])
    if multiplier is None:
        return None

    digits = "".join(ch for ch in price if ch.isdigit())
    if not digits:
        # A bare currency suffix without any amount.
        return None
    return int(digits) * multiplier



def find_room_count(item):
    """Read the room-count text of one listing card and append the parsed value to ``flat_room_count``.

    The text comes from the ``span`` of the first ``catalog-card-details-row`` div.
    """
    detail_rows = item.findAll('div', class_='catalog-card-details-row')
    label = detail_rows[0].find('span').text
    flat_room_count.append(make_standart_room_count(label))

def make_standart_room_count(room_count):
    """Return the number of rooms encoded in the first word of ``room_count``.

    A label whose first word is ``'Вільне'`` is counted as 1 room; otherwise
    the first word is converted with ``int``.
    """
    first_word = room_count.split()[0]
    return 1 if first_word == 'Вільне' else int(first_word)



def find_area(item):
    """Read the area text of one listing card and append the parsed value to ``flat_area``.

    The text comes from the ``span`` of the second ``catalog-card-details-row`` div.
    """
    detail_rows = item.findAll('div', class_='catalog-card-details-row')
    label = detail_rows[1].find('span').text
    flat_area.append(make_standart_area(label))

def make_standart_area(area):
    """Return the first whitespace-separated token of ``area`` as a float."""
    leading_token = area.split(maxsplit=1)[0]
    return float(leading_token)



def find_district(item):
    """Read the district of one listing card and append it to ``flat_district``.

    The district text is the second ``a`` link inside the card's
    ``catalog-card-region`` div.
    """
    region_links = item.find('div', class_="catalog-card-region").findAll('a')
    flat_district.append(make_standart_district(region_links[1].text))

def make_standart_district(district):
    """Return only the first word of the district label."""
    return district.split(maxsplit=1)[0]


def find_street(item):
    """Read the address text of one listing card and append it to ``flat_street``.

    The text comes from the card's ``catalog-card-address`` div.
    """
    address_text = item.find('div', class_='catalog-card-address').text
    flat_street.append(make_standart_street(address_text))

def make_standart_street(street):
    """Return the address unchanged.

    Placeholder kept so that every field has a ``make_standart_*`` hook.
    """
    return street





# Run the crawl: downloads the catalogue pages and fills the flat_* lists.
work_with_all_page()

## Створення словника для подальшого збереження.

In [None]:
# Bundle the scraped columns under the names used by the later steps.
dict_flat = {'flat_street': flat_street, 
        'flat_price': flat_price, 
        'flat_room_count': flat_room_count, 
        'flat_area': flat_area, 
        'flat_district': flat_district}


## Збереження словника.

In [None]:
import pickle

# Persist the scraped data. The with-statement closes the file on exit,
# so no explicit close() call is needed afterwards.
with open('start_info_about_flat', 'wb') as file:
    pickle.dump(dict_flat, file)

# Parsing metro

## Парсінг інформації про метро.

In [None]:
import requests
from bs4 import BeautifulSoup

url = 'https://locator.ua/near-me/stancii-metro/ru/'
# A timeout makes a stalled connection fail instead of hanging the cell forever.
r = requests.get(url, timeout=30)
soup = BeautifulSoup(r.text, 'lxml')

# Filled by get_name() and get_address() respectively.
list_metro_name = []
list_metro_address = []

def parsing():
    """Collect the name and the address from every ``padding10px`` div of the metro page.

    Reads the module-level ``soup``; the results go to ``list_metro_name``
    and ``list_metro_address`` through ``get_name`` and ``get_address``.
    """
    for entry in soup.findAll('div', class_='padding10px'):
        get_name(entry)
        get_address(entry)


def get_name(item):
    """Append the station name found in ``item`` to ``list_metro_name``.

    Entries without a ``title`` div, and the advertising placeholder title,
    are skipped. The last three words of the title are dropped; every
    remaining word is followed by a single space.
    """
    title = item.find('div', class_='title')
    if title is None:
        return
    if title.text == "Здесь может быть ваша компания":
        return
    kept_words = title.text.split()[:-3]
    list_metro_name.append(''.join(f"{word} " for word in kept_words))

def get_address(item):
    """Append the station address found in ``item`` to ``list_metro_address``.

    Entries without a ``descr`` div, and the ``'Locator.ua'`` placeholder,
    are skipped. Only the part of the text before ``", тел."`` is kept.
    """
    descr = item.find('div', class_='descr')
    if descr is None:
        return
    text = descr.text
    if text == 'Locator.ua':
        return
    address, *_ = text.split(", тел.")
    list_metro_address.append(address)
        
# Fill list_metro_name and list_metro_address from the downloaded page.
parsing()




## Створення словника для подальшого збереження.

In [None]:
# Bundle the two metro lists under the keys used by the later steps.
dict_metro = {
    'metro_name': list_metro_name,
    'metro_address': list_metro_address
}

## Перевірка відображення у датафреймі.

In [None]:
import pandas

# Building the frame requires both lists to have the same length,
# so this also serves as a consistency check of the parsing step.
df = pandas.DataFrame(dict_metro)
df.head(6)

## Збереження інформації про метро.

In [None]:
import pickle

# Persist the metro data. The with-statement closes the file on exit,
# so no explicit close() call is needed afterwards.
with open('info_about_metro', 'wb') as file:
    pickle.dump(dict_metro, file)

# Count distance

## Завантаження інформації.

In [None]:
import pickle
from geopy.geocoders import Nominatim

# Load the files written by the parsing steps. Using with-blocks closes the
# file handles deterministically instead of leaving them to the garbage collector.
# NOTE: pickle can execute arbitrary code on load; only open files produced by this notebook.
with open('info_about_metro', 'rb') as metro_file:
    info_metro = pickle.load(metro_file)
with open('start_info_about_flat', 'rb') as flat_file:
    info_flat = pickle.load(flat_file)

## Метод для отримання координат. 
#### Адреса квартир задана на укр.мові, адреса метро - на рос.мові.

In [None]:
def get_coordinate(street, language = 'uk'):
    """Geocode an address with Nominatim and return ``(latitude, longitude)``.

    Args:
        street: address text. For ``'uk'`` the last two whitespace-separated
            tokens are dropped; for ``'rus'`` the first token is dropped.
            The city name is then appended to the query.
        language: ``'uk'`` for flat addresses or ``'rus'`` for metro addresses.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when ``language`` is
        neither of the two values or when an ``AttributeError`` occurs
        (presumably because the geocoder returned ``None`` for an unknown
        address — verify against the geopy documentation).
    """
    try:
        if language == 'uk':
            query = "".join(f"{word} " for word in street.split()[:-2]) + ', Київ'
            agent, answer_language = "my-applicatio121232n", "uk"
        elif language == 'rus':
            query = "".join(f"{word} " for word in street.split()[1:]) + ', Киeв'
            agent, answer_language = "my-applicati34343on", "ru"
        else:
            return None
        place = Nominatim(user_agent=agent).geocode(query, language=answer_language)
        return (place.latitude, place.longitude)
    except AttributeError:
        return None

## Отримання координат кожного метро.

In [None]:
# One entry per metro address, in the same order; None where geocoding failed.
metro_coordinate = [
    get_coordinate(address, language='rus')
    for address in info_metro['metro_address']
]

## Ручне встановлення пропущених значень.

In [None]:
# Overwrite the fifth-from-last station with hand-entered (latitude, longitude).
metro_coordinate[-5] = (50.501450, 30.497799)

## Знаходження для кожної квартири дистанції до найближчого метро:
#### 1. Ітерації по списку адрес квартир.
#### 2. Знаходження координат квартири.
#### 3. Знаходження відстаней від квартири до кожного метро.
#### 4. Вибір найменшої дистанції.
#### 5. Додавання "дистанції" до словника з інформацією про квартири.
#### 6. Збереження словника.

In [None]:
from geopy.distance import geodesic

# Distance in km to the nearest metro for every flat, in the order of info_flat['flat_street'].
distance = []

def count_for_all_flat():
    """Append the nearest-metro distance of every flat address to ``distance``."""
    for address in info_flat['flat_street']:
        nearest = count_for_one_flat(address)
        distance.append(nearest)

def count_for_one_flat(street):
    """Return the geodesic distance in km from a flat to its nearest metro station.

    Args:
        street: flat address as stored in ``info_flat['flat_street']``.

    Returns:
        The smallest distance to any entry of ``metro_coordinate``, or
        ``None`` when the flat could not be geocoded or no station has
        coordinates.

    The previous implementation started from a sentinel of 100, which
    silently reported 100 km for any flat farther away than that, and it
    passed ``None`` station coordinates straight to ``geodesic``.
    """
    flat_coord = get_coordinate(street)
    if flat_coord is None:
        return None
    return min(
        (
            geodesic(flat_coord, metro_coord).km
            for metro_coord in metro_coordinate
            if metro_coord is not None  # station that could not be geocoded
        ),
        default=None,
    )

            
# Geocode every flat (one network request per address) and attach the result as a new column.
count_for_all_flat()
info_flat['distance'] = distance

import pickle

# Persist the flats together with the distance column. The with-statement
# closes the file on exit, so no explicit close() call is needed afterwards.
with open('info_about_flat', 'wb') as file:
    pickle.dump(info_flat, file)

# Clear data

# Очищення інформації.

## Завантаження інформації про квартири.
#### Інформація вже з дистанцією.

In [None]:
import pickle 

# The with-block closes the file handle instead of leaving it open.
# NOTE: pickle can execute arbitrary code on load; only open files produced by this notebook.
with open('info_about_flat', 'rb') as flat_file:
    info_flat = pickle.load(flat_file)

## Перетворення у датафрейм.

In [None]:
import pandas as pd

# Each key of the loaded object becomes a column.
info_flat = pd.DataFrame(info_flat)

## Перший огляд.

#### Всього рядків: 5980. Всі null значення в колонці з дистанцією.

In [None]:
# Summary statistics before cleaning.
info_flat.describe()

## Заповнення null значень.
#### Пропущені значення замінюються на медіану по району.

In [None]:
# Fill missing distances with the median of the same district.
# NOTE(review): a district with no known distance at all presumably stays NaN — confirm.
info_flat['distance'] = info_flat.groupby('flat_district')['distance'].transform(lambda x: x.fillna(x.median()))

## Видалення викидів.
### Беремо лише ті квартири, ціна на котрі лежить у межах від Q1 − 1.5·IQR до Q3 + 1.5·IQR (Q1 і Q3 — 25% і 75% квартилі, IQR = Q3 − Q1).

In [None]:
# 1.5*IQR rule: keep prices strictly inside (q1 - 1.5*iqr, q3 + 1.5*iqr).
q1 = info_flat['flat_price'].quantile(0.25)
q3 = info_flat['flat_price'].quantile(0.75)
iqr = q3 - q1

upper = q3 + 1.5 * iqr
lower = q1 - 1.5 * iqr

# Rows with a missing price fail both comparisons and are dropped as well.
info_flat = info_flat[(info_flat['flat_price'] > lower) & (info_flat['flat_price'] < upper)]

### Беремо лише ті квартири, дистанція до найближчого метро у котрих лежить у межах від Q1 − 1.5·IQR до Q3 + 1.5·IQR (Q1 і Q3 — 25% і 75% квартилі, IQR = Q3 − Q1).

In [None]:
# Same 1.5*IQR rule, applied to the distance to the nearest metro.
q1 = info_flat['distance'].quantile(0.25)
q3 = info_flat['distance'].quantile(0.75)
iqr = q3 - q1

upper = q3 + 1.5 * iqr
lower = q1 - 1.5 * iqr

# Rows whose distance is still missing fail both comparisons and are dropped as well.
info_flat = info_flat[(info_flat['distance'] > lower) & (info_flat['distance'] < upper)]

## Другий огляд.

In [None]:
# Summary statistics after cleaning.
info_flat.describe()

In [None]:
# First rows of the cleaned data.
info_flat.head(5)

In [None]:
# Column dtypes and non-null counts of the cleaned data.
info_flat.info()

# Візуалізація.

In [None]:
import matplotlib.pyplot as plt

##  Відображення залежностей.
### 1. Ціна від площі
### 2. Ціна від дистанції до метро
### 3. Ціна від к-сті кімнат

In [None]:
fig, ax = plt.subplots(3, figsize = (15, 25))

plt.subplots_adjust(wspace= 5, hspace=0.2)

# Each panel plots one feature on the x axis against the price on the y axis.
# The axis labels follow that order (they were previously swapped).
panels = [
    ('flat_area', 'Area'),
    ('distance', 'Distance'),
    ('flat_room_count', 'Room count'),
]
for axis, (column, label) in zip(ax, panels):
    axis.scatter(info_flat[column], info_flat['flat_price'])
    axis.set_xlabel(label)
    axis.set_ylabel('Price')
    axis.set_title('Графік залежності')


## Графіки розподілу.
### 1. Розподіл цін.
### 2. Розподіл площ.
### 3. Розподіл к-сті кімнат.
### 4. Розподіл дистанції до метро.

In [None]:
fig, ax = plt.subplots(2, 2, figsize = (12, 10))

# Name the four panels once instead of indexing the grid on every call.
(price_ax, rooms_ax), (area_ax, distance_ax) = ax

price_ax.hist(info_flat['flat_price'])
price_ax.set_title("Price")
area_ax.hist(info_flat['flat_area'])
area_ax.set_title("Area")
rooms_ax.hist(info_flat['flat_room_count'])
rooms_ax.set_title("Room count")
distance_ax.hist(info_flat['distance'])
distance_ax.set_title("Distance")

## Збереження інформації про квартири.

In [None]:
# Overwrite 'info_about_flat' with the cleaned DataFrame. The with-statement
# closes the file on exit, so no explicit close() call is needed afterwards.
with open('info_about_flat', 'wb') as file:
    pickle.dump(info_flat, file)

# Find model

## Завантаження інформації.

In [None]:
import pickle
import pandas as pd

# Load the cleaned flats saved by the previous section.
# NOTE: pickle can execute arbitrary code on load; only open files produced by this notebook.
with open('info_about_flat', 'rb') as f:
    flat = pickle.load(f)


import warnings
# NOTE(review): this silences every warning, including convergence warnings from the models below.
warnings.filterwarnings("ignore")

# Підготовка методів

## Лінійна регресія
### Без та з стандартизацією

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import *

from sklearn.model_selection import cross_val_score

from sklearn.preprocessing import StandardScaler

def linear(X, y):
    """Fit ordinary least squares on an 80/20 split and report its quality.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``,
        where the scores are the estimator's ``score`` results.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    estimator = LinearRegression()
    estimator.fit(X_train, y_train)

    train_score = estimator.score(X_train, y_train)
    test_score = estimator.score(X_test, y_test)

    test_mse = mean_squared_error(y_test, estimator.predict(X_test))
    cv_scores = cross_val_score(estimator, X, y, cv=5, scoring='r2')

    return train_score, test_score, test_mse ** 0.5, cv_scores.mean()

def linear_with_scalar(X, y):
    """Fit ordinary least squares on standardised features and report its quality.

    The scaler is fitted on the training part of an 80/20 split only. For
    cross-validation the scaler is part of a pipeline, so it is re-fitted
    inside every fold; previously the folds were scaled with statistics
    computed outside of them, which leaks information into the validation part.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``.
    """
    from sklearn.pipeline import make_pipeline  # needed only for the CV step

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scalar = StandardScaler()
    X_train_scaled = scalar.fit_transform(X_train)
    X_test_scaled = scalar.transform(X_test)

    model = LinearRegression()
    model.fit(X_train_scaled, y_train)

    score_for_train = model.score(X_train_scaled, y_train)
    score_for_test = model.score(X_test_scaled, y_test)

    y_pred = model.predict(X_test_scaled)
    rmse = mean_squared_error(y_test, y_pred) ** 0.5

    # Scaling happens inside each fold, on that fold's training rows only.
    cv_model = make_pipeline(StandardScaler(), LinearRegression())
    scores_CV = cross_val_score(cv_model, X, y, cv=5, scoring='r2')

    return score_for_train, score_for_test, rmse, scores_CV.mean()


## Лассо 
### Без та з стандартизацією

In [None]:
from sklearn.linear_model import LassoCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import *

from sklearn.preprocessing import StandardScaler

def lasso(X, y):
    """Fit ``LassoCV`` on an 80/20 split and report its quality.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``,
        where the scores are the estimator's ``score`` results.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    estimator = LassoCV(cv=5, random_state=42)
    estimator.fit(X_train, y_train)

    train_score = estimator.score(X_train, y_train)
    test_score = estimator.score(X_test, y_test)

    test_mse = mean_squared_error(y_test, estimator.predict(X_test))
    cv_scores = cross_val_score(estimator, X, y, cv=5, scoring='r2')

    return train_score, test_score, test_mse ** 0.5, cv_scores.mean()

def lasso_with_scalar(X, y):
    """Fit ``LassoCV`` on standardised features and report its quality.

    The scaler is fitted on the training part of an 80/20 split only. For
    cross-validation the scaler is part of a pipeline, so it is re-fitted
    inside every fold; previously the folds were scaled with statistics
    computed outside of them, which leaks information into the validation part.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``.
    """
    from sklearn.pipeline import make_pipeline  # needed only for the CV step

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scalar = StandardScaler()
    X_train_scaled = scalar.fit_transform(X_train)
    X_test_scaled = scalar.transform(X_test)

    model = LassoCV(cv=5, random_state=42)
    model.fit(X_train_scaled, y_train)

    score_for_train = model.score(X_train_scaled, y_train)
    score_for_test = model.score(X_test_scaled, y_test)

    y_pred = model.predict(X_test_scaled)
    rmse = mean_squared_error(y_test, y_pred) ** 0.5

    # Scaling happens inside each fold, on that fold's training rows only.
    cv_model = make_pipeline(StandardScaler(), LassoCV(cv=5, random_state=42))
    scores_CV = cross_val_score(cv_model, X, y, cv=5, scoring='r2')

    return score_for_train, score_for_test, rmse, scores_CV.mean()


## Ридж 
### Без та з стандартизацією

In [None]:
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import *

from sklearn.preprocessing import StandardScaler

def ridge(X, y):
    """Fit ``RidgeCV`` on an 80/20 split and report its quality.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``,
        where the scores are the estimator's ``score`` results.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    estimator = RidgeCV(cv=5)
    estimator.fit(X_train, y_train)

    train_score = estimator.score(X_train, y_train)
    test_score = estimator.score(X_test, y_test)

    test_mse = mean_squared_error(y_test, estimator.predict(X_test))
    cv_scores = cross_val_score(estimator, X, y, cv=5, scoring='r2')

    return train_score, test_score, test_mse ** 0.5, cv_scores.mean()

def ridge_with_scalar(X, y):
    """Fit ``RidgeCV`` on standardised features and report its quality.

    The scaler is fitted on the training part of an 80/20 split only. For
    cross-validation the scaler is part of a pipeline, so it is re-fitted
    inside every fold; previously the folds were scaled with statistics
    computed outside of them, which leaks information into the validation part.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``.
    """
    from sklearn.pipeline import make_pipeline  # needed only for the CV step

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scalar = StandardScaler()
    X_train_scaled = scalar.fit_transform(X_train)
    X_test_scaled = scalar.transform(X_test)

    model = RidgeCV(cv=5)
    model.fit(X_train_scaled, y_train)

    score_for_train = model.score(X_train_scaled, y_train)
    score_for_test = model.score(X_test_scaled, y_test)

    y_pred = model.predict(X_test_scaled)
    rmse = mean_squared_error(y_test, y_pred) ** 0.5

    # Scaling happens inside each fold, on that fold's training rows only.
    cv_model = make_pipeline(StandardScaler(), RidgeCV(cv=5))
    scores_CV = cross_val_score(cv_model, X, y, cv=5, scoring='r2')

    return score_for_train, score_for_test, rmse, scores_CV.mean()

## Еластик
### Без та з стандартизацією

In [None]:
from sklearn.linear_model import ElasticNetCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import *

from sklearn.preprocessing import StandardScaler

def elastic(X, y):
    """Fit ``ElasticNetCV`` on an 80/20 split and report its quality.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``,
        where the scores are the estimator's ``score`` results.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    estimator = ElasticNetCV(cv=5, random_state=42)
    estimator.fit(X_train, y_train)

    train_score = estimator.score(X_train, y_train)
    test_score = estimator.score(X_test, y_test)

    test_mse = mean_squared_error(y_test, estimator.predict(X_test))
    cv_scores = cross_val_score(estimator, X, y, cv=5, scoring='r2')

    return train_score, test_score, test_mse ** 0.5, cv_scores.mean()

def elastic_with_scalar(X, y):
    """Fit ``ElasticNetCV`` on standardised features and report its quality.

    The scaler is fitted on the training part of an 80/20 split only. For
    cross-validation the scaler is part of a pipeline, so it is re-fitted
    inside every fold; previously the folds were scaled with statistics
    computed outside of them, which leaks information into the validation part.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``.
    """
    from sklearn.pipeline import make_pipeline  # needed only for the CV step

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scalar = StandardScaler()
    X_train_scaled = scalar.fit_transform(X_train)
    X_test_scaled = scalar.transform(X_test)

    model = ElasticNetCV(cv=5, random_state=42)
    model.fit(X_train_scaled, y_train)

    score_for_train = model.score(X_train_scaled, y_train)
    score_for_test = model.score(X_test_scaled, y_test)

    y_pred = model.predict(X_test_scaled)
    rmse = mean_squared_error(y_test, y_pred) ** 0.5

    # Scaling happens inside each fold, on that fold's training rows only.
    cv_model = make_pipeline(StandardScaler(), ElasticNetCV(cv=5, random_state=42))
    scores_CV = cross_val_score(cv_model, X, y, cv=5, scoring='r2')

    return score_for_train, score_for_test, rmse, scores_CV.mean()


## Дерева рішень

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def tree(X, y, deep):
    """Fit a decision tree of limited depth on an 80/20 split and report its quality.

    Args:
        X: feature matrix.
        y: target values.
        deep: value passed as ``max_depth`` of the tree.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # A fixed random_state makes the run reproducible, consistent with the
    # other models in this notebook, which all use seed 42.
    model = DecisionTreeRegressor(max_depth=deep, random_state=42)
    model.fit(X_train, y_train)

    score_for_train = model.score(X_train, y_train)
    score_for_test = model.score(X_test, y_test)

    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = mse**0.5

    scores_CV = cross_val_score(model, X, y, cv = 5, scoring='r2')

    return score_for_train, score_for_test, rmse, scores_CV.mean()

## Випадковий ліс

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def forest(X, y, i):
    """Fit a 100-tree random forest of limited depth on an 80/20 split and report its quality.

    Args:
        X: feature matrix.
        y: target values.
        i: value passed as ``max_depth`` of the trees.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    estimator = RandomForestRegressor(n_estimators=100, random_state=42, max_depth=i)
    estimator.fit(X_train, y_train)

    train_score = estimator.score(X_train, y_train)
    test_score = estimator.score(X_test, y_test)

    test_mse = mean_squared_error(y_test, estimator.predict(X_test))
    cv_scores = cross_val_score(estimator, X, y, cv=5, scoring='r2')

    return train_score, test_score, test_mse ** 0.5, cv_scores.mean()

## XGBoost

In [None]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def xgboost(X, y):
    """Fit an ``XGBRegressor`` with squared-error objective on an 80/20 split and report its quality.

    Returns:
        ``(train score, test score, test RMSE, mean 5-fold CV r2 on all data)``,
        where the scores are the estimator's ``score`` results.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    estimator = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
    estimator.fit(X_train, y_train)

    train_score = estimator.score(X_train, y_train)
    test_score = estimator.score(X_test, y_test)

    test_mse = mean_squared_error(y_test, estimator.predict(X_test))
    cv_scores = cross_val_score(estimator, X, y, cv=5, scoring='r2')

    return train_score, test_score, test_mse ** 0.5, cv_scores.mean()

## Обробка
#### 1. Перевіряється кожна модель
#### 2. Перевіряється кожна степінь
#### 3. Результати (score для тренування і тестування, похибка передбачення (RMSE), а також середній score крос-валідації) повертаються у словнику

In [None]:
from sklearn.preprocessing import PolynomialFeatures

def find_model(X, y):
    """Evaluate every model family on ``(X, y)`` and collect the scores.

    The linear families are run on polynomial features of degree 1 to 3,
    decision trees with ``max_depth`` 3 to 12, random forests with
    ``max_depth`` 1 to 7, and XGBoost once.

    Returns:
        A dict of equally long lists with the keys ``model_name``,
        ``score_for_train``, ``score_for_test``, ``mean_sq_er`` (the third
        value returned by each model function) and ``score_CV``.
    """
    model_name = []
    score_for_train = []
    score_for_test = []
    mean_sq_er = []
    score_CV = []
    metric_columns = (score_for_train, score_for_test, mean_sq_er, score_CV)

    def evaluate(label, trainer, *args):
        # The label is stored first, the four metrics once training is done.
        model_name.append(label)
        outcome = trainer(*args)
        for column, value in zip(metric_columns, outcome):
            column.append(value)

    linear_families = (
        ("Linear regression", linear),
        ("Linear regression with standartscaler", linear_with_scalar),
        ("Lasso regression", lasso),
        ("Lasso regression with standartscaler", lasso_with_scalar),
        ("Ridge regression", ridge),
        ("Ridge regression with standartscaler", ridge_with_scalar),
        ("Elastic regression", elastic),
        ("Elastic regression with standartscaler", elastic_with_scalar),
    )

    for degree in range(1, 4):
        expander = PolynomialFeatures(degree=degree, include_bias=False)
        X_poly = expander.fit_transform(X)
        for family, trainer in linear_families:
            evaluate(f"{family}, degree: {degree}", trainer, X_poly, y)

    for depth in range(3, 13):
        evaluate(f"DecisionTree, max_deep: {depth}", tree, X, y, depth)

    for depth in range(1, 8):
        evaluate(f"RandomForest, max_deep: {depth}", forest, X, y, depth)

    evaluate("XGBoost", xgboost, X, y)

    return {'model_name': model_name,
            'score_for_train': score_for_train,
            'score_for_test': score_for_test,
            'mean_sq_er': mean_sq_er,
            'score_CV': score_CV}

# Тестування 

## Варіант 1
### Район - dummies, к-сть кімнат - закодовані (без змін)

In [None]:
# Variant 1: one-hot encode the district, keep the room count numeric, drop the raw address.
flat_1 = pd.get_dummies(flat, columns=['flat_district'])
flat_1 = flat_1.drop(columns=['flat_street'])

In [None]:
# Features are every column except the price; the price is the target.
x_1 = flat_1.drop(columns=['flat_price'], inplace=False)
y_1 = flat_1['flat_price']

In [None]:
# Train and score every model family on variant 1.
result_1 = find_model(x_1, y_1)

In [None]:
# Tag the rows with the variant number, then show only the models whose
# train, test and cross-validation scores all exceed 0.55.
df_result_1 = pd.DataFrame(result_1)
df_result_1['variant'] = 1
df_result_1[(df_result_1['score_for_train'] > 0.55) & 
            (df_result_1['score_for_test'] > 0.55) & 
            (df_result_1['score_CV'] > 0.55)]

## Варіант 2
### Район - dummies, к-сть кімнат - dummies

In [None]:
# Variant 2: one-hot encode both the district and the room count, drop the raw address.
flat_2 = pd.get_dummies(flat, columns=['flat_district', 'flat_room_count'])
flat_2 = flat_2.drop(columns=['flat_street'])

In [None]:
# Features are every column except the price; the price is the target.
x_2 = flat_2.drop(columns=['flat_price'], inplace=False)
y_2 = flat_2['flat_price']

In [None]:
# Train and score every model family on variant 2.
result_2 = find_model(x_2, y_2)

In [None]:
# Tag the rows with the variant number, then show only the models whose
# train, test and cross-validation scores all exceed 0.55.
df_result_2 = pd.DataFrame(result_2)
df_result_2['variant'] = 2
df_result_2[(df_result_2['score_for_train'] > 0.55) & 
            (df_result_2['score_for_test'] > 0.55) & 
            (df_result_2['score_CV'] > 0.55)]

## Варіант 3
### Райони - закодовані, к-сть кімнат - закодовані (без змін)

In [None]:
# Variant 3: replace the district by a hand-assigned ordinal "prestige" score.
# Work on a copy: plain assignment would alias `flat`, and adding the
# 'prestige' column would then silently modify the shared DataFrame that the
# other variants are built from.
flat_3 = flat.copy()
# Districts missing from the mapping get NaN in 'prestige'.
flat_3['prestige'] = flat_3['flat_district'].map({"Голосіївський": 2,
                                              "Оболонський": 3,
                                              "Печерський": 4,
                                              "Подільський": 3,
                                              "Святошинський": 2,
                                              "Солом'янський": 2,
                                              "Шевченківський": 3,
                                              "Дарницький": 1,
                                              "Деснянський": 1,
                                              "Дніпровський": 1})

flat_3 = flat_3.drop(columns=['flat_street', 'flat_district'])

In [None]:
# Features are every column except the price; the price is the target.
x_3 = flat_3.drop(columns=['flat_price'])
y_3 = flat_3['flat_price']

In [None]:
# Train and score every model family on variant 3.
result_3 = find_model(x_3, y_3)

In [None]:
# Tag the rows with the variant number, then show only the models whose
# train, test and cross-validation scores all exceed 0.55.
df_result_3 = pd.DataFrame(result_3)
df_result_3['variant'] = 3
df_result_3[(df_result_3['score_for_train'] > 0.55) & 
            (df_result_3['score_for_test'] > 0.55) & 
            (df_result_3['score_CV'] > 0.55)]

## Варіант 4
### Райони - закодовані, к-сть кімнат - dummies

In [None]:
# Variant 4: ordinal "prestige" score for the district, one-hot room count.
# Work on a copy: plain assignment would alias `flat`, and adding the
# 'prestige' column would then silently modify the shared DataFrame that the
# other variants are built from.
flat_4 = flat.copy()
# Districts missing from the mapping get NaN in 'prestige'.
flat_4['prestige'] = flat_4['flat_district'].map({"Голосіївський": 2,
                                              "Оболонський": 3,
                                              "Печерський": 4,
                                              "Подільський": 3,
                                              "Святошинський": 2,
                                              "Солом'янський": 2,
                                              "Шевченківський": 3,
                                              "Дарницький": 1,
                                              "Деснянський": 1,
                                              "Дніпровський": 1})

flat_4 = flat_4.drop(columns=['flat_street', 'flat_district'])
flat_4 = pd.get_dummies(flat_4, columns=['flat_room_count'])

In [None]:
# Features are every column except the price; the price is the target.
x_4 = flat_4.drop(columns=['flat_price'])
y_4 = flat_4['flat_price']

In [None]:
# Train and score every model family on variant 4.
result_4 = find_model(x_4, y_4)

In [None]:
# Tag the rows with the variant number, then show only the models whose
# train, test and cross-validation scores all exceed 0.55.
df_result_4 = pd.DataFrame(result_4)
df_result_4['variant'] = 4
df_result_4[(df_result_4['score_for_train'] > 0.55) & 
            (df_result_4['score_for_test'] > 0.55) &
            (df_result_4['score_CV'] > 0.55)]

## Візуалізація тестування

In [None]:
# Stack the score tables of all four variants; the 'variant' column tells them apart.
result = pd.concat([df_result_1, df_result_2, df_result_3, df_result_4])
result.info()

In [None]:

# Keep only the models whose train, test and CV scores all exceed 0.55
# and whose error value is positive.
result = result[(result['score_for_train'] > 0.55) &
                (result['score_for_test'] > 0.55) &
                (result['score_CV'] > 0.55) &
                (result['mean_sq_er'] > 0)]
# The concatenated frame repeats the per-variant indices, so renumber the rows.
result = result.reset_index(drop=True)
result.info()

# Збереження результатів моделей

In [None]:
# Persist the filtered score table. The with-statement closes the file on
# exit, so no explicit close() call is needed afterwards.
with open('result', 'wb') as file:
    pickle.dump(result, file)