In [1]:
import pandas as pd
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
import numpy as np
import random
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

---

<h3 style="text-align: center;">Предобработка данных</h3>

---

In [2]:
ds = pd.read_csv('../data/r_1_SPB_clear_w_time_to_m.csv',index_col=0)

In [3]:
ds = ds.drop(['m_lat','m_long','adress','Стиральная машина','district','underground','Интернет','Санузел','id'],axis = 1)
ds = ds.drop(ds[ds['Балкон/лоджия'].isin(['2 лоджии','2 балкона'])].index)
ds.reset_index(inplace=True, drop=True)
ds['first_floor'] = ds['floor'].apply(lambda x: 1 if x == 1 else 0)
ds = ds.drop(['floor'],axis = 1)
ds['time_to_metro'] = ds['time_to_metro'].apply(lambda x: round(x))
ds.price_per_month = np.log(ds.price_per_month.values)

In [4]:
ds_shuffled = ds.sample(len(ds),random_state= 66).reset_index(drop=True)
ds_train = ds_shuffled
# ds_train = ds_shuffled[:int(len(ds)*0.85)]
# ds_test = ds_shuffled[int(len(ds)*0.85):]

In [5]:
Y_train = ds_train['price_per_month'].values.reshape((-1,1))
lat_tr = ds_train['a_lat'].values
long_tr = ds_train['a_long'].values
cords_tr = list(zip(lat_tr,long_tr))
X_train = ds_train.drop(['price_per_month','time_to_rent','a_lat','a_long'],axis = 1)

In [17]:
# Y_test = ds_test['price_per_month'].values.reshape((-1,1))
# lat_ts = ds_test['a_lat'].values
# long_ts = ds_test['a_long'].values
# cords_ts = list(zip(lat_ts,long_ts))
# X_test = ds_test.drop(['price_per_month','time_to_rent','a_lat','a_long'],axis = 1)

In [6]:
bin_cols = list(X_train.nunique()[X_train.nunique() == 2].index)
bin_cols

['Холодильник',
 'Телевизор',
 'Посудомоечная машина',
 'Кондиционер',
 'first_floor']

In [7]:
num_cols = list(X_train.drop(bin_cols,axis =1).select_dtypes(include = 'number').columns)
num_cols

['floors_count',
 'total_meters',
 'Площадь кухни',
 'Высота потолков',
 'Год постройки',
 'time_to_metro']

In [8]:
non_cat_cols = num_cols.copy()
non_cat_cols.extend(bin_cols)
cat_cols = list(X_train.drop(non_cat_cols,axis =1).columns)
cat_cols

['Балкон/лоджия', 'Вид из окон', 'Ремонт', 'Тип дома', 'Парковка']

In [9]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat',OneHotEncoder() ,cat_cols),
        ('num', 'passthrough', num_cols),
        ('bin','passthrough',bin_cols)
    ])

In [10]:
X_train_norm = preprocessor.fit_transform(X_train)
# X_test_norm = preprocessor.transform(X_test)
X_train_norm.shape

(812, 30)

In [11]:
col_names = []
for item in preprocessor.get_feature_names_out():
    col_names.append(item.split('__')[1])

In [12]:
X_train = pd.DataFrame(X_train_norm,columns=col_names)
# X_test = pd.DataFrame(X_test_norm,columns=col_names)

In [13]:
for col in X_train.columns:
    for i in range(len(X_train)):
        if X_train[col][i] == 0:
            X_train[col][i] = random.uniform(0, 0.0005)

In [14]:
X_train

Unnamed: 0,Балкон/лоджия_1 балкон,Балкон/лоджия_1 лоджия,"Балкон/лоджия_1 лоджия, 1 балкон",Балкон/лоджия_нет балкона,Вид из окон_Во двор,Вид из окон_На улицу,Вид из окон_На улицу и двор,Ремонт_Дизайнерский,Ремонт_Евроремонт,Ремонт_Косметический,...,total_meters,Площадь кухни,Высота потолков,Год постройки,time_to_metro,Холодильник,Телевизор,Посудомоечная машина,Кондиционер,first_floor
0,1.000000,0.000065,0.000500,0.000266,1.000000,0.000265,0.000107,0.000187,1.000000,0.000249,...,45.4,8.5,2.500000,1974.0,11.0,1.000000,1.000000,1.000000,0.000072,0.000223
1,0.000399,1.000000,0.000052,0.000334,1.000000,0.000131,0.000279,0.000471,1.000000,0.000388,...,48.0,13.0,3.000000,2016.0,3.0,1.000000,1.000000,0.000402,1.000000,0.000303
2,0.000115,1.000000,0.000423,0.000447,1.000000,0.000376,0.000493,0.000349,0.000236,1.000000,...,37.0,8.0,2.600000,1977.0,16.0,1.000000,1.000000,0.000180,0.000379,0.000034
3,0.000004,1.000000,0.000003,0.000482,0.000297,1.000000,0.000383,0.000334,1.000000,0.000377,...,37.8,10.5,2.672774,2022.0,54.0,1.000000,0.000068,0.000416,0.000378,0.000451
4,1.000000,0.000148,0.000476,0.000024,0.000360,0.000454,1.000000,0.000295,1.000000,0.000084,...,48.8,14.0,3.000000,2004.0,15.0,0.000002,0.000069,0.000151,0.000033,0.000468
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
807,0.000314,1.000000,0.000125,0.000398,0.000214,1.000000,0.000498,0.000344,1.000000,0.000195,...,42.0,12.0,2.672774,2011.0,14.0,1.000000,1.000000,0.000395,0.000307,0.000142
808,0.000375,1.000000,0.000243,0.000482,1.000000,0.000494,0.000069,1.000000,0.000081,0.000171,...,45.0,17.0,3.000000,2022.0,42.0,1.000000,1.000000,0.000236,0.000340,0.000277
809,1.000000,0.000498,0.000343,0.000379,1.000000,0.000176,0.000399,0.000024,0.000377,1.000000,...,35.5,8.0,2.672774,1990.0,12.0,1.000000,0.000021,0.000156,0.000403,0.000427
810,1.000000,0.000171,0.000476,0.000215,1.000000,0.000404,0.000304,0.000343,1.000000,0.000248,...,45.0,10.0,2.800000,2008.0,11.0,1.000000,1.000000,0.000429,0.000448,0.000468


In [15]:
X_train_ = X_train.values
# X_test_ = X_test.values

In [16]:
Y_train.shape

(812, 1)

In [17]:
X_train_.shape

(812, 30)

---

<h3 style="text-align: center;">Строим ГВР</h3>

---

In [18]:
gwr_selector = Sel_BW(cords_tr, Y_train, X_train_, spherical = True,fixed=False)# fixed = false -> bandwidth это число ближайших соседей
gwr_bw = gwr_selector.search()

In [19]:
print('GWR bandwidth =', gwr_bw)

GWR bandwidth = 429.0


In [39]:
np.mean(Y_train)

10.3809068699038

In [20]:
gwr_results = GWR(cords_tr, Y_train, X_train_, gwr_bw,name_x = col_names).fit()
gwr_results.summary()

Model type                                                         Gaussian
Number of observations:                                                 812
Number of covariates:                                                    31

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                             26.611
Log-likelihood:                                                     235.594
AIC:                                                               -409.188
AICc:                                                              -404.477
BIC:                                                              -5205.698
R2:                                                                   0.658
Adj. R2:                                                              0.645

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---------- ---------- ------

In [21]:
local_R2 = gwr_results.localR2

In [22]:
min_r2=min(local_R2)[0]
min_r2

0.6725586069258751

In [23]:
diff = (max(local_R2)[0] - min(local_R2)[0])/4
diff

0.03196055737976558

In [24]:
intervals_for_r2 = [round(min_r2+diff*i,2) for i in range(0,5)]
intervals_for_r2

[0.67, 0.7, 0.74, 0.77, 0.8]

In [25]:
str_intervals = []
for i in range(1,5):
    str_intervals.append(str(intervals_for_r2[i-1]) + ' - ' + str(intervals_for_r2[i]))


In [26]:
color_palette = ['#BEC7DA','#7184AD','#395DA3','#03256C']

это значания параметров для каждого наблюдения

In [27]:
len(gwr_results.params[0])

31

это значения t-статистик для каждого наблюдения

In [28]:
len(gwr_results.tvalues[0])

31

---

<h3 style="text-align: center;">Строим карту</h3>

---

In [29]:
import geopandas as gpd
import osmnx as ox

In [30]:
gdf = ox.geocode_to_gdf('Санкт-Петербург', which_result=1)
gdf = gdf.to_crs('EPSG:4326')

In [31]:
import folium
style = {'fillColor': '#450B5C', 'color': '#450B5C','fillOpacity' : 0.05}
m = folium.Map([gdf.centroid.y, gdf.centroid.x], tiles='cartodbpositron')
folium.GeoJson(gdf,style_function=lambda x:style).add_to(m)
folium.FitBounds([[gdf.bounds.miny[0], gdf.bounds.minx[0]],[gdf.bounds.maxy[0], gdf.bounds.maxx[0]]]).add_to(m)


  m = folium.Map([gdf.centroid.y, gdf.centroid.x], tiles='cartodbpositron')
  float(coord)
  if math.isnan(float(coord)):
  return [float(x) for x in coords]


<folium.map.FitBounds at 0x19691fb3520>

In [32]:
for i in range(len(cords_tr)):
    loc_r2 = local_R2[i][0]
    if loc_r2 <= intervals_for_r2[1]:
        clr = color_palette[0]
    elif loc_r2 <= intervals_for_r2[2]:
        clr = color_palette[1]
    elif loc_r2 <= intervals_for_r2[3]:
        clr = color_palette[2]
    else:
        clr = color_palette[3]
    folium.CircleMarker(
        location=cords_tr[i],
        radius=4,
        color='black',
        fill_color = clr,
        weight=1,
        fill=True,
        fill_opacity=1,
        opacity=1,
    ).add_to(m)

In [33]:
str_intervals

['0.67 - 0.7', '0.7 - 0.74', '0.74 - 0.77', '0.77 - 0.8']

In [35]:
from branca.element import Template, MacroElement

template = """
{% macro html(this, kwargs) %}

<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>jQuery UI Draggable - Default functionality</title>
  <link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">

  <script src="https://code.jquery.com/jquery-1.12.4.js"></script>
  <script src="https://code.jquery.com/ui/1.12.1/jquery-ui.js"></script>
  
  <script>
  $( function() {
    $( "#maplegend" ).draggable({
                    start: function (event, ui) {
                        $(this).css({
                            right: "auto",
                            top: "auto",
                            bottom: "auto"
                        });
                    }
                });
});

  </script>
</head>
<body>

 
<div id='maplegend' class='maplegend' 
    style='position: absolute; z-index:9999; border:2px solid grey; background-color:rgba(255, 255, 255, 0.8);
     border-radius:6px; padding: 10px; font-size:14px; right: 20px; bottom: 20px;'>
     
<div class='legend-title'>Local R^2</div>
<div class='legend-scale'>
  <ul class='legend-labels'>
    <li><span style='background:#BEC7DA;opacity:0.7;'></span>0.67 - 0.70</li>
    <li><span style='background:#7184AD;opacity:0.7;'></span>0.70 - 0.74</li>
    <li><span style='background:#395DA3;opacity:0.7;'></span>0.74 - 0.77</li>
    <li><span style='background:#03256C;opacity:0.7;'></span>0.77 - 0.8</li>

  </ul>
</div>
</div>
 
</body>
</html>

<style type='text/css'>
  .maplegend .legend-title {
    text-align: left;
    margin-bottom: 5px;
    font-weight: bold;
    font-size: 90%;
    }
  .maplegend .legend-scale ul {
    margin: 0;
    margin-bottom: 5px;
    padding: 0;
    float: left;
    list-style: none;
    }
  .maplegend .legend-scale ul li {
    font-size: 80%;
    list-style: none;
    margin-left: 0;
    line-height: 18px;
    margin-bottom: 2px;
    }
  .maplegend ul.legend-labels li span {
    display: block;
    float: left;
    height: 17px;
    width: 17px;
    margin-right: 5px;
    margin-left: 0;
    border: 0.2em solid #0F1C3F;
    border-radius: 50%
    }
  .maplegend .legend-source {
    font-size: 80%;
    color: #777;
    clear: both;
    }
  .maplegend a {
    color: #777;
    }
</style>
{% endmacro %}"""

macro = MacroElement()
macro._template = Template(template)
m.get_root().add_child(macro)

---

<h3 style="text-align: center;">Сохраняем карту</h3>

---

In [37]:
m.save('map_price.html')

In [36]:
import io
from PIL import Image

img_data = m._to_png(10)
img = Image.open(io.BytesIO(img_data))
img.save('map_price.png')

---